libtld 1.2.0
|
00001 // TLD library -- XML to C++ parser 00002 // Copyright (C) 2011 Made to Order Software Corp. 00003 // 00004 // This program is free software; you can redistribute it and/or modify 00005 // it under the terms of the GNU General Public License as published by 00006 // the Free Software Foundation; either version 2 of the License, or 00007 // (at your option) any later version. 00008 // 00009 // This program is distributed in the hope that it will be useful, 00010 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 // GNU General Public License for more details. 00013 // 00014 // You should have received a copy of the GNU General Public License 00015 // along with this program; if not, write to the Free Software 00016 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00017 00018 #include "tld.h" 00019 #include <QtCore/QMap> 00020 #include <QtCore/QFile> 00021 #include <QtCore/QTextStream> 00022 #include <QtCore/QStringList> 00023 #include <QtXml/QDomDocument> 00024 #include <iostream> 00025 #include <cstdlib> 00026 00027 namespace snap 00028 { 00029 00030 00031 class tld_info 00032 { 00033 public: 00034 QString f_category; 00035 QString f_reason; 00036 QString f_category_name; 00037 QString f_country; // if category is "country", otherwise empty 00038 int f_level; // level of this TLD (1, 2, 3, 4) 00039 QString f_tld; 00040 QString f_inverted; 00041 QString f_reason_name; // for inside <forbid>, otherwise empty 00042 QString f_exception_apply_to; // the TLD this exception applies to (i.e. the actual response) 00043 int f_offset; // the offset of this item 00044 int f_start_offset; // next level start/end offsets, if end == 0, no next level 00045 int f_end_offset; 00046 }; 00047 00048 typedef QMap<QString, tld_info> tld_info_map_t; 00049 00050 typedef QMap<QString, int> country_map_t; 00051 00052 typedef QMap<ushort, int> tld_info_letters_t; 00053 00054 00055 QString tld_encode(const QString& tld, int& level) 00056 { 00057 QString result; 00058 level = 0; 00059 00060 QByteArray utf8 = tld.toUtf8(); 00061 int l(0); 00062 int max(utf8.length()); 00063 const char *p = utf8.data(); 00064 for(int l = 0; l < max; ++l) { 00065 char c(p[l]); 00066 if(static_cast<unsigned char>(c) < 0x20) { 00067 std::cerr << "error: controls characters (^" << (c + '@') 00068 << ") are not allowed in TLDs (" 00069 << p << ").\n"; 00070 exit(1); 00071 } 00072 if((c >= 'A' && c <= 'Z') 00073 || (c >= 'a' && c <= 'z') 00074 || (c >= '0' && c <= '9') 00075 || c == '.' || c == '-') 00076 { 00077 // these are accepted as is; note that we already checked the 00078 // validty of the data w 00079 if(c == '.') 00080 { 00081 ++level; 00082 c = '!'; // this is important otherwise the sort can break 00083 } 00084 result += c; 00085 } 00086 else 00087 { 00088 // add/remove as appropriate 00089 if(c == '/' || c == ':' || c == '&') { 00090 std::cerr << "error: character (^" << c << ") is not allowed in TLDs.\n"; 00091 exit(1); 00092 } 00093 result += '%'; 00094 QString v(QString("%1").arg(c & 255, 2, 16, QLatin1Char('0'))); 00095 result += v[0]; 00096 result += v[1]; 00097 } 00098 } 00099 // at this time the maximum level we declared is 4 but there are cases 00100 // where countries defined 5 levels (which is definitively crazy!) 00101 if(level < 1 || level > 5) 00102 { 00103 std::cerr << "error: level out of range (" << level << ") if larger than the maximum limit, you may want to increase the limit.\n"; 00104 exit(1); 00105 } 00106 00107 // break it up to easily invert it 00108 QStringList split = result.split('!', QString::SkipEmptyParts); 00109 int i(0); 00110 int j(split.size() - 1); 00111 while(i < j) { 00112 split.swap(i, j); 00113 ++i; 00114 --j; 00115 } 00116 // save it back inverted (!a!b!c is now c!b!a!) 00117 result = split.join("!") + "!"; 00118 00119 return result; 00120 } 00121 00122 00123 void read_tlds(const QString& path, tld_info_map_t& map, country_map_t& countries) 00124 { 00125 // get input file 00126 QFile f(path + "/tld_data.xml"); 00127 if(!f.open(QIODevice::ReadOnly)) { 00128 std::cerr << "error: cannot open " << path.toUtf8().data() << "/tld_data.xml input file\n"; 00129 exit(1); 00130 } 00131 00132 // create a DOM and attach file to it 00133 QDomDocument doc; 00134 doc.setContent(&f); 00135 00136 // search for the tld tag 00137 QDomNode n = doc.firstChild(); 00138 if(n.isNull()) { 00139 std::cerr << "error: your TLD document is empty.\n"; 00140 exit(1); 00141 } 00142 while(!n.isNull()) { 00143 if(n.isElement()) { 00144 QDomElement tlc_tag = n.toElement(); 00145 if(tlc_tag.tagName() != "tld") { 00146 std::cerr << "error: the root tag must be a <tld> tag. We got <" << tlc_tag.tagName().toUtf8().data() << "> instead.\n"; 00147 exit(1); 00148 } 00149 break; 00150 } 00151 n = n.nextSibling(); 00152 } 00153 if(n.isNull()) { 00154 std::cerr << "error: your TLD document is expected to have a <tld> tag as the root tag; we could not find it.\n"; 00155 exit(1); 00156 } 00157 n = n.firstChild(); 00158 00159 int country_counter = 0; 00160 00161 // go through the <area> tags 00162 while(!n.isNull()) 00163 { 00164 // make sure it's a tag 00165 if(n.isElement()) 00166 { 00167 QDomElement e = n.toElement(); 00168 if(e.tagName() != "area") 00169 { 00170 std::cerr << "error: only <area> tags are expected in a <tld> XML file, got <" << e.tagName().toUtf8().data() << "> instead.\n"; 00171 exit(1); 00172 } 00173 00174 // Category (international|professionals|language|groups|region|country) 00175 QString category(e.attribute("category", "country")); 00176 QString country; 00177 if(category == "country") 00178 { 00179 // Country Name 00180 country = e.attribute("country", "undefined"); 00181 if(countries.contains(country)) 00182 { 00183 std::cerr << "error: found country \"" << country.toUtf8().data() << "\" defined twice.\n"; 00184 exit(1); 00185 } 00186 countries[country] = ++country_counter; 00187 } 00188 00189 // Actual TLDs (may be empty) 00190 QDomNode t = e.firstChild(); 00191 while(!t.isNull()) 00192 { 00193 if(!t.isComment() && t.isCharacterData()) 00194 { 00195 QString names(t.toCharacterData().data()); 00196 names.replace("\n", " "); 00197 names.replace("\r", " "); 00198 names.replace("\t", " "); 00199 QStringList name_list(names.split(" ", QString::SkipEmptyParts)); 00200 for(QStringList::iterator nm = name_list.begin(); 00201 nm != name_list.end(); 00202 ++nm) 00203 { 00204 if(nm->isEmpty()) 00205 { 00206 continue; 00207 } 00208 int level(0); 00209 QString value_name(tld_encode(*nm, level)); 00210 if(map.contains(value_name)) 00211 { 00212 std::cerr << "error: found TLD \"" << nm->toUtf8().data() << "\" more than once.\n"; 00213 exit(1); 00214 } 00215 00216 tld_info tld; 00217 tld.f_category_name = category; 00218 tld.f_country = country; 00219 tld.f_level = level; 00220 tld.f_tld = *nm; 00221 tld.f_inverted = value_name; 00222 // no reason, we're not inside a forbid tag 00223 // no exception apply to, we're not inside an exception 00224 tld.f_offset = 0; 00225 tld.f_start_offset = USHRT_MAX; 00226 tld.f_end_offset = USHRT_MAX; 00227 00228 map[value_name] = tld; 00229 } 00230 } 00231 else if(t.isElement()) 00232 { 00233 QDomElement f = t.toElement(); 00234 if(f.tagName() == "exceptions") 00235 { 00236 QString apply_to(f.attribute("apply-to", "unknown")); 00237 int unused_level(0); 00238 apply_to = tld_encode(apply_to, unused_level); 00239 00240 QDomNode st = f.firstChild(); 00241 while(!st.isNull()) 00242 { 00243 if(!st.isComment() && st.isCharacterData()) 00244 { 00245 QString names(st.toCharacterData().data()); 00246 names.replace("\n", " "); 00247 names.replace("\r", " "); 00248 names.replace("\t", " "); 00249 QStringList name_list(names.split(" ", QString::SkipEmptyParts)); 00250 for(QStringList::iterator nm = name_list.begin(); 00251 nm != name_list.end(); 00252 ++nm) 00253 { 00254 int level(0); 00255 QString value_name(tld_encode(*nm, level)); 00256 if(map.contains(value_name)) 00257 { 00258 std::cerr << "error: found TLD \"" << nm->toUtf8().data() << "\" more than once (exceptions section).\n"; 00259 exit(1); 00260 } 00261 00262 tld_info tld; 00263 tld.f_category_name = category; 00264 tld.f_country = country; 00265 tld.f_level = level; 00266 tld.f_tld = *nm; 00267 tld.f_inverted = value_name; 00268 // no reason, we're not inside a forbid tag 00269 tld.f_exception_apply_to = apply_to; 00270 tld.f_offset = 0; 00271 tld.f_start_offset = USHRT_MAX; 00272 tld.f_end_offset = USHRT_MAX; 00273 00274 map[value_name] = tld; 00275 } 00276 } 00277 st = st.nextSibling(); 00278 } } 00279 else if(f.tagName() == "forbid") 00280 { 00281 QString reason(f.attribute("reason", "unused")); 00282 00283 QDomNode st = f.firstChild(); 00284 while(!st.isNull()) 00285 { 00286 if(!st.isComment() && st.isCharacterData()) 00287 { 00288 QString names(st.toCharacterData().data()); 00289 names.replace("\n", " "); 00290 names.replace("\r", " "); 00291 names.replace("\t", " "); 00292 QStringList name_list(names.split(" ", QString::SkipEmptyParts)); 00293 for(QStringList::iterator nm = name_list.begin(); 00294 nm != name_list.end(); 00295 ++nm) 00296 { 00297 int level(0); 00298 QString value_name(tld_encode(*nm, level)); 00299 if(map.contains(value_name)) 00300 { 00301 std::cerr << "error: found TLD \"" << nm->toUtf8().data() << "\" more than once (forbidden section).\n"; 00302 exit(1); 00303 } 00304 00305 tld_info tld; 00306 tld.f_category_name = category; 00307 tld.f_country = country; 00308 tld.f_level = level; 00309 tld.f_tld = *nm; 00310 tld.f_inverted = value_name; 00311 tld.f_reason_name = reason; 00312 // no exception apply to, we're not inside an exception 00313 tld.f_offset = 0; 00314 tld.f_start_offset = USHRT_MAX; 00315 tld.f_end_offset = USHRT_MAX; 00316 00317 map[value_name] = tld; 00318 } 00319 } 00320 st = st.nextSibling(); 00321 } 00322 } 00323 else { 00324 std::cerr << "error: only <forbid> and <exceptions> tags are expected in an <area> tag, got <" << f.tagName().toUtf8().data() << "> instead.\n"; 00325 exit(1); 00326 } 00327 } 00328 t = t.nextSibling(); 00329 } 00330 } 00331 n = n.nextSibling(); 00332 } 00333 } 00334 00335 00336 00337 void verify_data(tld_info_map_t& map) 00338 { 00339 int max_tld_length = 0; 00340 for(tld_info_map_t::iterator it = map.begin(); 00341 it != map.end(); 00342 ++it) 00343 { 00344 QString t(it->f_tld); 00345 if(t.length() > max_tld_length) 00346 { 00347 max_tld_length = t.length(); 00348 } 00349 for(int i = t.length() - 1, j = i + 1, k = j; i >= 0; --i) 00350 { 00351 QChar c = t.at(i); 00352 short u = c.unicode(); 00353 if(u == '.') 00354 { 00355 // periods are accepted, but not one after another or just before a dash 00356 if(i + 1 == j) 00357 { 00358 // this captures an ending period which we don't allow in our files (although it is legal in a domain name) 00359 if(j == t.length()) 00360 { 00361 std::cerr << "error: an ending period is not acceptable in a TLD name; found in \"" << t.toUtf8().data() << "\"\n"; 00362 } 00363 else 00364 { 00365 std::cerr << "error: two periods one after another is not acceptable in a TLD name; found in \"" << t.toUtf8().data() << "\"\n"; 00366 } 00367 exit(1); 00368 } 00369 if(i + 1 == k) 00370 { 00371 std::cerr << "error: a dash cannot be just after a period; problem found in \"" << t.toUtf8().data() << "\"\n"; 00372 exit(1); 00373 } 00374 j = i; 00375 k = i; 00376 } 00377 else if(i == 0) 00378 { 00379 std::cerr << "error: the TLD must start with a period; problem found in \"" << t.toUtf8().data() << "\"\n"; 00380 exit(1); 00381 } 00382 else if(u == '-') 00383 { 00384 if(i + 1 == k) 00385 { 00386 if(k == t.length()) 00387 { 00388 std::cerr << "error: a dash cannot be found at the end of a TLD; problem found in \"" << t.toUtf8().data() << "\"\n"; 00389 } 00390 else 00391 { 00392 std::cerr << "error: a dash cannot be just before a period; problem found in \"" << t.toUtf8().data() << "\"\n"; 00393 } 00394 exit(1); 00395 } 00396 k = i; 00397 } 00398 else if(!c.isLetterOrNumber()) 00399 { 00400 // we accept a certain number of signs that are not 00401 // otherwise considered letters... 00402 switch(c.unicode()) { 00403 case 0x093E: // devanagari vowel sign AA 00404 case 0x0982: // Bengali Sign Anusvara 00405 case 0x09BE: // Bengali Vowel Sign AA 00406 case 0x0A3E: // Gurmukhi Vowel Sign AA 00407 case 0x0ABE: // Gujarati Vowel Sign AA 00408 case 0x0BBE: // Tamil Dependent Vowel Sign AA 00409 case 0x0BBF: // Tamil Dependent Vowel Sign I 00410 case 0x0BC2: // Tamil Vowel Sign UU 00411 case 0x0BC8: // Tamil Vowel Sign AI 00412 case 0x0BCD: // Tamil Sign Virama 00413 case 0x0C3E: // Telugu Vowel Sign AA 00414 case 0x0C4D: // Telugu Sign Virama 00415 case 0x0D82: // Sinhala Sign Anusvaraya 00416 case 0x0DCF: // Sinhala Vowel Sign Aela-Pilla 00417 break; 00418 00419 default: 00420 std::cerr << "error: a TLD can only be composed of letters and numbers and dashes; problem found in \"" 00421 << t.toUtf8().data() << "\" -- letter: &#x" << std::hex << (int)c.unicode() << std::dec << "; chr(" << c.unicode() << ")\n"; 00422 } 00423 } 00424 //else we're good 00425 } 00426 00427 if(it->f_category_name == "international") 00428 { 00429 it->f_category = "TLD_CATEGORY_INTERNATIONAL"; 00430 } 00431 else if(it->f_category_name == "professionals") 00432 { 00433 it->f_category = "TLD_CATEGORY_PROFESSIONALS"; 00434 } 00435 else if(it->f_category_name == "language") 00436 { 00437 it->f_category = "TLD_CATEGORY_LANGUAGE"; 00438 } 00439 else if(it->f_category_name == "groups") 00440 { 00441 it->f_category = "TLD_CATEGORY_GROUPS"; 00442 } 00443 else if(it->f_category_name == "region") 00444 { 00445 it->f_category = "TLD_CATEGORY_REGION"; 00446 } 00447 else if(it->f_category_name == "technical") 00448 { 00449 it->f_category = "TLD_CATEGORY_TECHNICAL"; 00450 } 00451 else if(it->f_category_name == "country") 00452 { 00453 it->f_category = "TLD_CATEGORY_COUNTRY"; 00454 } 00455 else if(it->f_category_name == "entrepreneurial") 00456 { 00457 it->f_category = "TLD_CATEGORY_ENTREPRENEURIAL"; 00458 } 00459 else 00460 { 00461 std::cerr << "error: unknown category \"" << it->f_category_name.toUtf8().data() << "\"\n"; 00462 exit(1); 00463 } 00464 00465 // if within a <forbid> tag we have a reason too 00466 if(it->f_reason_name == "proposed") 00467 { 00468 it->f_reason = "TLD_STATUS_PROPOSED"; 00469 } 00470 else if(it->f_reason_name == "deprecated") 00471 { 00472 it->f_reason = "TLD_STATUS_DEPRECATED"; 00473 } 00474 else if(it->f_reason_name == "unused") 00475 { 00476 it->f_reason = "TLD_STATUS_UNUSED"; 00477 } 00478 else if(it->f_reason_name == "reserved") 00479 { 00480 it->f_reason = "TLD_STATUS_RESERVED"; 00481 } 00482 else if(it->f_reason_name == "infrastructure") 00483 { 00484 it->f_reason = "TLD_STATUS_INFRASTRUCTURE"; 00485 } 00486 else if(!it->f_reason_name.isEmpty()) 00487 { 00488 std::cerr << "error: unknown reason \"" << it->f_reason_name.toUtf8().data() << "\"\n"; 00489 exit(1); 00490 } 00491 else 00492 { 00493 it->f_reason = "TLD_STATUS_VALID"; 00494 } 00495 } 00496 // At time of writing it is 21 characters 00497 //std::cout << "longest TLD is " << max_tld_length << "\n"; 00498 } 00499 00500 00501 QFile out_file("tld_data.c"); 00502 QTextStream out; 00503 void setup_output() 00504 { 00505 if(!out_file.open(QIODevice::WriteOnly)) { 00506 std::cerr << "error: cannot open snap_path_tld.cpp output file\n"; 00507 exit(1); 00508 } 00509 out.setDevice(&out_file); 00510 } 00511 00512 00513 00514 void output_utf8(const QString& str) 00515 { 00516 QByteArray utf8_buffer = str.toUtf8(); 00517 const char *utf8 = utf8_buffer.data(); 00518 int max = strlen(utf8); 00519 for(int i = 0; i < max; ++i) 00520 { 00521 unsigned char u(utf8[i]); 00522 if(u > 0x7F) 00523 { 00524 // funny looking, but to avoid problems with the next 00525 // character we put this one \x## inside a standalone 00526 // string... remember that multiple strings one after 00527 // another are simply concatenated in C/C++ 00528 out << "\"\"\\x" << hex << (u & 255) << dec << "\"\""; 00529 } 00530 else 00531 { 00532 out << (char)u; 00533 } 00534 } 00535 } 00536 00537 void output_countries(const country_map_t& countries) 00538 { 00539 int max(0); 00540 for(country_map_t::const_iterator it = countries.begin(); 00541 it != countries.end(); 00542 ++it) 00543 { 00544 if(it.value() > max) 00545 { 00546 max = it.value(); 00547 } 00548 } 00549 00550 // first entry is used for international, etc. 00551 for(int i = 1; i <= max; ++i) 00552 { 00553 out << "const char tld_country" << i << "[] = \""; 00554 output_utf8(countries.key(i)); 00555 out << "\";\n"; 00556 } 00557 } 00558 00559 void save_offset(tld_info_map_t& map, const QString& tld, int offset) 00560 { 00561 int e = tld.lastIndexOf('!', -2); 00562 QString parent = tld.left(e + 1); 00563 if(!map.contains(parent)) 00564 { 00565 std::cerr << "error: TLD \"" << tld.toUtf8().data() 00566 << "\" does not have a corresponding TLD at the previous level (i.e. \"" 00567 << parent.toUtf8().data() << "\").\n"; 00568 exit(1); 00569 } 00570 if(map[parent].f_start_offset == USHRT_MAX) 00571 { 00572 map[parent].f_start_offset = offset; 00573 } 00574 map[parent].f_end_offset = offset + 1; 00575 } 00576 00577 void output_tlds(tld_info_map_t& map, 00578 const country_map_t& countries) 00579 { 00580 // to create the table below we want one entry with an 00581 // empty TLD and that will appear last with the info we 00582 // need to search level 1 00583 tld_info tld; 00584 tld.f_category_name = "international"; 00585 tld.f_country = ""; 00586 tld.f_level = 0; 00587 tld.f_tld = ""; 00588 tld.f_inverted = ""; 00589 tld.f_reason_name = "TLD_STATUS_VALID"; 00590 tld.f_exception_apply_to = ""; 00591 tld.f_offset = 0; 00592 tld.f_start_offset = USHRT_MAX; 00593 tld.f_end_offset = USHRT_MAX; 00594 00595 map[""] = tld; // top-level (i.e. level 0) 00596 00597 // first we determine the longest TLD in terms of levels 00598 // (i.e. number of periods) 00599 int max_level(0); 00600 for(tld_info_map_t::const_iterator it = map.begin(); 00601 it != map.end(); 00602 ++it) 00603 { 00604 if(max_level < it->f_level) 00605 { 00606 max_level = it->f_level; 00607 } 00608 } 00609 00610 // define the offsets used with the exceptions 00611 int i(0); 00612 for(int level = max_level; level > 0; --level) 00613 { 00614 for(tld_info_map_t::iterator it = map.begin(); 00615 it != map.end(); 00616 ++it) 00617 { 00618 if(it->f_level == level) 00619 { 00620 it->f_offset = i; 00621 ++i; 00622 } 00623 } 00624 } 00625 00626 // now we output the table with the largest levels first, 00627 // as we do so we save the index of the start and stop 00628 // points of each level in the previous level (hence the 00629 // need for a level 0 entry) 00630 out << "const struct tld_description tld_descriptions[] =\n{\n"; 00631 int base_max(0); 00632 i = 0; 00633 for(int level = max_level; level > 0; --level) 00634 { 00635 for(tld_info_map_t::const_iterator it = map.begin(); 00636 it != map.end(); 00637 ++it) 00638 { 00639 if(it->f_level == level) 00640 { 00641 if(i != 0) 00642 { 00643 out << ",\n"; 00644 } 00645 unsigned short apply_to(USHRT_MAX); 00646 //unsigned char exception_level(USHRT_MAX); 00647 QString status(it->f_reason); 00648 if(!it->f_exception_apply_to.isEmpty()) { 00649 status = "TLD_STATUS_EXCEPTION"; 00650 apply_to = map[it->f_exception_apply_to].f_offset; 00651 } 00652 out << "\t/* " << i << " */ { " << it->f_category.toUtf8().data() 00653 << ", " << status.toUtf8().data() 00654 << ", " << it->f_start_offset 00655 << ", " << it->f_end_offset 00656 << ", " << apply_to 00657 << ", " << it->f_level 00658 << ", \""; 00659 save_offset(map, it->f_inverted, i); 00660 // we only have to save the current level 00661 int e = it->f_inverted.lastIndexOf('!', -2); 00662 QString base(it->f_inverted.mid(e + 1, it->f_inverted.length() - e - 2)); 00663 if(base.length() > base_max) 00664 { 00665 base_max = base.length(); 00666 } 00667 output_utf8(base); 00668 if(it->f_category == "TLD_CATEGORY_COUNTRY") 00669 { 00670 out << "\", tld_country" << countries[it->f_country]; 00671 } 00672 else 00673 { 00674 out << "\", (const char *) 0"; 00675 } 00676 out << " }"; 00677 ++i; 00678 } 00679 } 00680 } 00681 out << "\n};\n"; 00682 00683 out << "unsigned short tld_start_offset = " << map[""].f_start_offset << ";\n"; 00684 out << "unsigned short tld_end_offset = " << map[""].f_end_offset << ";\n"; 00685 out << "int tld_max_level = " << max_level << ";\n"; 00686 } 00687 00688 00689 void output_offsets(const tld_info_map_t& map, 00690 const tld_info_letters_t& letters) 00691 { 00692 // we know that the table always starts at zero so we skip the first 00693 // entry (plus the first entry is for the '%' which is not contiguous 00694 // with 'a') 00695 out << "const int tld_offsets[] = {\n"; 00696 for(tld_info_letters_t::const_iterator it = letters.begin() + 1; 00697 it != letters.end(); 00698 ++it) 00699 { 00700 out << "\t/* '" << (char)it.key() << "' */ " << it.value() << ",\n"; 00701 } 00702 out << "\t/* total size */ " << map.size() << "\n};\n"; 00703 } 00704 00705 00706 void output_header() 00707 { 00708 out << "/* *** AUTO-GENERATED *** DO NOT EDIT ***\n"; 00709 out << " * This list of TLDs was auto-generated using snap_path_parser.cpp.\n"; 00710 out << " * Fix the parser or XML file used as input instead of this file.\n"; 00711 out << " *\n"; 00712 out << " * Copyright (C) 2011 Made to Order Software Corp.\n"; 00713 out << " *\n"; 00714 out << " * This program is free software; you can redistribute it and/or modify\n"; 00715 out << " * it under the terms of the GNU General Public License as published by\n"; 00716 out << " * the Free Software Foundation; either version 2 of the License, or\n"; 00717 out << " * (at your option) any later version.\n"; 00718 out << " *\n"; 00719 out << " * This program is distributed in the hope that it will be useful,\n"; 00720 out << " * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"; 00721 out << " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"; 00722 out << " * GNU General Public License for more details.\n"; 00723 out << " *\n"; 00724 out << " * You should have received a copy of the GNU General Public License\n"; 00725 out << " * along with this program; if not, write to the Free Software\n"; 00726 out << " * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\n"; 00727 out << " */\n"; 00728 out << "#include \"tld_data.h\"\n"; 00729 out << "#include \"tld.h\"\n"; 00730 } 00731 00732 void output_footer() 00733 { 00734 } 00735 00736 00737 // this is useful to see what the heck we're working on 00738 void output_map(const tld_info_map_t& map) 00739 { 00740 for(tld_info_map_t::const_iterator it = map.begin(); 00741 it != map.end(); 00742 ++it) 00743 { 00744 std::cout << it->f_tld.toUtf8().data() << ":" 00745 << it->f_category_name.toUtf8().data(); 00746 if(!it->f_country.isNull()) { 00747 std::cout << " (" << it->f_country.toUtf8().data() << ")"; 00748 } 00749 if(!it->f_reason_name.isNull()) { 00750 std::cout << " [" << it->f_reason_name.toUtf8().data() << "]"; 00751 } 00752 std::cout << "\n"; 00753 } 00754 } 00755 00756 00757 } // namespace snap 00758 00759 00760 00761 int main(int argc, char *argv[]) 00762 { 00763 if(argc != 2) { 00764 std::cerr << "error: usage 'tld_parser <path>'\n"; 00765 exit(1); 00766 } 00767 snap::tld_info_map_t map; 00768 snap::country_map_t countries; 00769 //snap::tld_info_letters_t letters; 00770 snap::read_tlds(argv[1], map, countries); 00771 snap::verify_data(map); 00772 snap::setup_output(); 00773 snap::output_header(); 00774 snap::output_countries(countries); 00775 snap::output_tlds(map, countries); 00776 //snap::output_offsets(map, letters); -- letters is not computed 00777 snap::output_footer(); 00778 //snap::output_map(map); 00779 } 00780 00781 00782 // vim: ts=4 sw=4
This document is part of the libtld Project.
Copyright by Made to Order Software Corp.