libtld 1.2.0

tld_parser.cpp

Go to the documentation of this file.
00001 // TLD library -- XML to C++ parser
00002 // Copyright (C) 2011  Made to Order Software Corp.
00003 //
00004 // This program is free software; you can redistribute it and/or modify
00005 // it under the terms of the GNU General Public License as published by
00006 // the Free Software Foundation; either version 2 of the License, or
00007 // (at your option) any later version.
00008 //
00009 // This program is distributed in the hope that it will be useful,
00010 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 // GNU General Public License for more details.
00013 //
00014 // You should have received a copy of the GNU General Public License
00015 // along with this program; if not, write to the Free Software
00016 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00017 
#include "tld.h"
#include <QtCore/QFile>
#include <QtCore/QMap>
#include <QtCore/QStringList>
#include <QtCore/QTextStream>
#include <QtXml/QDomDocument>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
00026 
00027 namespace snap
00028 {
00029 
00030 
00031 class tld_info
00032 {
00033 public:
00034     QString             f_category;
00035     QString             f_reason;
00036     QString             f_category_name;
00037     QString             f_country;  // if category is "country", otherwise empty
00038     int                 f_level; // level of this TLD (1, 2, 3, 4)
00039     QString             f_tld;
00040     QString             f_inverted;
00041     QString             f_reason_name;  // for inside <forbid>, otherwise empty
00042     QString             f_exception_apply_to; // the TLD this exception applies to (i.e. the actual response)
00043     int                 f_offset; // the offset of this item
00044     int                 f_start_offset; // next level start/end offsets, if end == 0, no next level
00045     int                 f_end_offset;
00046 };
00047 
// TLD descriptions indexed by their encoded, inverted name (the value
// returned by tld_encode(), see read_tlds())
00048 typedef QMap<QString, tld_info> tld_info_map_t;
00049 
// country name to country number; read_tlds() numbers the countries
// starting at 1 in the order they are first seen
00050 typedef QMap<QString, int>  country_map_t;
00051 
// character to offset table consumed by output_offsets(); main() does not
// currently compute it
00052 typedef QMap<ushort, int>  tld_info_letters_t;
00053 
00054 
00055 QString tld_encode(const QString& tld, int& level)
00056 {
00057     QString result;
00058     level = 0;
00059 
00060     QByteArray utf8 = tld.toUtf8();
00061     int l(0);
00062     int max(utf8.length());
00063     const char *p = utf8.data();
00064     for(int l = 0; l < max; ++l) {
00065         char c(p[l]);
00066         if(static_cast<unsigned char>(c) < 0x20) {
00067             std::cerr << "error: controls characters (^" << (c + '@')
00068                     << ") are not allowed in TLDs ("
00069                     << p << ").\n";
00070             exit(1);
00071         }
00072         if((c >= 'A' && c <= 'Z')
00073         || (c >= 'a' && c <= 'z')
00074         || (c >= '0' && c <= '9')
00075         || c == '.' || c == '-')
00076         {
00077             // these are accepted as is; note that we already checked the
00078             // validty of the data w
00079             if(c == '.')
00080             {
00081                 ++level;
00082                 c = '!'; // this is important otherwise the sort can break
00083             }
00084             result += c;
00085         }
00086         else
00087         {
00088             // add/remove as appropriate
00089             if(c == '/' || c == ':' || c == '&') {
00090                 std::cerr << "error: character (^" << c << ") is not allowed in TLDs.\n";
00091                 exit(1);
00092             }
00093             result += '%';
00094             QString v(QString("%1").arg(c & 255, 2, 16, QLatin1Char('0')));
00095             result += v[0];
00096             result += v[1];
00097         }
00098     }
00099     // at this time the maximum level we declared is 4 but there are cases
00100     // where countries defined 5 levels (which is definitively crazy!)
00101     if(level < 1 || level > 5)
00102     {
00103         std::cerr << "error: level out of range (" << level << ") if larger than the maximum limit, you may want to increase the limit.\n";
00104         exit(1);
00105     }
00106 
00107     // break it up to easily invert it
00108     QStringList split = result.split('!', QString::SkipEmptyParts);
00109     int i(0);
00110     int j(split.size() - 1);
00111     while(i < j) {
00112         split.swap(i, j);
00113         ++i;
00114         --j;
00115     }
00116     // save it back inverted (!a!b!c is now c!b!a!)
00117     result = split.join("!") + "!";
00118 
00119     return result;
00120 }
00121 
00122 
00123 void read_tlds(const QString& path, tld_info_map_t& map, country_map_t& countries)
00124 {
00125     // get input file
00126     QFile f(path + "/tld_data.xml");
00127     if(!f.open(QIODevice::ReadOnly)) {
00128         std::cerr << "error: cannot open " << path.toUtf8().data() << "/tld_data.xml input file\n";
00129         exit(1);
00130     }
00131 
00132     // create a DOM and attach file to it
00133     QDomDocument doc;
00134     doc.setContent(&f);
00135 
00136     // search for the tld tag
00137     QDomNode n = doc.firstChild();
00138     if(n.isNull()) {
00139         std::cerr << "error: your TLD document is empty.\n";
00140         exit(1);
00141     }
00142     while(!n.isNull()) {
00143         if(n.isElement()) {
00144             QDomElement tlc_tag = n.toElement();
00145             if(tlc_tag.tagName() != "tld") {
00146                 std::cerr << "error: the root tag must be a <tld> tag. We got <" << tlc_tag.tagName().toUtf8().data() << "> instead.\n";
00147                 exit(1);
00148             }
00149             break;
00150         }
00151         n = n.nextSibling();
00152     }
00153     if(n.isNull()) {
00154         std::cerr << "error: your TLD document is expected to have a <tld> tag as the root tag; we could not find it.\n";
00155         exit(1);
00156     }
00157     n = n.firstChild();
00158 
00159     int country_counter = 0;
00160 
00161     // go through the <area> tags
00162     while(!n.isNull())
00163     {
00164         // make sure it's a tag
00165         if(n.isElement())
00166         {
00167             QDomElement e = n.toElement();
00168             if(e.tagName() != "area")
00169             {
00170                 std::cerr << "error: only <area> tags are expected in a <tld> XML file, got <" << e.tagName().toUtf8().data() << "> instead.\n";
00171                 exit(1);
00172             }
00173 
00174             // Category (international|professionals|language|groups|region|country)
00175             QString category(e.attribute("category", "country"));
00176             QString country;
00177             if(category == "country")
00178             {
00179                 // Country Name
00180                 country = e.attribute("country", "undefined");
00181                 if(countries.contains(country))
00182                 {
00183                     std::cerr << "error: found country \"" << country.toUtf8().data() << "\" defined twice.\n";
00184                     exit(1);
00185                 }
00186                 countries[country] = ++country_counter;
00187             }
00188 
00189             // Actual TLDs (may be empty)
00190             QDomNode t = e.firstChild();
00191             while(!t.isNull())
00192             {
00193                 if(!t.isComment() && t.isCharacterData())
00194                 {
00195                     QString names(t.toCharacterData().data());
00196                     names.replace("\n", " ");
00197                     names.replace("\r", " ");
00198                     names.replace("\t", " ");
00199                     QStringList name_list(names.split(" ", QString::SkipEmptyParts));
00200                     for(QStringList::iterator nm = name_list.begin();
00201                                               nm != name_list.end();
00202                                               ++nm)
00203                     {
00204                         if(nm->isEmpty())
00205                         {
00206                             continue;
00207                         }
00208                         int level(0);
00209                         QString value_name(tld_encode(*nm, level));
00210                         if(map.contains(value_name))
00211                         {
00212                             std::cerr << "error: found TLD \"" << nm->toUtf8().data() << "\" more than once.\n";
00213                             exit(1);
00214                         }
00215 
00216                         tld_info tld;
00217                         tld.f_category_name = category;
00218                         tld.f_country = country;
00219                         tld.f_level = level;
00220                         tld.f_tld = *nm;
00221                         tld.f_inverted = value_name;
00222                         // no reason, we're not inside a forbid tag
00223                         // no exception apply to, we're not inside an exception
00224                         tld.f_offset = 0;
00225                         tld.f_start_offset = USHRT_MAX;
00226                         tld.f_end_offset = USHRT_MAX;
00227 
00228                         map[value_name] = tld;
00229                     }
00230                 }
00231                 else if(t.isElement())
00232                 {
00233                     QDomElement f = t.toElement();
00234                     if(f.tagName() == "exceptions")
00235                     {
00236                         QString apply_to(f.attribute("apply-to", "unknown"));
00237                         int unused_level(0);
00238                         apply_to = tld_encode(apply_to, unused_level);
00239 
00240                         QDomNode st = f.firstChild();
00241                         while(!st.isNull())
00242                         {
00243                             if(!st.isComment() && st.isCharacterData())
00244                             {
00245                                 QString names(st.toCharacterData().data());
00246                                 names.replace("\n", " ");
00247                                 names.replace("\r", " ");
00248                                 names.replace("\t", " ");
00249                                 QStringList name_list(names.split(" ", QString::SkipEmptyParts));
00250                                 for(QStringList::iterator nm = name_list.begin();
00251                                                           nm != name_list.end();
00252                                                           ++nm)
00253                                 {
00254                                     int level(0);
00255                                     QString value_name(tld_encode(*nm, level));
00256                                     if(map.contains(value_name))
00257                                     {
00258                                         std::cerr << "error: found TLD \"" << nm->toUtf8().data() << "\" more than once (exceptions section).\n";
00259                                         exit(1);
00260                                     }
00261 
00262                                     tld_info tld;
00263                                     tld.f_category_name = category;
00264                                     tld.f_country = country;
00265                                     tld.f_level = level;
00266                                     tld.f_tld = *nm;
00267                                     tld.f_inverted = value_name;
00268                                     // no reason, we're not inside a forbid tag
00269                                     tld.f_exception_apply_to = apply_to;
00270                                     tld.f_offset = 0;
00271                                     tld.f_start_offset = USHRT_MAX;
00272                                     tld.f_end_offset = USHRT_MAX;
00273 
00274                                     map[value_name] = tld;
00275                                 }
00276                             }
00277                             st = st.nextSibling();
00278                         }                   }
00279                     else if(f.tagName() == "forbid")
00280                     {
00281                         QString reason(f.attribute("reason", "unused"));
00282 
00283                         QDomNode st = f.firstChild();
00284                         while(!st.isNull())
00285                         {
00286                             if(!st.isComment() && st.isCharacterData())
00287                             {
00288                                 QString names(st.toCharacterData().data());
00289                                 names.replace("\n", " ");
00290                                 names.replace("\r", " ");
00291                                 names.replace("\t", " ");
00292                                 QStringList name_list(names.split(" ", QString::SkipEmptyParts));
00293                                 for(QStringList::iterator nm = name_list.begin();
00294                                                           nm != name_list.end();
00295                                                           ++nm)
00296                                 {
00297                                     int level(0);
00298                                     QString value_name(tld_encode(*nm, level));
00299                                     if(map.contains(value_name))
00300                                     {
00301                                         std::cerr << "error: found TLD \"" << nm->toUtf8().data() << "\" more than once (forbidden section).\n";
00302                                         exit(1);
00303                                     }
00304 
00305                                     tld_info tld;
00306                                     tld.f_category_name = category;
00307                                     tld.f_country = country;
00308                                     tld.f_level = level;
00309                                     tld.f_tld = *nm;
00310                                     tld.f_inverted = value_name;
00311                                     tld.f_reason_name = reason;
00312                                     // no exception apply to, we're not inside an exception
00313                                     tld.f_offset = 0;
00314                                     tld.f_start_offset = USHRT_MAX;
00315                                     tld.f_end_offset = USHRT_MAX;
00316 
00317                                     map[value_name] = tld;
00318                                 }
00319                             }
00320                             st = st.nextSibling();
00321                         }
00322                     }
00323                     else {
00324                         std::cerr << "error: only <forbid> and <exceptions> tags are expected in an <area> tag, got <" << f.tagName().toUtf8().data() << "> instead.\n";
00325                         exit(1);
00326                     }
00327                 }
00328                 t = t.nextSibling();
00329             }
00330         }
00331         n = n.nextSibling();
00332     }
00333 }
00334 
00335 
00336 
00337 void verify_data(tld_info_map_t& map)
00338 {
00339     int max_tld_length = 0;
00340     for(tld_info_map_t::iterator it = map.begin();
00341                               it != map.end();
00342                               ++it)
00343     {
00344         QString t(it->f_tld);
00345         if(t.length() > max_tld_length)
00346         {
00347             max_tld_length = t.length();
00348         }
00349         for(int i = t.length() - 1, j = i + 1, k = j; i >= 0; --i)
00350         {
00351             QChar c = t.at(i);
00352             short u = c.unicode();
00353             if(u == '.')
00354             {
00355                 // periods are accepted, but not one after another or just before a dash
00356                 if(i + 1 == j)
00357                 {
00358                     // this captures an ending period which we don't allow in our files (although it is legal in a domain name)
00359                     if(j == t.length())
00360                     {
00361                         std::cerr << "error: an ending period is not acceptable in a TLD name; found in \"" << t.toUtf8().data() << "\"\n";
00362                     }
00363                     else
00364                     {
00365                         std::cerr << "error: two periods one after another is not acceptable in a TLD name; found in \"" << t.toUtf8().data() << "\"\n";
00366                     }
00367                     exit(1);
00368                 }
00369                 if(i + 1 == k)
00370                 {
00371                     std::cerr << "error: a dash cannot be just after a period; problem found in \"" << t.toUtf8().data() << "\"\n";
00372                     exit(1);
00373                 }
00374                 j = i;
00375                 k = i;
00376             }
00377             else if(i == 0)
00378             {
00379                 std::cerr << "error: the TLD must start with a period; problem found in \"" << t.toUtf8().data() << "\"\n";
00380                 exit(1);
00381             }
00382             else if(u == '-')
00383             {
00384                 if(i + 1 == k)
00385                 {
00386                     if(k == t.length())
00387                     {
00388                         std::cerr << "error: a dash cannot be found at the end of a TLD; problem found in \"" << t.toUtf8().data() << "\"\n";
00389                     }
00390                     else
00391                     {
00392                         std::cerr << "error: a dash cannot be just before a period; problem found in \"" << t.toUtf8().data() << "\"\n";
00393                     }
00394                     exit(1);
00395                 }
00396                 k = i;
00397             }
00398             else if(!c.isLetterOrNumber())
00399             {
00400                 // we accept a certain number of signs that are not
00401                 // otherwise considered letters...
00402                 switch(c.unicode()) {
00403                 case 0x093E: // devanagari vowel sign AA
00404                 case 0x0982: // Bengali Sign Anusvara
00405                 case 0x09BE: // Bengali Vowel Sign AA
00406                 case 0x0A3E: // Gurmukhi Vowel Sign AA
00407                 case 0x0ABE: // Gujarati Vowel Sign AA
00408                 case 0x0BBE: // Tamil Dependent Vowel Sign AA
00409                 case 0x0BBF: // Tamil Dependent Vowel Sign I
00410                 case 0x0BC2: // Tamil Vowel Sign UU
00411                 case 0x0BC8: // Tamil Vowel Sign AI
00412                 case 0x0BCD: // Tamil Sign Virama
00413                 case 0x0C3E: // Telugu Vowel Sign AA
00414                 case 0x0C4D: // Telugu Sign Virama
00415                 case 0x0D82: // Sinhala Sign Anusvaraya
00416                 case 0x0DCF: // Sinhala Vowel Sign Aela-Pilla
00417                     break;
00418 
00419                 default:
00420                     std::cerr << "error: a TLD can only be composed of letters and numbers and dashes; problem found in \""
00421                         << t.toUtf8().data() << "\" -- letter: &#x" << std::hex << (int)c.unicode() << std::dec << "; chr(" << c.unicode() << ")\n";
00422                 }
00423             }
00424             //else we're good
00425         }
00426 
00427         if(it->f_category_name == "international")
00428         {
00429             it->f_category = "TLD_CATEGORY_INTERNATIONAL";
00430         }
00431         else if(it->f_category_name == "professionals")
00432         {
00433             it->f_category = "TLD_CATEGORY_PROFESSIONALS";
00434         }
00435         else if(it->f_category_name == "language")
00436         {
00437             it->f_category = "TLD_CATEGORY_LANGUAGE";
00438         }
00439         else if(it->f_category_name == "groups")
00440         {
00441             it->f_category = "TLD_CATEGORY_GROUPS";
00442         }
00443         else if(it->f_category_name == "region")
00444         {
00445             it->f_category = "TLD_CATEGORY_REGION";
00446         }
00447         else if(it->f_category_name == "technical")
00448         {
00449             it->f_category = "TLD_CATEGORY_TECHNICAL";
00450         }
00451         else if(it->f_category_name == "country")
00452         {
00453             it->f_category = "TLD_CATEGORY_COUNTRY";
00454         }
00455         else if(it->f_category_name == "entrepreneurial")
00456         {
00457             it->f_category = "TLD_CATEGORY_ENTREPRENEURIAL";
00458         }
00459         else
00460         {
00461             std::cerr << "error: unknown category \"" << it->f_category_name.toUtf8().data() << "\"\n";
00462             exit(1);
00463         }
00464 
00465         // if within a <forbid> tag we have a reason too
00466         if(it->f_reason_name == "proposed")
00467         {
00468             it->f_reason = "TLD_STATUS_PROPOSED";
00469         }
00470         else if(it->f_reason_name == "deprecated")
00471         {
00472             it->f_reason = "TLD_STATUS_DEPRECATED";
00473         }
00474         else if(it->f_reason_name == "unused")
00475         {
00476             it->f_reason = "TLD_STATUS_UNUSED";
00477         }
00478         else if(it->f_reason_name == "reserved")
00479         {
00480             it->f_reason = "TLD_STATUS_RESERVED";
00481         }
00482         else if(it->f_reason_name == "infrastructure")
00483         {
00484             it->f_reason = "TLD_STATUS_INFRASTRUCTURE";
00485         }
00486         else if(!it->f_reason_name.isEmpty())
00487         {
00488             std::cerr << "error: unknown reason \"" << it->f_reason_name.toUtf8().data() << "\"\n";
00489             exit(1);
00490         }
00491         else
00492         {
00493             it->f_reason = "TLD_STATUS_VALID";
00494         }
00495     }
00496     // At time of writing it is 21 characters
00497     //std::cout << "longest TLD is " << max_tld_length << "\n";
00498 }
00499 
00500 
00501 QFile out_file("tld_data.c");
00502 QTextStream out;
00503 void setup_output()
00504 {
00505     if(!out_file.open(QIODevice::WriteOnly)) {
00506         std::cerr << "error: cannot open snap_path_tld.cpp output file\n";
00507         exit(1);
00508     }
00509     out.setDevice(&out_file);
00510 }
00511 
00512 
00513 
00514 void output_utf8(const QString& str)
00515 {
00516     QByteArray utf8_buffer = str.toUtf8();
00517     const char *utf8 = utf8_buffer.data();
00518     int max = strlen(utf8);
00519     for(int i = 0; i < max; ++i)
00520     {
00521         unsigned char u(utf8[i]);
00522         if(u > 0x7F)
00523         {
00524             // funny looking, but to avoid problems with the next
00525             // character we put this one \x## inside a standalone
00526             // string... remember that multiple strings one after
00527             // another are simply concatenated in C/C++
00528             out << "\"\"\\x" << hex << (u & 255) << dec << "\"\"";
00529         }
00530         else
00531         {
00532             out << (char)u;
00533         }
00534     }
00535 }
00536 
00537 void output_countries(const country_map_t& countries)
00538 {
00539     int max(0);
00540     for(country_map_t::const_iterator it = countries.begin();
00541                             it != countries.end();
00542                             ++it)
00543     {
00544         if(it.value() > max)
00545         {
00546             max = it.value();
00547         }
00548     }
00549 
00550     // first entry is used for international, etc.
00551     for(int i = 1; i <= max; ++i)
00552     {
00553         out << "const char tld_country" << i << "[] = \"";
00554         output_utf8(countries.key(i));
00555         out << "\";\n";
00556     }
00557 }
00558 
00559 void save_offset(tld_info_map_t& map, const QString& tld, int offset)
00560 {
00561     int e = tld.lastIndexOf('!', -2);
00562     QString parent = tld.left(e + 1);
00563     if(!map.contains(parent))
00564     {
00565         std::cerr << "error: TLD \"" << tld.toUtf8().data()
00566                     << "\" does not have a corresponding TLD at the previous level (i.e. \""
00567                     << parent.toUtf8().data() << "\").\n";
00568         exit(1);
00569     }
00570     if(map[parent].f_start_offset == USHRT_MAX)
00571     {
00572         map[parent].f_start_offset = offset;
00573     }
00574     map[parent].f_end_offset = offset + 1;
00575 }
00576 
00577 void output_tlds(tld_info_map_t& map,
00578                 const country_map_t& countries)
00579 {
00580     // to create the table below we want one entry with an
00581     // empty TLD and that will appear last with the info we
00582     // need to search level 1
00583     tld_info tld;
00584     tld.f_category_name = "international";
00585     tld.f_country = "";
00586     tld.f_level = 0;
00587     tld.f_tld = "";
00588     tld.f_inverted = "";
00589     tld.f_reason_name = "TLD_STATUS_VALID";
00590     tld.f_exception_apply_to = "";
00591     tld.f_offset = 0;
00592     tld.f_start_offset = USHRT_MAX;
00593     tld.f_end_offset = USHRT_MAX;
00594 
00595     map[""] = tld; // top-level (i.e. level 0)
00596 
00597     // first we determine the longest TLD in terms of levels
00598     // (i.e. number of periods)
00599     int max_level(0);
00600     for(tld_info_map_t::const_iterator it = map.begin();
00601                             it != map.end();
00602                             ++it)
00603     {
00604         if(max_level < it->f_level)
00605         {
00606             max_level = it->f_level;
00607         }
00608     }
00609 
00610     // define the offsets used with the exceptions
00611     int i(0);
00612     for(int level = max_level; level > 0; --level)
00613     {
00614         for(tld_info_map_t::iterator it = map.begin();
00615                                 it != map.end();
00616                                 ++it)
00617         {
00618             if(it->f_level == level)
00619             {
00620                 it->f_offset = i;
00621                 ++i;
00622             }
00623         }
00624     }
00625 
00626     // now we output the table with the largest levels first,
00627     // as we do so we save the index of the start and stop
00628     // points of each level in the previous level (hence the
00629     // need for a level 0 entry)
00630     out << "const struct tld_description tld_descriptions[] =\n{\n";
00631     int base_max(0);
00632     i = 0;
00633     for(int level = max_level; level > 0; --level)
00634     {
00635         for(tld_info_map_t::const_iterator it = map.begin();
00636                                 it != map.end();
00637                                 ++it)
00638         {
00639             if(it->f_level == level)
00640             {
00641                 if(i != 0)
00642                 {
00643                     out << ",\n";
00644                 }
00645                 unsigned short apply_to(USHRT_MAX);
00646                 //unsigned char exception_level(USHRT_MAX);
00647                 QString status(it->f_reason);
00648                 if(!it->f_exception_apply_to.isEmpty()) {
00649                     status = "TLD_STATUS_EXCEPTION";
00650                     apply_to = map[it->f_exception_apply_to].f_offset;
00651                 }
00652                 out << "\t/* " << i << " */ { " << it->f_category.toUtf8().data()
00653                                     << ", " << status.toUtf8().data()
00654                                     << ", " << it->f_start_offset
00655                                     << ", " << it->f_end_offset
00656                                     << ", " << apply_to
00657                                     << ", " << it->f_level
00658                                     << ", \"";
00659                 save_offset(map, it->f_inverted, i);
00660                 // we only have to save the current level
00661                 int e = it->f_inverted.lastIndexOf('!', -2);
00662                 QString base(it->f_inverted.mid(e + 1, it->f_inverted.length() - e - 2));
00663                 if(base.length() > base_max)
00664                 {
00665                     base_max = base.length();
00666                 }
00667                 output_utf8(base);
00668                 if(it->f_category == "TLD_CATEGORY_COUNTRY")
00669                 {
00670                     out << "\", tld_country" << countries[it->f_country];
00671                 }
00672                 else
00673                 {
00674                     out << "\", (const char *) 0";
00675                 }
00676                 out << " }";
00677                 ++i;
00678             }
00679         }
00680     }
00681     out << "\n};\n";
00682 
00683     out << "unsigned short tld_start_offset = " << map[""].f_start_offset << ";\n";
00684     out << "unsigned short tld_end_offset = " << map[""].f_end_offset << ";\n";
00685     out << "int tld_max_level = " << max_level << ";\n";
00686 }
00687 
00688 
00689 void output_offsets(const tld_info_map_t& map,
00690                     const tld_info_letters_t& letters)
00691 {
00692     // we know that the table always starts at zero so we skip the first
00693     // entry (plus the first entry is for the '%' which is not contiguous
00694     // with 'a')
00695     out << "const int tld_offsets[] = {\n";
00696     for(tld_info_letters_t::const_iterator it = letters.begin() + 1;
00697                             it != letters.end();
00698                             ++it)
00699     {
00700         out << "\t/* '" << (char)it.key() << "' */ " << it.value() << ",\n";
00701     }
00702     out << "\t/* total size */ " << map.size() << "\n};\n";
00703 }
00704 
00705 
00706 void output_header()
00707 {
00708     out << "/* *** AUTO-GENERATED *** DO NOT EDIT ***\n";
00709     out << " * This list of TLDs was auto-generated using snap_path_parser.cpp.\n";
00710     out << " * Fix the parser or XML file used as input instead of this file.\n";
00711     out << " *\n";
00712     out << " * Copyright (C) 2011  Made to Order Software Corp.\n";
00713     out << " *\n";
00714     out << " * This program is free software; you can redistribute it and/or modify\n";
00715     out << " * it under the terms of the GNU General Public License as published by\n";
00716     out << " * the Free Software Foundation; either version 2 of the License, or\n";
00717     out << " * (at your option) any later version.\n";
00718     out << " *\n";
00719     out << " * This program is distributed in the hope that it will be useful,\n";
00720     out << " * but WITHOUT ANY WARRANTY; without even the implied warranty of\n";
00721     out << " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n";
00722     out << " * GNU General Public License for more details.\n";
00723     out << " *\n";
00724     out << " * You should have received a copy of the GNU General Public License\n";
00725     out << " * along with this program; if not, write to the Free Software\n";
00726     out << " * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA\n";
00727     out << " */\n";
00728     out << "#include \"tld_data.h\"\n";
00729     out << "#include \"tld.h\"\n";
00730 }
00731 
// Write the end of the generated file.
//
// Nothing is needed at this time; the function is kept so main() has a
// counterpart to output_header().
void output_footer()
{
    // intentionally empty
}
00735 
00736 
00737 // this is useful to see what the heck we're working on
00738 void output_map(const tld_info_map_t& map)
00739 {
00740     for(tld_info_map_t::const_iterator it = map.begin();
00741                             it != map.end();
00742                             ++it)
00743     {
00744         std::cout << it->f_tld.toUtf8().data() << ":"
00745             << it->f_category_name.toUtf8().data();
00746         if(!it->f_country.isNull()) {
00747             std::cout << " (" << it->f_country.toUtf8().data() << ")";
00748         }
00749         if(!it->f_reason_name.isNull()) {
00750             std::cout << " [" << it->f_reason_name.toUtf8().data() << "]";
00751         }
00752         std::cout << "\n";
00753     }
00754 }
00755 
00756 
00757 } // namespace snap
00758 
00759 
00760 
00761 int main(int argc, char *argv[])
00762 {
00763     if(argc != 2) {
00764         std::cerr << "error: usage 'tld_parser <path>'\n";
00765         exit(1);
00766     }
00767     snap::tld_info_map_t map;
00768     snap::country_map_t countries;
00769     //snap::tld_info_letters_t letters;
00770     snap::read_tlds(argv[1], map, countries);
00771     snap::verify_data(map);
00772     snap::setup_output();
00773     snap::output_header();
00774     snap::output_countries(countries);
00775     snap::output_tlds(map, countries);
00776     //snap::output_offsets(map, letters); -- letters is not computed
00777     snap::output_footer();
00778     //snap::output_map(map);
00779 }
00780 
00781 
00782 // vim: ts=4 sw=4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines

This document is part of the libtld Project.

Copyright by Made to Order Software Corp.