libtld 1.2.0

tld_test_tld_names.cpp

Go to the documentation of this file.
00001 /* TLD library -- test the TLD interface against the Mozilla effective TLD names
00002  * Copyright (C) 2011  Made to Order Software Corp.
00003  *
00004  * This program is free software; you can redistribute it and/or modify
00005  * it under the terms of the GNU General Public License as published by
00006  * the Free Software Foundation; either version 2 of the License, or
00007  * (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00017  */
00018 
00019 #include "tld.h"
00020 #include <string>
00021 #include <vector>
00022 #include <stdlib.h>
00023 #include <stdio.h>
00024 #include <boost/algorithm/string.hpp>
00025 #include <QtCore/QString>
00026 
00027 
00028 
/* Number of errors detected so far by the tests in this file; main()
 * uses it to decide whether to run the next test and to compute the
 * process exit code.
 */
int err_count(0);

/*
 * This test calls the tld() function with all the TLDs as defined
 * by Mozilla to determine whether we are up to date.
 *
 * extern enum tld_result tld(const char *uri, struct tld_info *info);
 */

/* List of strings; used to hold the entries read from the data file. */
typedef std::vector<std::string> string_vector_t;

/* The entries loaded by test_load() and checked by test_tlds(). */
string_vector_t tlds;
00040 
00041 
00046 QString tld_encode(const QString& tld, int& level)
00047 {
00048     QString result;
00049     level = 0;
00050 
00051     QByteArray utf8 = tld.toUtf8();
00052     int l(0);
00053     int max(utf8.length());
00054     const char *p = utf8.data();
00055     for(int l = 0; l < max; ++l) {
00056         char c(p[l]);
00057         if(static_cast<unsigned char>(c) < 0x20) {
00058             fprintf(stderr, "error: controls characters (^%c) are not allowed in TLDs (%s).\n", c, p);
00059             exit(1);
00060         }
00061         if((c >= 'A' && c <= 'Z')
00062         || (c >= 'a' && c <= 'z')
00063         || (c >= '0' && c <= '9')
00064         || c == '.' || c == '-')
00065         {
00066             // these are accepted as is; note that we already checked the
00067             // validty of the data w
00068             if(c == '.')
00069             {
00070                 ++level;
00071             }
00072             result += c;
00073         }
00074         else
00075         {
00076             // add/remove as appropriate
00077             if(c == '/' || c == ':' || c == '&') {
00078                 fprintf(stderr, "error: character (^%c) is not allowed in TLDs.\n", c);
00079                 exit(1);
00080             }
00081             result += '%';
00082             QString v(QString("%1").arg(c & 255, 2, 16, QLatin1Char('0')));
00083             result += v[0];
00084             result += v[1];
00085         }
00086     }
00087     // at this time the maximum level we declared is 4 but there are cases
00088     // where countries defined 5 levels (which is definitively crazy!)
00089     if(level < 0 || level > 5)
00090     {
00091         fprintf(stderr, "error: level out of range (%d) if larger than the maximum limit, you may want to increase the limit.\n", level);
00092         exit(1);
00093     }
00094 
00095     return result;
00096 }
00097 
00098 
00099 /*
00100  * The function reads the effective_tld_names.dat file in memory.
00101  *
00102  * We call exit(1) if we find an error while reading the data.
00103  */
00104 void test_load()
00105 {
00106     FILE *f = fopen("effective_tld_names.dat", "r");
00107     char buf[256];
00108     buf[sizeof(buf) -1] = '\0';
00109     int line(0);
00110     while(fgets(buf, sizeof(buf) - 1, f) != NULL) {
00111         ++line;
00112         int l = strlen(buf);
00113         if(l == sizeof(buf) - 1) {
00114             // the fgets() failed in this case so forget it
00115             fprintf(stderr, "effective_tld_names.data:%d:error: line too long.\n", line);
00116             ++err_count;
00117         }
00118         else {
00119             std::string s(buf);
00120             boost::algorithm::trim(s);
00121             if(s.length() == 1) {
00122                 // all TLDs are at least 2 characters
00123                 fprintf(stderr, "effective_tld_names.data:%d:error: line too long.\n", line);
00124                 ++err_count;
00125             }
00126             else if(s.length() > 1 && s[0] != '/' && s[1] != '/') {
00127                 // this is not a comment and not an empty line, that's a TLD
00128                 tlds.push_back(s);
00129 //printf("found [%s]\n", s.c_str());
00130             }
00131         }
00132     }
00133 }
00134 
00135 
00136 /*
00137  * This test checks out URIs that end with an invalid TLD. This is
00138  * expected to return an error every single time.
00139  */
00140 void test_tlds()
00141 {
00142     for(string_vector_t::const_iterator it(tlds.begin()); it != tlds.end(); ++it) {
00143         tld_info info;
00144         std::string url("www.this-is-a-long-domain-name-that-should-not-make-it-in-a-tld.");
00145         if(it->at(0) == '*') {
00146             url = "we-want-to-test-just-one-domain-name" + it->substr(1);
00147             tld_result r = tld(url.c_str(), &info);
00148             if(r == TLD_RESULT_SUCCESS) {
00149                 // if it worked then we have a problem
00150                 fprintf(stderr, "error: tld(\"%s\", &info) accepted when 2nd level names are not accepted.\n",
00151                         it->c_str());
00152                 ++err_count;
00153             }
00154             else if(r != TLD_RESULT_INVALID) {
00155                 // we're good if invalid since that's what we expect in this case
00156                 // any other result is an error
00157                 fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str());
00158                 ++err_count;
00159             }
00160         }
00161         else if(it->at(0) != '!') {
00162             url += *it;
00163             int level;
00164             QString utf16(QString::fromUtf8(url.c_str()));
00165             QString u(tld_encode(utf16, level));
00166             QByteArray uri(u.toUtf8());
00167             tld_result r = tld(uri.data(), &info);
00168             if(r == TLD_RESULT_SUCCESS || r == TLD_RESULT_INVALID) {
00169                 // it succeeded, but is it the right length?
00170                 utf16 = QString::fromUtf8(it->c_str());
00171                 u = tld_encode(utf16, level);
00172                 if(strlen(info.f_tld) != u.size() + 1) {
00173                     fprintf(stderr, "error: tld(\"%s\", &info) length mismatch (\"%s\", %d/%d).\n",
00174                             uri.data(), info.f_tld, (int)strlen(info.f_tld), (int)(u.size() + 1));
00175 QString s(QString::fromUtf8(it->c_str()));
00176 fprintf(stderr, "%d> %s [%s] -> %d ", r, it->c_str(), u.toUtf8().data(), s.length());
00177 for(int i(0); i < s.length(); ++i) {
00178 fprintf(stderr, "&#x%04X;", s.at(i).unicode());
00179 }
00180 fprintf(stderr, "\n");
00181                     ++err_count;
00182                 }
00183             }
00184             else {
00185                 //fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str());
00186 QString s(QString::fromUtf8(it->c_str()));
00187 printf("%d> %s [%s] -> %d ", r, it->c_str(), u.toUtf8().data(), s.length());
00188 for(int i(0); i < s.length(); ++i) {
00189 printf("&#x%04X;", s.at(i).unicode());
00190 }
00191 printf("\n");
00192                 ++err_count;
00193             }
00194         }
00195     }
00196 }
00197 
00198 
00199 
00200 
00201 int main(int argc, char *argv[])
00202 {
00203     fprintf(stderr, "testing tld version %s\n", tld_version());
00204 
00205     /* call all the tests, one by one
00206      * failures are "recorded" in the err_count global variable
00207      * and the process stops with an error message and exit(1)
00208      * if err_count is not zero.
00209      */
00210     test_load();
00211 
00212     if(err_count == 0) {
00213         test_tlds();
00214     }
00215 
00216     if(err_count)
00217     {
00218         fprintf(stderr, "%d error%s occured.\n",
00219                     err_count, err_count != 1 ? "s" : "");
00220     }
00221     exit(err_count ? 1 : 0);
00222 }
00223 
00224 /* vim: ts=4 sw=4
00225  */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines

This document is part of the libtld Project.

Copyright by Made to Order Software Corp.