libtld 1.2.0
|
/* TLD library -- test the TLD interface against the Mozilla effective TLD names
 * Copyright (C) 2011  Made to Order Software Corp.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "tld.h"
#include <string>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <boost/algorithm/string.hpp>
#include <QtCore/QString>



int err_count = 0;

/*
 * This test calls the tld() function with all the TLDs as defined
 * by Mozilla to determine whether we are up to date.
00034 * 00035 * extern enum tld_result tld(const char *uri, struct tld_info *info); 00036 */ 00037 00038 typedef std::vector<std::string> string_vector_t; 00039 string_vector_t tlds; 00040 00041 00046 QString tld_encode(const QString& tld, int& level) 00047 { 00048 QString result; 00049 level = 0; 00050 00051 QByteArray utf8 = tld.toUtf8(); 00052 int l(0); 00053 int max(utf8.length()); 00054 const char *p = utf8.data(); 00055 for(int l = 0; l < max; ++l) { 00056 char c(p[l]); 00057 if(static_cast<unsigned char>(c) < 0x20) { 00058 fprintf(stderr, "error: controls characters (^%c) are not allowed in TLDs (%s).\n", c, p); 00059 exit(1); 00060 } 00061 if((c >= 'A' && c <= 'Z') 00062 || (c >= 'a' && c <= 'z') 00063 || (c >= '0' && c <= '9') 00064 || c == '.' || c == '-') 00065 { 00066 // these are accepted as is; note that we already checked the 00067 // validty of the data w 00068 if(c == '.') 00069 { 00070 ++level; 00071 } 00072 result += c; 00073 } 00074 else 00075 { 00076 // add/remove as appropriate 00077 if(c == '/' || c == ':' || c == '&') { 00078 fprintf(stderr, "error: character (^%c) is not allowed in TLDs.\n", c); 00079 exit(1); 00080 } 00081 result += '%'; 00082 QString v(QString("%1").arg(c & 255, 2, 16, QLatin1Char('0'))); 00083 result += v[0]; 00084 result += v[1]; 00085 } 00086 } 00087 // at this time the maximum level we declared is 4 but there are cases 00088 // where countries defined 5 levels (which is definitively crazy!) 00089 if(level < 0 || level > 5) 00090 { 00091 fprintf(stderr, "error: level out of range (%d) if larger than the maximum limit, you may want to increase the limit.\n", level); 00092 exit(1); 00093 } 00094 00095 return result; 00096 } 00097 00098 00099 /* 00100 * The function reads the effective_tld_names.dat file in memory. 00101 * 00102 * We call exit(1) if we find an error while reading the data. 
00103 */ 00104 void test_load() 00105 { 00106 FILE *f = fopen("effective_tld_names.dat", "r"); 00107 char buf[256]; 00108 buf[sizeof(buf) -1] = '\0'; 00109 int line(0); 00110 while(fgets(buf, sizeof(buf) - 1, f) != NULL) { 00111 ++line; 00112 int l = strlen(buf); 00113 if(l == sizeof(buf) - 1) { 00114 // the fgets() failed in this case so forget it 00115 fprintf(stderr, "effective_tld_names.data:%d:error: line too long.\n", line); 00116 ++err_count; 00117 } 00118 else { 00119 std::string s(buf); 00120 boost::algorithm::trim(s); 00121 if(s.length() == 1) { 00122 // all TLDs are at least 2 characters 00123 fprintf(stderr, "effective_tld_names.data:%d:error: line too long.\n", line); 00124 ++err_count; 00125 } 00126 else if(s.length() > 1 && s[0] != '/' && s[1] != '/') { 00127 // this is not a comment and not an empty line, that's a TLD 00128 tlds.push_back(s); 00129 //printf("found [%s]\n", s.c_str()); 00130 } 00131 } 00132 } 00133 } 00134 00135 00136 /* 00137 * This test checks out URIs that end with an invalid TLD. This is 00138 * expected to return an error every single time. 
00139 */ 00140 void test_tlds() 00141 { 00142 for(string_vector_t::const_iterator it(tlds.begin()); it != tlds.end(); ++it) { 00143 tld_info info; 00144 std::string url("www.this-is-a-long-domain-name-that-should-not-make-it-in-a-tld."); 00145 if(it->at(0) == '*') { 00146 url = "we-want-to-test-just-one-domain-name" + it->substr(1); 00147 tld_result r = tld(url.c_str(), &info); 00148 if(r == TLD_RESULT_SUCCESS) { 00149 // if it worked then we have a problem 00150 fprintf(stderr, "error: tld(\"%s\", &info) accepted when 2nd level names are not accepted.\n", 00151 it->c_str()); 00152 ++err_count; 00153 } 00154 else if(r != TLD_RESULT_INVALID) { 00155 // we're good if invalid since that's what we expect in this case 00156 // any other result is an error 00157 fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str()); 00158 ++err_count; 00159 } 00160 } 00161 else if(it->at(0) != '!') { 00162 url += *it; 00163 int level; 00164 QString utf16(QString::fromUtf8(url.c_str())); 00165 QString u(tld_encode(utf16, level)); 00166 QByteArray uri(u.toUtf8()); 00167 tld_result r = tld(uri.data(), &info); 00168 if(r == TLD_RESULT_SUCCESS || r == TLD_RESULT_INVALID) { 00169 // it succeeded, but is it the right length? 
00170 utf16 = QString::fromUtf8(it->c_str()); 00171 u = tld_encode(utf16, level); 00172 if(strlen(info.f_tld) != u.size() + 1) { 00173 fprintf(stderr, "error: tld(\"%s\", &info) length mismatch (\"%s\", %d/%d).\n", 00174 uri.data(), info.f_tld, (int)strlen(info.f_tld), (int)(u.size() + 1)); 00175 QString s(QString::fromUtf8(it->c_str())); 00176 fprintf(stderr, "%d> %s [%s] -> %d ", r, it->c_str(), u.toUtf8().data(), s.length()); 00177 for(int i(0); i < s.length(); ++i) { 00178 fprintf(stderr, "&#x%04X;", s.at(i).unicode()); 00179 } 00180 fprintf(stderr, "\n"); 00181 ++err_count; 00182 } 00183 } 00184 else { 00185 //fprintf(stderr, "error: tld(\"%s\", &info) failed.\n", it->c_str()); 00186 QString s(QString::fromUtf8(it->c_str())); 00187 printf("%d> %s [%s] -> %d ", r, it->c_str(), u.toUtf8().data(), s.length()); 00188 for(int i(0); i < s.length(); ++i) { 00189 printf("&#x%04X;", s.at(i).unicode()); 00190 } 00191 printf("\n"); 00192 ++err_count; 00193 } 00194 } 00195 } 00196 } 00197 00198 00199 00200 00201 int main(int argc, char *argv[]) 00202 { 00203 fprintf(stderr, "testing tld version %s\n", tld_version()); 00204 00205 /* call all the tests, one by one 00206 * failures are "recorded" in the err_count global variable 00207 * and the process stops with an error message and exit(1) 00208 * if err_count is not zero. 00209 */ 00210 test_load(); 00211 00212 if(err_count == 0) { 00213 test_tlds(); 00214 } 00215 00216 if(err_count) 00217 { 00218 fprintf(stderr, "%d error%s occured.\n", 00219 err_count, err_count != 1 ? "s" : ""); 00220 } 00221 exit(err_count ? 1 : 0); 00222 } 00223 00224 /* vim: ts=4 sw=4 00225 */
This document is part of the libtld Project.
Copyright by Made to Order Software Corp.