libtld 1.2.0
|
00001 /* TLD library -- TLD, domain name, and sub-domain extraction 00002 * Copyright (C) 2011 Made to Order Software Corp. 00003 * 00004 * This program is free software; you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation; either version 2 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program; if not, write to the Free Software 00016 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00017 */ 00018 00019 #include "tld.h" 00020 #include "tld_data.h" 00021 #include <malloc.h> 00022 #include <limits.h> 00023 00024 00025 00110 int cmp(const char *a, const char *b, int n) 00111 { 00112 /* n represents the maximum number of characters to check in b */ 00113 while(n > 0 && *a != '\0') 00114 { 00115 if(*a < *b) 00116 { 00117 return -1; 00118 } 00119 if(*a > *b) 00120 { 00121 return 1; 00122 } 00123 ++a; 00124 ++b; 00125 --n; 00126 } 00127 if(*a == '\0') 00128 { 00129 if(n > 0) 00130 { 00131 return -1; 00132 } 00133 return 0; 00134 } 00135 /* in this case n == 0 so a is larger */ 00136 return 1; 00137 } 00138 00169 int search(int i, int j, const char *domain, int n) 00170 { 00171 int p, r; 00172 00173 while(i < j) 00174 { 00175 p = (j - i) / 2 + i; 00176 const struct tld_description *tld = tld_descriptions + p; 00177 r = cmp(tld->f_tld, domain, n); 00178 if(r < 0) 00179 { 00180 /* eliminate the first half */ 00181 i = p + 1; 00182 } 00183 else if(r > 0) 00184 { 00185 /* eliminate the second half */ 00186 j = p; 00187 } 00188 else 00189 { 00190 /* match */ 00191 return p; 00192 } 00193 } 00194 00195 return -1; 00196 } 00197 00198 00315 enum tld_result tld(const char *uri, struct tld_info *info) 00316 { 00317 const char *end = uri; 00318 const char **level_ptr; 00319 int level = 0, start_level, i, r, p; 00320 00321 /* set defaults in the info structure */ 00322 info->f_category = TLD_CATEGORY_UNDEFINED; 00323 info->f_status = TLD_STATUS_UNDEFINED; 00324 info->f_country = (const char *) 0; 00325 info->f_tld = (const char *) 0; 00326 info->f_offset = -1; 00327 00328 if(uri == (const char *) 0 || uri[0] == '\0') 00329 { 00330 return TLD_RESULT_NULL; 00331 } 00332 00333 /* Note: We never free the pointer, not necessary since we 00334 * just quit once we're done; it is released then. */ 00335 level_ptr = malloc(sizeof(const char *) * tld_max_level); 00336 00337 while(*end != '\0') 00338 { 00339 if(*end == '.') 00340 { 00341 if(level >= tld_max_level) 00342 { 00343 /* At this point the maximum number of levels in the 00344 * TLDs is 5 00345 */ 00346 for(i = 1; i < tld_max_level; ++i) 00347 { 00348 level_ptr[i - 1] = level_ptr[i]; 00349 } 00350 level_ptr[tld_max_level - 1] = end; 00351 } 00352 else 00353 { 00354 level_ptr[level] = end; 00355 ++level; 00356 } 00357 if(level >= 2 && level_ptr[level - 2] + 1 == level_ptr[level - 1]) 00358 { 00359 /* two periods one after another */ 00360 return TLD_RESULT_BAD_URI; 00361 } 00362 } 00363 ++end; 00364 } 00365 /* if level is not at least 1 then there are no period */ 00366 if(level == 0) 00367 { 00368 /* no TLD */ 00369 return TLD_RESULT_NO_TLD; 00370 } 00371 00372 start_level = level; 00373 --level; 00374 r = search(tld_start_offset, tld_end_offset, 00375 level_ptr[level] + 1, end - level_ptr[level] - 1); 00376 if(r == -1) 00377 { 00378 /* unknown */ 00379 return TLD_RESULT_NOT_FOUND; 00380 } 00381 00382 /* check for the next level if there is one */ 00383 p = r; 00384 while(level > 0 && tld_descriptions[r].f_start_offset != USHRT_MAX) 00385 { 00386 r = search(tld_descriptions[r].f_start_offset, 00387 tld_descriptions[r].f_end_offset, 00388 level_ptr[level - 1] + 1, 00389 level_ptr[level] - level_ptr[level - 1] - 1); 00390 if(r == -1) 00391 { 00392 /* we are done, return the previous level */ 00393 break; 00394 } 00395 p = r; 00396 --level; 00397 } 00398 00399 /* did we hit an exception? */ 00400 if(tld_descriptions[p].f_status == TLD_STATUS_EXCEPTION) 00401 { 00402 /* return the actual TLD and not the exception */ 00403 p = tld_descriptions[p].f_exception_apply_to; 00404 ++level; 00405 //fprintf(stderr, "returning %d/%d (%d/%d)\n", p, tld_descriptions[p].f_exception_level, level, start_level); 00406 level = start_level - tld_descriptions[p].f_exception_level; 00407 } 00408 00409 /* return a valid result */ 00410 info->f_category = tld_descriptions[p].f_category; 00411 info->f_status = tld_descriptions[p].f_status; 00412 info->f_country = tld_descriptions[p].f_country; 00413 info->f_tld = level_ptr[level]; 00414 info->f_offset = level_ptr[level] - uri; 00415 00416 return info->f_status == TLD_STATUS_VALID 00417 ? TLD_RESULT_SUCCESS 00418 : TLD_RESULT_INVALID; 00419 } 00420 00421 00432 const char *tld_version() 00433 { 00434 return LIBTLD_VERSION; 00435 } 00436 00437 00777 /* vim: ts=4 sw=4 00778 */
This document is part of the libtld Project.
Copyright by Made to Order Software Corp.