libtld 1.2.0

tld.c

Go to the documentation of this file.
00001 /* TLD library -- TLD, domain name, and sub-domain extraction
00002  * Copyright (C) 2011  Made to Order Software Corp.
00003  *
00004  * This program is free software; you can redistribute it and/or modify
00005  * it under the terms of the GNU General Public License as published by
00006  * the Free Software Foundation; either version 2 of the License, or
00007  * (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00017  */
00018 
00019 #include "tld.h"
00020 #include "tld_data.h"
00021 #include <malloc.h>
00022 #include <limits.h>
00023 
00024 
00025 
/** \brief Compare a null terminated string against a length-limited string.
 *
 * The string \p a is read up to its terminating null character. The
 * string \p b does not need to be null terminated; at most \p n of its
 * characters are examined.
 *
 * \param[in] a  The null terminated string to compare.
 * \param[in] b  The string to compare against, limited to \p n characters.
 * \param[in] n  The maximum number of characters to check in \p b.
 *
 * \return -1 when \p a sorts before the first \p n characters of \p b
 *         (including when \p a is a strict prefix of them), 1 when it
 *         sorts after (including when \p a is longer than \p n characters),
 *         and 0 when \p a is exactly those \p n characters.
 */
int cmp(const char *a, const char *b, int n)
{
    for(; n > 0 && *a != '\0'; ++a, ++b, --n)
    {
        if(*a != *b)
        {
            /* first differing character decides the order */
            return *a < *b ? -1 : 1;
        }
    }

    if(*a != '\0')
    {
        /* all n characters of b matched but a continues, so a is larger */
        return 1;
    }

    /* a is exhausted: equal only when b was consumed entirely too */
    return n > 0 ? -1 : 0;
}
00138 
00169 int search(int i, int j, const char *domain, int n)
00170 {
00171     int p, r;
00172 
00173     while(i < j)
00174     {
00175         p = (j - i) / 2 + i;
00176         const struct tld_description *tld = tld_descriptions + p;
00177         r = cmp(tld->f_tld, domain, n);
00178         if(r < 0)
00179         {
00180             /* eliminate the first half */
00181             i = p + 1;
00182         }
00183         else if(r > 0)
00184         {
00185             /* eliminate the second half */
00186             j = p;
00187         }
00188         else
00189         {
00190             /* match */
00191             return p;
00192         }
00193     }
00194 
00195     return -1;
00196 }
00197 
00198 
00315 enum tld_result tld(const char *uri, struct tld_info *info)
00316 {
00317     const char *end = uri;
00318     const char **level_ptr;
00319     int level = 0, start_level, i, r, p;
00320 
00321     /* set defaults in the info structure */
00322     info->f_category = TLD_CATEGORY_UNDEFINED;
00323     info->f_status = TLD_STATUS_UNDEFINED;
00324     info->f_country = (const char *) 0;
00325     info->f_tld = (const char *) 0;
00326     info->f_offset = -1;
00327 
00328     if(uri == (const char *) 0 || uri[0] == '\0')
00329     {
00330         return TLD_RESULT_NULL;
00331     }
00332 
00333     /* Note: We never free the pointer, not necessary since we
00334      *       just quit once we're done; it is released then. */
00335     level_ptr = malloc(sizeof(const char *) * tld_max_level);
00336 
00337     while(*end != '\0')
00338     {
00339         if(*end == '.')
00340         {
00341             if(level >= tld_max_level)
00342             {
00343                 /* At this point the maximum number of levels in the
00344                  * TLDs is 5
00345                  */
00346                 for(i = 1; i < tld_max_level; ++i)
00347                 {
00348                     level_ptr[i - 1] = level_ptr[i];
00349                 }
00350                 level_ptr[tld_max_level - 1] = end;
00351             }
00352             else
00353             {
00354                 level_ptr[level] = end;
00355                 ++level;
00356             }
00357             if(level >= 2 && level_ptr[level - 2] + 1 == level_ptr[level - 1])
00358             {
00359                 /* two periods one after another */
00360                 return TLD_RESULT_BAD_URI;
00361             }
00362         }
00363         ++end;
00364     }
00365     /* if level is not at least 1 then there are no period */
00366     if(level == 0)
00367     {
00368         /* no TLD */
00369         return TLD_RESULT_NO_TLD;
00370     }
00371 
00372     start_level = level;
00373     --level;
00374     r = search(tld_start_offset, tld_end_offset,
00375                 level_ptr[level] + 1, end - level_ptr[level] - 1);
00376     if(r == -1)
00377     {
00378         /* unknown */
00379         return TLD_RESULT_NOT_FOUND;
00380     }
00381 
00382     /* check for the next level if there is one */
00383     p = r;
00384     while(level > 0 && tld_descriptions[r].f_start_offset != USHRT_MAX)
00385     {
00386         r = search(tld_descriptions[r].f_start_offset,
00387                 tld_descriptions[r].f_end_offset,
00388                 level_ptr[level - 1] + 1,
00389                 level_ptr[level] - level_ptr[level - 1] - 1);
00390         if(r == -1)
00391         {
00392             /* we are done, return the previous level */
00393             break;
00394         }
00395         p = r;
00396         --level;
00397     }
00398 
00399     /* did we hit an exception? */
00400     if(tld_descriptions[p].f_status == TLD_STATUS_EXCEPTION)
00401     {
00402         /* return the actual TLD and not the exception */
00403         p = tld_descriptions[p].f_exception_apply_to;
00404         ++level;
00405 //fprintf(stderr, "returning %d/%d (%d/%d)\n", p, tld_descriptions[p].f_exception_level, level, start_level);
00406         level = start_level - tld_descriptions[p].f_exception_level;
00407     }
00408 
00409     /* return a valid result */
00410     info->f_category = tld_descriptions[p].f_category;
00411     info->f_status = tld_descriptions[p].f_status;
00412     info->f_country = tld_descriptions[p].f_country;
00413     info->f_tld = level_ptr[level];
00414     info->f_offset = level_ptr[level] - uri;
00415 
00416     return info->f_status == TLD_STATUS_VALID
00417                 ? TLD_RESULT_SUCCESS
00418                 : TLD_RESULT_INVALID;
00419 }
00420 
00421 
00432 const char *tld_version()
00433 {
00434     return LIBTLD_VERSION;
00435 }
00436 
00437 
00777 /* vim: ts=4 sw=4
00778  */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines

This document is part of the libtld Project.

Copyright by Made to Order Software Corp.