Coding

Document Sample
Coding Powered By Docstoc
					Coding
        Search Engine มากมายหลายโปรแกรม
เพื่อความสะดวกและง่ายต่อการเรียกใช้โปรแกรม จึงได้มีการจัดโปรแกรมแบ่งแยกย่อยเป็นไดเรกทอรี ซึ่ง ต่อไป
        1. Directory Admin ประกอบด้วย
             - cleanup_common.php
             - cleanup_engine.php
             - cleanup_keywords.php
             - debug_functions.php
             - files.php
             - index.php
             - robot_functions.php
             - spider.php
             - update.php
             - update_frame.php
        2. Directory includes ประกอบด้วย
             - config.php
             - connect.php
             - style.php
        3. Directory libs ประกอบด้วย
             - auth.php
             - function_phpdig_form.php
             - mysql_functions.php
             - phpdig_functions.php
        4. Directory locales ประกอบด้วย
             - en-language.php
        5. Directory sql ประกอบด้วย
             - ini_dtb.sql
          นอกจากนี้ยังมีโปรแกรมอีก 2 โปรแกรม คือ index.php ซึ่งเป็นโปรแกรม search และ test1.php
ซึ่งเป็นโปรแกรมที่ใช้ในการแปล keyword ภาษาไทย เป็น keyword ภาษาอังกฤษ ก่อนนำไป search หาข้อมูล
                         Directory admin
cleanup_common.php
<?php
/*
 * cleanup_common.php - admin page that purges "common words" from the index.
 *
 * For every word obtained from common_words() for includes/common_words.txt:
 *   1. the matching rows of the `keywords` table are looked up,
 *   2. every `engine` row referencing one of those key_id values is deleted,
 *   3. the keyword rows themselves are deleted.
 * A line is printed per keyword id and a grand total at the end.
 *
 * $database and $id_connect are not assigned in this script; presumably they
 * are provided by the config.php include - TODO confirm.
 */
$relative_script_path = '..';
include "$relative_script_path/includes/config.php";
include "$relative_script_path/libs/auth.php";
?>
<html>
<head>
<title>PhpDig : Cleanup common words</title>
<?php
include "$relative_script_path/includes/style.php";
?>
</head>
<body bgcolor="white">
<h2>Clean common words</h2>
<?php
//set the max time to an hour
set_time_limit(3600);
$numtot = 0;
$common_words = common_words("$relative_script_path/includes/common_words.txt");

// Only the array KEYS are used as words, exactly as the former
// list($common) = each(...) loop did.
// NOTE(review): $common is interpolated into SQL unescaped; verify that
// common_words() cannot return keys containing quotes.
foreach ($common_words as $common => $unused)
{
    //list of common words in the keywords table
    $query = "select key_id from keywords where keyword like '$common'";
    $res = mysql_db_query($database,$query,$id_connect);
    if ($res)
    {
        while (list($key_id) = mysql_fetch_row($res))
        {
            //delete references to this keyword in the engine table
            $query = "DELETE FROM engine WHERE key_id=$key_id";
            mysql_db_query($database,$query,$id_connect);
            $numdel = mysql_affected_rows($id_connect);
            print "$numdel deleted for $common ($key_id)<br>";
            $numtot += $numdel;
        }
        //delete this common word from the keywords table
        // (run only when the lookup succeeded, so a failed SELECT is never re-sent)
        $query = "DELETE from keywords where keyword like '$common'";
        mysql_db_query($database,$query,$id_connect);
    }
}
print "<h3>Total $numtot cleaned.</h3>";
?>
<br>
<A href="index.php" target="_top">[Back]</A> to admin interface.
</body>
</html>
cleanup_engine.php
<?php
/*
 * cleanup_engine.php - admin page that removes orphaned rows from `engine`.
 *
 * Pass 1: every distinct engine.key_id that has no row in `keywords` is
 *         deleted from `engine` ("X" printed), otherwise "." is printed.
 * Pass 2: every distinct engine.spider_id that has no row in `spider` is
 *         deleted from `engine` ("X" printed), otherwise "-" is printed.
 * $del counts the deleted ids of both passes and drives the final message.
 *
 * $database and $id_connect are not assigned in this script; presumably they
 * are provided by the config.php include - TODO confirm.
 */
$relative_script_path = '..';
include "$relative_script_path/includes/config.php";
include "$relative_script_path/libs/auth.php";
?>
<html>
<head>
<title>PhpDig : Cleaning index</title>
<?php
include "$relative_script_path/includes/style.php";
?>
</head>
<body bgcolor="white">
<h2>Cleaning index</h2>
<?php
set_time_limit(3600);
print "wait...<br>";

// Explicit start value: the counter was previously incremented and tested
// without ever being initialised.
$del = 0;

//list of key_id's in engine table
$query = "SELECT key_id FROM engine GROUP BY key_id";
$id = mysql_db_query($database,$query,$id_connect);
while (list($key_id) = mysql_fetch_row($id))
{
    //search this id in the keywords table
    $query = "SELECT key_id FROM keywords WHERE key_id=$key_id";
    $id_key = mysql_db_query($database,$query,$id_connect);
    if (mysql_num_rows($id_key) < 1)
    {
        //if non-existent, delete this useless id from the engine table
        $del++;
        print "X ";
        $query_delete = "DELETE FROM engine WHERE key_id=$key_id";
        $id_del = mysql_db_query($database,$query_delete,$id_connect);
    }
    else
    {
        print ". ";
    }
    mysql_free_result($id_key);
}

//list of spider_id from engine table
$query = "SELECT spider_id FROM engine GROUP BY spider_id";
$id = mysql_db_query($database,$query,$id_connect);
while (list($spider_id) = mysql_fetch_row($id))
{
    $query = "SELECT spider_id FROM spider WHERE spider_id=$spider_id";
    $id_spider = mysql_db_query($database,$query,$id_connect);
    if (mysql_num_rows($id_spider) < 1)
    {
        //if non-existent in the spider table, delete from engine
        $del++;
        print "X ";
        $query_delete = "DELETE FROM engine WHERE spider_id=$spider_id";
        $id_del = mysql_db_query($database,$query_delete,$id_connect);
    }
    else
    {
        print "- ";
    }
    mysql_free_result($id_spider);
}

if ($del)
    print "<br>$del index references targeted an inexistent keyword.";
else
    print "<br>Engine is coherent.";
?>
<br>
<A href="index.php" target="_top">[Back]</A> to admin interface.
</body>
</html>
cleanup_keywords.php
<?php
/*
 * cleanup_keywords.php - admin page that removes unused keywords.
 *
 * Every key_id of the `keywords` table is checked against the `engine`
 * table; a keyword that is referenced by no engine row is deleted ("X" is
 * printed), otherwise "." is printed. $del counts the deleted keywords and
 * selects the closing message.
 *
 * $database and $id_connect are not assigned in this script; presumably they
 * are provided by the config.php include - TODO confirm.
 */
$relative_script_path = '..';
include "$relative_script_path/includes/config.php";
include "$relative_script_path/libs/auth.php";
?>
<html>
<head>
<title>PhpDig : Cleaning dictionnary</title>
<?php
include "$relative_script_path/includes/style.php";
?>
</head>
<body bgcolor="white">
<h2>Cleaning dictionnary</h2>
<?php
set_time_limit(3600);
print "wait...<br>";

// Explicit start value: the counter was previously incremented and tested
// without ever being initialised.
$del = 0;

//list of keyword's id's
$query = "SELECT key_id FROM keywords";
$id = mysql_db_query($database,$query,$id_connect);
while (list($key_id) = mysql_fetch_row($id))
{
    $query = "SELECT key_id FROM engine WHERE key_id=$key_id";
    $id_key = mysql_db_query($database,$query,$id_connect);
    if (mysql_num_rows($id_key) < 1)
    {
        //if this key_id is not in engine database, delete it
        print "X ";
        $query_delete = "DELETE FROM keywords WHERE key_id=$key_id";
        $id_del = mysql_db_query($database,$query_delete,$id_connect);
        $del++;
    }
    else
    {
        print ". ";
    }

    mysql_free_result($id_key);
}

if ($del)
    print "<br>$del keywords where not in one page at least.";
else
    print "<br>All keywords are in one or more page.";
?>
<br>
<A href="index.php" target="_top">[Back]</A> to admin interface.
</body>
</html>
debug_functions.php
<?php
/**
 * Debug helper: prints $datas as an HTML table.
 *
 * Accepted inputs:
 *  - a two-dimensional array (array of rows): the keys of the FIRST row are
 *    used as the header line, then every row is printed as one table row;
 *  - a flat array: it is converted to rows of the form
 *    array('index' => key, 'value' => value) and printed as above;
 *  - a non-array value: it is wrapped into a one-element array first.
 *
 * A line "<rows> rows & <columns> columns<BR>" is printed before the table.
 * Values are printed as-is (no HTML escaping).
 *
 * @param mixed $datas data to display
 * @return void output is written with print
 */
function DSP_TABLE_DATAS($datas)
{
    // A scalar is shown as a single-element list instead of failing on
    // array access.
    if (!is_array($datas)) {
        $datas = array($datas);
    }

    // Flat array (also the empty array): rebuild it as index/value rows.
    // The header is taken from the rebuilt rows below, so it no longer
    // depends on the first key of the original array.
    $first = reset($datas);
    if (!is_array($first)) {
        $pairs = array();
        foreach ($datas as $index => $value) {
            $pairs[] = array('index' => $index, 'value' => $value);
        }
        $datas = $pairs;
    }

    // Column names come from the first row; no rows means no columns.
    $header = array();
    if (count($datas) > 0) {
        $header = array_keys(reset($datas));
    }

    $rows = count($datas);
    $columns = count($header);
    print "$rows rows & $columns columns<BR>";
    print "<TABLE border='1' cellspacing='0' cellpadding='3'> \n";

    // header line
    print "\t<TR>\n";
    foreach ($header as $index) {
        print "\t\t<TD style='font-weight:bold; background-color:#CCCCCC'>$index</TD>\n";
    }
    print "\t</TR>\n";

    // data lines
    foreach ($datas as $row) {
        print "\t<TR>\n";
        // the cast tolerates a stray scalar among the rows
        foreach ((array) $row as $value) {
            print "\t\t<TD>$value</TD>\n";
        }
        // rows are now closed with </TR>; an opening <TR> was emitted here before
        print "\t</TR>\n";
    }
    print "</TABLE>\n";
}
?>
index.php
<?php
/*
 * index.php - entry page of the admin interface.
 *
 * Renders three areas:
 *  - a form posting a start URL and a depth limit (1..$spider_max_limit)
 *    to spider.php;
 *  - a form listing the rows of the `sites` table and submitting the chosen
 *    site_id to update_frame.php ("Update form" / "Delete site");
 *  - links to the three cleanup scripts.
 *
 * pmsg(), msg(), $phpdig_version, $spider_max_limit, $database and
 * $id_connect are not defined in this script; presumably they come from the
 * includes below - TODO confirm.
 */
$relative_script_path = '..';
include "$relative_script_path/includes/config.php";
include "$relative_script_path/libs/auth.php";
?>
<html>
<head>
<title>PhpDig : <?php pmsg('admin') ?></title>
<?php
include "$relative_script_path/includes/style.php";
?>
</head>
<body bgcolor="white">
<div align='center'>
<img src="../phpdiglogo.gif" width="246" height="77" alt="PhpDig <?php print $phpdig_version ?>"><br>

<h3><?php pmsg('index_uri') ?></h3>
<form action='spider.php' method='post'>
<input type='text' name='url' value='http://' size='32'>
<BR>
<?php pmsg('spider_depth') ?> :
<select name='limit'>
<?php
//select list for the depth limit of spidering
for ($i = 1; $i <= $spider_max_limit; $i++)
{
    print "\t<option value='$i'>$i</option>\n";
}
?>
</select>
<input type='submit' name='spider' value='Dig this !'>
</form>
</div>
<P style='background-color:#CCDDFF;'>
<?php
print msg('warning')." ".msg('spider_warn');
?>
</P>
<HR>
<div align='center'>
<h3><?php pmsg('site_update') ?></h3>
<FORM action="update_frame.php">
<SELECT NAME="site_id" size='10'>
<?php
//list of sites in the database
$query = "SELECT site_id,site_url FROM sites ORDER BY site_url";
$result_id = mysql_db_query($database,$query,$id_connect);
while (list($id,$url) = mysql_fetch_row($result_id))
{
    // site_url is stored data that ends up inside HTML: escape it so a URL
    // containing <, >, & or quotes cannot break the markup.
    print "\t<OPTION value='$id'>".htmlspecialchars($url, ENT_QUOTES)."</OPTION>\n";
}
?>
</SELECT>
<BR>
<INPUT TYPE="submit" name="update" value="Update form" >
<INPUT TYPE="submit" name="delete" value="Delete site" >
</FORM>
<br>
<a href='cleanup_engine.php'><?php print msg('clean')." ".msg('t_index'); ?></a><br>
<a href='cleanup_keywords.php'><?php print msg('clean')." ".msg('t_dic'); ?></a><br>
<a href='cleanup_common.php'><?php print msg('clean')." ".msg('t_stopw'); ?></a><br>
</DIV>
</body>
</html>
robot_functions.php
<?php
/**
 * Retrieves an url and returns temp file parameters.
 *
 * The page is first probed with test_url($uri,'date'); only when that
 * returns an array whose 'status' is 'HTML' or 'PLAINTEXT' is the content
 * read with file() and written, one trimmed line per row, to
 * $prefix . md5(time()+getmypid()) . $suffix.
 *
 * test_url() and http_to_sqldate() are defined outside this listing; their
 * exact contracts are assumed from how they are used here - TODO confirm.
 *
 * @param string $uri    address of the page to fetch
 * @param string $prefix directory prefix of the temporary file
 * @param string $suffix extension of the temporary file
 * @return array|int array with keys 'tempfile', 'last_modified' (the
 *                   'lm_date' of the probe converted by http_to_sqldate(),
 *                   or the current time as YmdHis when it is empty) and
 *                   'content_type'; 0 when the page is not accepted or the
 *                   temporary file cannot be opened
 */
function store_temp_html_file($uri,$prefix='temp/',$suffix='.html')
{
    $temp_filename = md5(time()+getmypid()).$suffix;

    //test content-type of this page
    $result_test = test_url($uri,'date');
    if (!is_array($result_test)
        || ($result_test['status'] != 'HTML' && $result_test['status'] != 'PLAINTEXT'))
    {
        return 0;
    }

    $content_type = $result_test['status'];
    $last_modified = $result_test['lm_date'];
    if ($last_modified)
        $last_modified = http_to_sqldate($last_modified);
    else
        $last_modified = date("YmdHis",time());

    $file_content = file($uri);
    $tempfile = $prefix.$temp_filename;
    $f_handler = fopen($tempfile,'w');
    if (!$f_handler)
    {
        // Unwritable temp directory: report failure with the function's
        // usual 0 instead of calling fputs()/fclose() on a non-resource.
        return 0;
    }
    if (is_array($file_content))
    {
        foreach ($file_content as $line)
            fputs($f_handler,trim($line)."\n");
    }
    fclose($f_handler);

    return compact('tempfile','last_modified','content_type');
}

/**
 * Inserts or updates the `spider` row of a page and returns its spider_id.
 *
 * The row is searched by site_id, path and file. When
 * mysql_result_select() does not return an array a new row is inserted and
 * its auto-generated id is returned; otherwise the first matching row is
 * updated (first_words, upddate, md5, num_words, last_modified).
 *
 * Only $first_words is passed through addslashes() here.
 * NOTE(review): every other value is interpolated as received - verify
 * that callers escape $path and $file.
 *
 * @param string   $database     database name passed to the mysql_* calls
 * @param resource $id_connect   connection link passed to the mysql_* calls
 * @param mixed    $site_id      id of the site owning the page
 * @param string   $path         path of the page
 * @param string   $file         file name of the page
 * @param string   $first_words  text stored in first_words
 * @param string   $upddate      value stored in upddate
 * @param string   $md5          value stored in md5
 * @param string   $lastmodified value stored in last_modified
 * @param mixed    $num_words    value stored in num_words
 * @return mixed spider_id of the inserted or updated row
 */
function modify_spider_reccord($database,$id_connect,$site_id,$path,$file,$first_words,$upddate,$md5,$lastmodified,$num_words)
{
    //retrieves the spider_id
    $query_select = "SELECT spider_id FROM spider WHERE site_id='$site_id' AND path LIKE '$path' AND file LIKE '$file'";
    $result_double = mysql_result_select($database,$id_connect,$query_select);

    if (!is_array($result_double))
    {
        // No existing row: insert one. The statement is assembled from
        // single-line pieces so that no line break can end up inside a
        // stored value.
        $requete = "INSERT INTO spider SET "
                  ."path='$path',file='$file',"
                  ."first_words='".addslashes($first_words)."',"
                  ."upddate='$upddate',md5='$md5',"
                  ."site_id='$site_id',num_words='$num_words',"
                  ."last_modified='$lastmodified'";
        $result_insert = mysql_db_query($database,$requete,$id_connect);
        $spider_id = mysql_insert_id($id_connect);
    }
    else
    {
        //update reccord
        $spider_id = $result_double[0]['spider_id'];
        $query = "UPDATE spider SET "
                ."first_words='".addslashes($first_words)."',"
                ."upddate='$upddate',md5='$md5',"
                ."num_words='$num_words',last_modified='$lastmodified'"
                ." WHERE spider_id=".$spider_id;
        $result_update = mysql_db_query($database,$query,$id_connect);
    }
    return $spider_id;
}

/**
 * Tests if a page is a double (same site_id and md5 already in `spider`).
 *
 * When mysql_result_select() returns an array, the first matching row gets
 * its upddate and last_modified refreshed and its spider_id is returned;
 * otherwise 0 is returned.
 *
 * @param string   $database      database name passed to the mysql_* calls
 * @param resource $id_connect    connection link passed to the mysql_* calls
 * @param mixed    $site_id       id of the site owning the page
 * @param string   $md5           hash identifying the page content
 * @param string   $new_upddate   value written to upddate on a match
 * @param string   $last_modified value written to last_modified on a match
 * @return mixed spider_id of the existing row, or 0 when there is no double
 */
function test_double($database,$id_connect,$site_id,$md5,$new_upddate,$last_modified)
{
    //tests if there is a double and if yes, update the modifying date
    $query_double = "SELECT spider_id FROM spider WHERE site_id='$site_id' AND md5 LIKE '$md5'";
    $result_double = mysql_result_select($database,$id_connect,$query_double);
    if (!is_array($result_double))
    {
        return 0;
    }

    $exists_spider_id = $result_double[0]['spider_id'];
    // upddate is quoted like in modify_spider_reccord(), so an empty value
    // cannot produce a malformed statement.
    $query = "UPDATE spider SET upddate='$new_upddate',last_modified='$last_modified'"
            ." WHERE spider_id=$exists_spider_id";
    $result_update = mysql_db_query($database,$query,$id_connect);
    return $exists_spider_id;
}

//indexe un fichier et renvoie un identifiant fiche
function
index_file($database,$id_connect,$tempfile,$site_id,$origine,$localdomain,$path,$file,$content_t
ype,$upddate,$last_modified,$tags,$ftp_id='')
                                               {
/           /          g           l           o           b           a           l           s
global $limit_days,$small_words_size,$max_words_size,
    $title_weight,$chunk_size,$summary_length,$common_words,$banned,
    $relative_script_path,$s_yes,$s_no,$br;



/   /    c             u     r   r    e   n   t   _    d    a   t    e
$ d a t e          =       d a t e ( " Y m d H i s " , t i m e ( ) ) ;
i f         ( ! i s _ f i l e ( $ t e m p f i l e ) )
  r          e     t     u    r     n          0    ;

$page_desc = html_to_plain_text($tags['description']);
$page_keywords = html_to_plain_text($tags['keywords']);

$ f i l e _ c o n t e n t = f i l e ( $ t e m p f i l e ) ;
$    t   e    x   t   a   l t  s        =        "    "   ;

/ / v e r i f y            t h e a r r a y $ t e x t i s e m p t y
u     n     s            e        t        (       $          t    e       x         t       )        ;
$     n    _            c        h        u        n          k            =                 0        ;
$     n     _            c        l       i        n        e              =                 0        ;
while (list            ($num,$line) = each($file_content))
                                                      {
   i           f                     (          $           l        i        n            e          )
                                                      {
   / / e x t r a c t a l t a t t r i b u t e s o f i m a g e s
   if (eregi("alt=[[:blank:]]*[\'\"][[:blank:]]*([ a-z0-9\xc8\xcb]+)[[:blank:]]*[\'\"]",$line,$regs));
      $ t e x t a l t s                              . =        $ r e g s [ 1 ] ;
   //extract the domains names not local and not banned to add in keywords
   while (eregi("<a([^>]*href[[:blank:]]*=[[:blank:]]*[\'\"]?(((http://)+(([.a-zA-Z0-9-])+(:[0-
9]+)*))*([:%/?=&;\\,._a-zA-Z0-9-]*))[#\'\" ]?)",$line,$regs))
                                                        {
        $line = str_replace($regs[1],"",$line);
        if ($regs[5] && $regs[5] != $localdomain && !eregi($banned,$regs[5]) && ereg('[a-
z     ]       +      '      ,       $       r      e       g    s       [      5       ]       )     )
                                                          {
            $ n b r e _ m o t s [ $ r e g s [ 5 ] ]                                          + + ;
                                                          }
                                                        }
   $      n       _      c     l      i       n      e             +      +       ;
   //cut the text after $n_chunk characters
   if (strlen($text[$n_chunk]) > $chunk_size)
                                           {
     / / c u t o n l y b e f o r e a n o p e n i n g t a g
     if ($content_type == 'PLAINTEXT' or eregi("^[[:blank:]]*<[a-z]+[^>]*>",$line))
                                             {
        $      n     _     c     l    i      n     e           =            0     ;
        $      n      _      c      h   u       n      k            +      +      ;
                                             }
                                           }
   $ t e x t [ $ n _ c h u n k ] . = t r i m ( $ l i n e ) . " " ;
                                         }
                                        }
/ / s t o r e          t h e       n u m b e r           o f     c h u n k s
$ m a x _ c h u n k                      =        $ n _ c h u n k ;
//free the array containing file content
u n s e t ( $ f i l e _ c o n t e n t ) ;

$    d     o     c    _     t    i    t    l     e          =          "    "     ;

//purify from html tags and store the title
if (is_array($text) && $content_type != 'PLAINTEXT')
                                  {
r    e     s     e     t        (     $  t    e     x     t     )    ;
while (list($n_chunk,$chunk) = each($text))
                                    {
   $ c h u n k = h t m l _ t o _ p l a i n _ t e x t ( $ c h u n k ) ;
   $ t e x t [ $ n _ c h u n k ] = $ c h u n k [ ' c o n t e n t ' ] ;
   $ d o c _ t i t l e . = $ c h u n k [ ' t i t l e ' ] ;
                                    }
                                  }
//set     the title  in order <title>, filename, or unknown
i    f          (    $     d   o  c   _  t   i   t   l  e   )
   $ t    i t r e _   r e s u m e   =   $ d o c _ t i t l e ;
e l        s e i    f        ( $ t e m p _ f i l e )
  $ t      i t r e    _ r e s u m e      =      $ f i l e ;
e                        l                 s                e
  $ t     i t r e _ r e s u m e     =   " U n t i t l e d " ;

/ / t i t l e            a n d        s m a l l          d e s c r i p t i o n
$first_words = $titre_resume."\n".substr($page_desc['content'].$text[0],0,$summary_length);

/ / h a s h e d s t r i n g t o d e t e c t d o u b l e s
$md5 = md5($titre_resume.$page_desc['content'].$text[$max_chunk]).'_'.filesize($tempfile);

/     /     d      o     u     b      l     e            t    e     s      t           :
$test_double = test_double($database,$id_connect,$site_id,$md5,$upddate,$last_modified);

//if no double detected, continue indexing
i f   ( $ t e s t _ d o u b l e   = =    0 )
                      {
$ t e x t _ t i t l e           =     " " ;

//weight of title and description is there
for ($itl = 0;$itl < $title_weight; $itl++)
                                                       {
   $text_title .= $doc_title." ".$page_desc['content']." ";
                                                       }
$te x t[ ] = $te xt _ ti t le .$t ex ta l ts ['c on te nt ']. " ".$pa ge_ key w or ds [ 'con te nt '] ;
//words list and occurence of each of them
r       e     s       e         t                (       $       t     e     x       t       )        ;
while (list($n_chunk,$text2) = each($text))
                       {
$text2 = epure_text($text2,$small_words_size);

$ s e p a r a t o r s                                    =       "         " ;
u      n    s      e     t    (     $      t    o      k     e     n     )     ;
for ($token = strtok($text2, $separators); $token; $token = strtok($separators))
                                         {
   $ n b r e _ m o t s [ $ t o k e n ] + + ;
   $        t         o         t        a         l       +         +         ;
                                         }
                                       }

$ d i s t i n c t _ w o r d s = @ c o u n t ( $ n b r e _ m o t s ) ;
/ / m o d i f y                 t h e        s p i d e r                r e c c o r d
$spider_id =
modify_spider_reccord($database,$id_connect,$site_id,$path,$file,$first_words,$upddate,$md5,$
last_modified,$distinct_words);

//here st or e extrac t t he text ual content
store_text_content($relative_script_path,$spider_id,$text,$ftp_id);
/ / e n d                o f         t e x t u a l .

/ / d e l e t e   o l d    e n g i n e    r e c c o r d
$query = "DELETE FROM engin e WHERE spider_id=$spider_id";
mysql_db_query($database,$query,$id_connect);

/ / d a t a b a s e            i n s e r t
$      i      t            =           0     ;
$ s q l v a l u e s              =      " " ;
while (list($key, $value) = @each($nbre_mots))
                         {
  $ k e y       =    t r i m ( $ k e y ) ;
   / / n o s m a l l w o r d s n o r s t o p w o r d s
   if (strlen($key) > $small_words_size and strlen($key) <= $max_words_size and
$ c o m m o n _ w o r d s [ $ k e y ]                             ! =      1 )
                                          {
   //if keyword exists, retrieve id, else insert it
   $requete = "SELECT key_id FROM keywords WHERE keyword like '$key'";
   $result_insert = mysql_db_query($database,$requete,$id_connect);
   $num = mysql_num_rows($result_insert);
   i      f          (    $     n     u       m          =    =          0    )
                                            {
      / / i n s e r t s                  n e w           k e y w o r d
      m y s q l _ f r e e _ r e s u l t ( $ r e s u l t _ i n s e r t ) ;
      $requete = "INSERT INTO keywords SET keyword = '$key'";
      $result_insert = mysql_db_query($database,$requete,$id_connect);
      $key_id = mysql_insert_id($id_connect);

                                               }
    e                             l                       s                       e
                                               {
        / / e         x i s           t i n g            k e y w o r d
        $keyid         = mys        ql_fetch_row($result_insert);
        m y s q l    _ f r e e      _ r e s u l t ( $ r e s u l t _ i n s e r t ) ;
        $ k e        y _ i          d      =       $ k e y i d [ 0 ] ;
                                                 }
    /    /       N   e   w            i n d e x              r e c o r d
    i        f            (        $     i     t          =     =           0     )
                                                 {
        $sqlvalues             .= "($spider_id,$key_id,$value)";
        $     i                  t                   =                  1         ;
                                                 }
    e                               l                      s                      e
        $sqlvalues            .= ",\n($spider_id,$key_id,$value)";
                                                                }
                                                                }

    u   n       s       e       t       (   $       n       b       r       e       _   m       o       t       s   )   ;

   / / O n e q u e r y f o r t h e e n t i r e p a g e
   $requete = "INSERT INTO engine (spider_id,key_id, weight) VALUES $sqlvalues\n";
   $result_insert = mysql_db_query($database,$requete,$id_connect);
p     r      i     n     t             $       s      _       y   e      s       ;
                                       }
e                        l                            s                          e
                                         {
  $ s p i d e r _ i d                                        =       - 1 ;
  p r i n t $ s _ n o . m s g ( ' d o u b l e ' ) . $ b r ;
                                         }

r   e       t       u       r       n           $       s       p       i       d       e   r       _       i       d   ;
                                                            }

//list a spider reccord
//Looks up the spider row (joined with its site) for a site id, path and file.
//  $database, $id_connect : passed through unchanged to mysql_result_select()
//  $site_id, $path, $file : values matched in the WHERE clause
//Returns the first row when mysql_result_select() yields an array, else 0.
//NOTE(review): $site_id, $path and $file are interpolated into the SQL as-is
//and matched with LIKE (so % and _ act as wildcards); presumably callers
//escape them beforehand - confirm before adding escaping here.
function read_spider_reccord($database,$id_connect,$site_id,$path,$file)
{
    $requete = "SELECT spider_id,
         file,
         first_words,
         spider.upddate,
         md5,
         sites.site_id,
         path,
         num_words,
         last_modified
     FROM spider LEFT JOIN sites ON spider.site_id = sites.site_id
     WHERE spider.site_id='$site_id' AND spider.path like '$path' AND spider.file like '$file'";
    $result = mysql_result_select($database,$id_connect,$requete);

    //anything but an array means "no such reccord"
    if (!is_array($result)) {
        return 0;
    }
    return $result[0];
}

//metatags in lowercase
//Reads the meta tags of $file with get_meta_tags() and makes sure every tag
//is also reachable under its lower-cased name.
//  $file : path handed to get_meta_tags()
//Returns the tag array, or null when get_meta_tags() did not return an array.
function format_meta_tags($file)
{
    $tag = get_meta_tags($file);

    if (!is_array($tag)) {
        return null;
    }

    //format type of metatags
    //foreach walks a snapshot, so the keys added below are not revisited
    foreach ($tag as $id => $value) {
        $tag[strtolower($id)] = $value;
    }

    return $tag;
}

//parse the revisit-after tag
//Converts a "revisit-after" value such as "7 days" or "2 weeks" to seconds.
//  $revisit_after : raw tag content; units day/week/month/year are matched
//                   case-insensitively, a month counts 30 days, a year 365
//  $limit_days    : number of days used when nothing usable was parsed
//Returns the delay in seconds.
function parse_revisit_after($revisit_after,$limit_days=0)
{
    $delay = 0;

    //groups : 1 = count, 3 = day, 4 = week, 5 = month, 6 = year
    $pattern = '/([0-9]+) *((day).*|(week).*|(month).*|(year).*)/i';
    if (preg_match($pattern,(string) $revisit_after,$regs)) {
        $delay = 86400*$regs[1];
        //preg_match() drops trailing groups that did not take part
        if (!empty($regs[4])) {
            $delay *= 7;
        }
        if (!empty($regs[5])) {
            $delay *= 30;
        }
        if (!empty($regs[6])) {
            $delay *= 365;
        }
    }

    //set default value
    if (!$delay) {
        $delay = 86400*$limit_days;
    }

    return $delay;
}

//delete a spider reccord and content file
//Removes the engine rows and the spider row of $spider_id, then calls
//delete_text_content() for the same id.
//  $database, $id_connect : passed to mysql_db_query()
//  $spider_id             : id of the reccord to delete (used as an integer)
//  $ftp_id                : ftp connection; when empty the global $ftp_id,
//                           if any, is used instead
function delete_spider_reccord($database,$id_connect,$spider_id,$ftp_id='')
{
    global $relative_script_path;

    //the argument wins; the global is only a fallback
    if (!$ftp_id && isset($GLOBALS['ftp_id'])) {
        $ftp_id = $GLOBALS['ftp_id'];
    }

    //the id goes unquoted into the queries, so force it to an integer
    $spider_id = (int) $spider_id;

    $query = "DELETE FROM engine WHERE spider_id=$spider_id";
    $result_id = mysql_db_query($database,$query,$id_connect);
    $query = "DELETE FROM spider WHERE spider_id=$spider_id;";
    $result_id = mysql_db_query($database,$query,$id_connect);
    delete_text_content($relative_script_path,$spider_id,$ftp_id);
}

//store a content_text from a spider_id
//When CONTENT_TEXT is 1, appends every chunk of $text (word-wrapped) to
//<relative_script_path>/<TEXT_CONTENT_PATH><spider_id>.txt and, when
//FTP_ENABLE is on and $ftp_id is set, replaces the remote copy with it.
//  $relative_script_path : base directory of the local content file
//  $spider_id            : id used to build the file name
//  $text                 : array of text chunks to write
//  $ftp_id               : ftp connection, empty for "no upload"
//Prints a warning when the local file cannot be opened.
function store_text_content($relative_script_path,$spider_id,$text,$ftp_id='')
{
    //line break string of the running script, used in the warning below
    global $br;

    if (CONTENT_TEXT != 1) {
        return;
    }

    $file_text_path = $relative_script_path.'/'.TEXT_CONTENT_PATH.$spider_id.'.txt';
    $f_handler = @fopen($file_text_path,'a');
    if (!$f_handler) {
        print "Warning : Unable to create the content file $file_text_path ! $br";
        return;
    }

    foreach ((array) $text as $text_to_store) {
        fputs($f_handler,wordwrap($text_to_store));
    }
    fclose($f_handler);

    //here the ftp case
    if (FTP_ENABLE && $ftp_id) {
        //drop any previous remote copy first; it may not exist, hence the @
        @ftp_delete($ftp_id,$spider_id.'.txt');
        ftp_put($ftp_id,$spider_id.'.txt',$file_text_path,FTP_ASCII);
    }
}

//delete a content_text from a spider_id
//When CONTENT_TEXT is 1, removes the local file
//<relative_script_path>/<TEXT_CONTENT_PATH><spider_id>.txt if it exists and,
//when FTP_ENABLE is on and $ftp_id is set, the remote <spider_id>.txt too.
//  $relative_script_path : base directory of the local content file
//  $spider_id            : id used to build the file name
//  $ftp_id               : ftp connection, empty for "local only"
function delete_text_content($relative_script_path,$spider_id,$ftp_id='')
{
    if (CONTENT_TEXT != 1) {
        return;
    }

    $file_text_path = $relative_script_path.'/'.TEXT_CONTENT_PATH.$spider_id.'.txt';
    if (is_file($file_text_path)) {
        unlink($file_text_path);
    }

    //there delete the ftp file
    if (FTP_ENABLE && $ftp_id) {
        @ftp_delete($ftp_id,$spider_id.'.txt');
    }
}

//connect to the ftp if the ftp is on and the connection ok.
//the content files are stored locally and could be uploaded
//manually later.
//Connects to FTP_HOST:FTP_PORT, logs in with FTP_USER / FTP_PASS, applies
//FTP_PASV, changes to FTP_PATH and then to FTP_TEXT_PATH (created when the
//first attempt to enter it fails).
//Returns the ftp connection, or an empty string when CONTENT_TEXT / FTP_ENABLE
//are not both 1 or when connect, login or the change to FTP_PATH fails.
function phpdig_ftp_connect()
{
    if (CONTENT_TEXT != 1 || FTP_ENABLE != 1) {
        return '';
    }

    //launch connect procedure
    $ftp_id = ftp_connect(FTP_HOST,FTP_PORT);
    if (!$ftp_id) {
        return '';
    }

    //login
    if (!ftp_login($ftp_id, FTP_USER, FTP_PASS)) {
        //do not leave a half-open connection behind
        ftp_close($ftp_id);
        return '';
    }
    ftp_pasv($ftp_id,FTP_PASV);

    //change to phpdig directory
    if (!ftp_chdir($ftp_id,FTP_PATH)) {
        ftp_close($ftp_id);
        return '';
    }

    //if content_text doesnt exists, create it
    if (!@ftp_chdir($ftp_id, FTP_TEXT_PATH)) {
        ftp_mkdir($ftp_id, FTP_TEXT_PATH);
        ftp_chdir($ftp_id, FTP_TEXT_PATH);
    }

    return $ftp_id;
}

//close the ftp if exists
//  $ftp_id : ftp connection, or an empty value when none was opened
function phpdig_ftp_close($ftp_id)
{
    if ($ftp_id) {
        ftp_quit($ftp_id);
    }
}
?>
spider.php
<?
//---------------spider script.
//---------------operates both indexing and spidering
//start time of the run, and location of the phpdig root from this script
$debut = time();
$relative_script_path = '..';

include "$relative_script_path/includes/config.php";
include "$relative_script_path/admin/robot_functions.php";
include "$relative_script_path/admin/debug_functions.php";
//spidering can be long : raise the time limit to 36000 seconds
set_time_limit(36000);
//current date as YmdHis, compared later with the stored update dates
$date = date("YmdHis",time());

//test on cgi or http
//set string messages (shell or browser)
//The server array is read directly: a plain $REMOTE_ADDR only exists with
//register_globals, and without it a browser request would be taken for a
//shell run and skip the include of auth.php.
if (empty($_SERVER['REMOTE_ADDR'])) {
    $run_mode = 'cgi';
    $br = "\n";
    $s_yes = "+";
    $s_no = "X";
    $s_link = "@url";
    //here parse the parameters for the the reindexing...
    $first_arg = isset($argv[1]) ? $argv[1] : '';
    if ($first_arg == 'all') {
        $respider_mode = 'all';
    } elseif ($first_arg == 'forceall') {
        $respider_mode = 'reindex_all';
    } elseif ($first_arg) {
        //anything else is taken as the url of a single site
        $url = $first_arg;
        $respider_mode = 'site';
    } else {
        print "Usage = php -f spider.php all|forceall|[an url as http://something]".$br;
        die;
    }
} else {
    include "$relative_script_path/libs/auth.php";
    $run_mode = 'http';
    $br = "<BR>\n";
    $s_yes = "<img src='yes.gif' width='10' height='10' border='0' align='middle'>";
    $s_no = "<img src='no.gif' width='10' height='10' border='0' align='middle'>";
    $s_link = " <A HREF='@url' Target='_blank'>@url</A> ";
}

//connect to distant ftp for text content (if constants are defined)
$ftp_id = phpdig_ftp_connect();

//mode url : test new or existing site
//Reduces $url to scheme://host/, looks the site up in the sites table and
//queues a first tempspider row (existing site) or creates the site and queues
//its start page when detect_dir_html() reports ok (new site).
//NOTE(review): $url and the $subpu values are interpolated into the SQL below
//without escaping; presumably they are escaped upstream - confirm.
if (!empty($url) && $url != 'http://' && (empty($respider_mode) || $respider_mode == 'site')) {
    //format url
    $pu = parse_url($url);
    if (!is_array($pu)) {
        //parse_url() gives false on seriously malformed urls
        $pu = array();
    }
    if (empty($pu['scheme'])) {
        $pu['scheme'] = "http";
    }
    //parse_url() leaves out the keys of absent components
    $pu_host = isset($pu['host']) ? $pu['host'] : '';
    $pu_path = isset($pu['path']) ? $pu['path'] : '';
    $pu_query = isset($pu['query']) ? $pu['query'] : '';
    //NOTE(review): $subpu is not assigned earlier in the visible script, so
    //this part is normally empty - confirm it is not a leftover
    $subpu_file = isset($subpu['file']) ? $subpu['file'] : '';

    $url = $pu['scheme']."://".$pu_host."/";
    $subpu = url_purify($pu_path.$subpu_file.$pu_query);

    $query = "SELECT site_id FROM sites WHERE site_url like '$url'";
    $result = mysql_db_query($database,$query,$id_connect);
    if (mysql_num_rows($result)) {
        //existing site
        list($site_id) = mysql_fetch_row($result);
        mysql_free_result($result);

        $query_tempspider = "INSERT INTO tempspider (site_id,file,path) VALUES ('$site_id','".$subpu['file']."','".$subpu['path']."')";
        mysql_db_query($database,$query_tempspider,$id_connect);
    } else {
        //new site
        $query = "INSERT INTO sites SET site_url='$url',upddate = NOW()";
        mysql_db_query($database,$query,$id_connect);
        $site_id = mysql_insert_id($id_connect);
        $new_site = 1;

        //new spidering = insert first row in tempspider
        $subpu['url'] = $url;
        $exclude = test_robots_txt($url);

        $subpu = detect_dir_html($subpu,$exclude);
        if ($subpu['ok'] == 1) {
            set_time_limit(0);
            $query = "INSERT INTO tempspider SET file='".$subpu['file']."',path='".$subpu['path']."',level=0,site_id='$site_id'";
            mysql_db_query($database,$query,$id_connect);
        }
    }
}

//retrieve list of urls
//one site when $site_id is set, every site otherwise
//$where_site starts empty so that it can never carry a value from outside,
//and the id goes unquoted into the query, hence the integer cast
$where_site = '';
if (!empty($site_id)) {
    $where_site = "WHERE site_id=".(int) $site_id;
}

$query= "SELECT site_id,site_url FROM sites $where_site";
$list_sites = mysql_result_select($database,$id_connect,$query);

//page header in browser mode, plain title line otherwise
if ($run_mode == 'http') {
?>
<html>
<head>
<?php
include "$relative_script_path/includes/style.php";
?>
</head>
<body bgcolor="white">
<img src="../phpdiglogo.gif" width="246" height="77" alt="phpdig <?php print $phpdig_version ?>"><br>
<h3><?php pmsg('spidering'); ?></h3>
<?php
} else {
    pmsg('spidering');
}

//missing or too large depth limit : fall back on the default one
if (empty($limit) or $limit > $spider_max_limit) {
    $limit = $spider_default_limit;
}
/ /        r e t r i v e s                                     s i t e s
i f     ( i s _ a r r a y ( $ l i s                        t _ s i t e s ) )
                            {
while(list($useless,$site_datas) =                         each($list_sites))
                            {
$ s i t e _ i d = $ s i t e _ d a t a s                    [ ' s i t e _ i d ' ] ;
$ u r l = $ s i t e _ d a t a s [ '                       s i t e _ u r l ' ] ;

//just keep the reccords not indexed before
$query = "DELETE FROM tempspider WHERE site_id = '$site_id' and (indexed = 1 or error =
1                        )                             "                               ;
mysql_db_query($database,$query,$id_connect);

//refill the tempspider with not expired spiders reccords, eventually refined
s w i t c h ( $ r e s p i d e r _ m o d e )
                                        {
    c a s e             " r e i n d e x _ a l l " :
    $ a n d m o r e _ t e m p s p i d e r                        =      ' ' ;
    $ f o r c e _ f i r s t _ r e i n d e x                        =      1 ;
    b              r             e             a              k              ;

   d           e          f         a            u             l          t            :
   $andmore_tempspider = 'AND upddate < now()';
                                         }
$query_tempspider = "INSERT INTO tempspider (site_id,file,path) SELECT site_id,file,path
FROM spider WHERE site_id=$site_id $andmore_tempspider";
mysql_db_query($database,$query_tempspider,$id_connect);

/    /    f    i   r     s  t         l    e     v     e                               l
$      l    e    v      e    l          =            0                                 ;
/ / s t o r e        r o b o t s . t x t        d a t a                                s
$ e x c l u d e = t e s t _ r o b o t s _ t x t ( $ u r l )                            ;
p r i n t            " S I T E                 :     $ u r l $ b r " ;
$     n      _     l     i     n         k         s        =            0       ;
for ($level = 0; $level <= $limit; $level++)
                                           {
  //retrieve list of links from this level
  $query = "SELECT DISTINCT path,file,indexed FROM tempspider WHERE level = $level
AND site_id=$site_id AND error = 0 order by file";
  $result_id = mysql_db_query($database,$query,$id_connect);
  $n_links = mysql_num_rows($result_id);
  print msg('level')." $level : $n_links ".msg('links_more').$br;
  i f           ( $ n _ l i n k s                               >          0 )
                                             {
     while ($new_links = mysql_fetch_array($result_id))
                                                 {
         //keep alive the ftp connection (if exists)
         i f ( F T P _ E N A B L E & & $ f t p _ i d )
            f t p _ p w d ( $ f t p _ i d ) ;

         / / i n d e x i n                g     t h i s          p a g e
         $ t e m p _ p a t h = $          n e w _ l i n k s [ ' p a t h ' ] ;
         $ t e m p _ f i l e = $          n e w _ l i n k s [ ' f i l e ' ] ;
         $already_indexed =                $new_links['indexed'];
         $ t e m p s p i d e r _ i d      = $ n e w _ l i n k s [ ' i d ' ] ;

           //Retrieve dates if page is already in database
           $test_exists =
read_spider_reccord($database,$id_connect,$site_id,$temp_path,$temp_file);

         i f   ( i s _ a r r a y ( $ t e s t _ e x i s t s ) )
                                    {
           $exists_spider_id = $test_exists['spider_id'];
  $upddate = $test_exists['upddate'];
  $last_modif_old = $test_exists['last_modified'];
                         }
e               l               s                e
                         {
  $ e x i s t s _ s p i d e r _ i d      =    0 ;
                         }

$url_indexing = $url.$temp_path.$temp_file;

//verify if 'revisit-after' date is expired or if page doesn't exists, or force is on.
if ($exists_spider_id == 0 || $upddate < $date || $force_first_reindex == 1)
                                            {
//test content-type of this page if not excluded
if (!test_robots($exclude,$temp_path))
   $result_test = store_temp_html_file($url_inde xing,'temp/');

i f    ( i s _ a r r a y ( $ r e s u l t _ t e s t ) )
                               {
extract($result_test); //last_modified, content_type, tempfile

/ / r e s e t           n o m o d i f               v a r i a b l e
$      n     o     m       o   d      i    f           =          0     ;
$ o k _ f o r _ s p i d e r                                 =        0 ;
//if the saved last-modified date is sup or equal than the corresponding
/ / h e a d e r , s e t $ n o m o d i f t o 1
i f ( $ e x i s t s _ s p i d e r _ i d > 0 & &
$last_modif_old > = $las t_modified)
           $ n o m o d i f                                =         1 ;
e                        l                      s                      e
                                      {
   /     /     c    o       n   t      i   n      u     e     .    .    .
           $     n    o     m     o    d     i    f         =          0     ;

           //Retrieve meta -tags                      for    this page
           i f ( $ c o n t e n t _ t y p e            = =    ' H T M L ' )
                                      {
             i f   ( i s _ f i l e ( $ t               e m p f i l e ) )
               $tag = format_meta_t                   ags($tempfile);
                                      }

           i f       ( i s _ a r r a y ( $ t a g ) )
                                     {
               //biwise operation on robots tags for noindex
               $noindex = 6 & test_robots_tags($tag);
               $nofollow = 5 & test_robots_tags($tag);
               $revisit_after = $tag['revisit-after'];
                                     }

            //parse next update date with "revist-after" content
            $new_upddate =
date("YmdHis",time()+parse_revisit_after($revisit_after,$limit_days));

          //load the file in an Array if all is ok
          if ($nomodif == 1 && $force_first_reindex == 0)
                                           {
          $ o k _ f o r _ s p i d e r                            =       0 ;
          $ o k _ f o r _ i n d e x                             =        0 ;
          p r i n t       " N o         m o d i f i e d             :     " ;
          / / s e t t h e n e x t r e v i s i t d a t e
          $query = "UPDATE spider SET upddate='$new_upddate' WHERE spider_id =
'   $   e x i s t s _ s p i d e r _ i d ' " ;
          mysql_db_query($database,$query,$id_connect);
                                                    }
             elseif ($noindex > 0 || $already_indexed ==                                  1)
                                       {
             print "Meta Robots = NoIndex, or already indexed :                           ";
             $ o k _ f o r _ s p i d e r                 =    1                            ;
             $ o k _ f o r _ i n d e x                  =     0                            ;
                                       }
             e                l                s                                           e
                                       {
             $ o k _ f o r _ i n d e x                  =     1                             ;
             $ o k _ f o r _ s p i d e r                 =    1                             ;
                                       }
                                     }

            / / le t' s go fo r i nd ex ing th e c on ten t
            i f        ( $ o k _ f o r _ i n d e x                            = =        1 )
                                                     {
            $spider_id =
index_file($database,$id_connect,$tempfile,$site_id,$origine,$localdomain,$temp_path,$temp_fi
le,$content_type,$new_upddate,$last_modified,$tag,$ftp_id);
                                                     }
            e                         l                             s                       e
                                                     {
            p r i n t m s g ( ' n o _ t o i n d e x ' ) . $ b r ;
                                                     }
            print ($progress++).':'.$url_indexing.$br;
                                                     }
            e                         l                             s                       e
                                                       {
              /      /    n      o      n       e             s      t    o     r     e     d
              i f        ( $ e x i s t s _ s p i d e r _ i d )
                                                         {
              / / d e l e t e t h e e x i s t i n g s p i d e r _ i d
              print $s_no.msg('error').' 404'.$br;
              delete_spider_reccord($database,$id_connect,$exists_spider_id);
                                                    }
            //mark the tempspider reccord as error
            $query = "UPDATE tempspider SET error = 1 WHERE id = $tempspider_id";
            mysql_db_query($database,$query,$id_connect);
                                                  }
                                                }
          e                        l                        s                         e
                                                {
          p                  r                  i                  n                  t
$s_no.($progress++).":".str_replace('@url',$url_indexing,$s_link).msg('id_recent').$br;
                                                }
          / / d i s p l a y p r o g r e s s i n d i c a t o r
          print "(".msg('time')." : ".gmdate("H:i:s",time() -$debut).")".$br;

      //update temp table with 'indexed' flag
      $requete = "UPDATE tempspider SET indexed=1 WHERE site_id=$site_id and path
like '$temp_path' and file like '$temp_file'";
      $result_update = mysql_db_query($database,$requete,$id_ connect);



         //explore each page to find new links
         if ((($spider_id > 0 || $ok_for_spider) || $force_first_reindex == 1) && $nofollow ==
0       & &             $ l e v e l                       <          $ l i m i t )
            $urls = explore ($tempfile,$url,$new_links['path'],$new_links['file']);

          / /      D   E L E T E        T E M P F I L E
          i f        ( i s _ f i l e ( $ t e m p f i l e ) )
                                      {
          u    n    l i n k ( $ t e m p f i l e ) ;
          u      n      s       e   t   (   $   t   e   m   p       f   i   l   e   )   ;
                                                    }

          /                 /           D           E           B           U    G
          /                                                                       *
          p          r     i   n    t           "    <     h     r      >    "    ;
          echo       "<b>$url".$new_links['path'].$new_links['file']." : </b><br>";
          d s         p _ t a b l e _ d a t a s ( $ u r l s ) ;
          p          r     i   n    t           "    <     h     r      >    "    ;
          *                                                                       /
          /              /         G           U          B            E         D

          i f           ( i s _ a r r a y ( $ u r l s ) )
                                                    {
              w h i l e ( $ l i e n s = e a c h ( $ u r l s ) )
                                                      {
                  //not an apache fancy index (with sorts by columns)
                  if ($apache_indexes[$liens[1]['file']] != 1)
                                                      {
                  $       e    x     i      s     t      s            =           0     ;
                  $ e x i s t s _ t e m p _ s p i d e r = 0 ;
                  //is this link already in temp table ?
                  $query = "SELECT DISTINCT path,file FROM tempspider WHERE path
like '".$liens[1]['path']."' AND file like '".$liens[1]['file']."' AND site_id=$site_id";
                  $test_id = mysql_db_quer y($database,$query,$id_connect);
                  $exists = mysql_num_rows($test_id);
                  $ e x i s t s _ t e m p _ s p i d e r = $ e x i s t s ;
                  m y s q l _ f r e e _ r e s u l t ( $ t e s t _ i d ) ;

                     i f    ( $ s p i d e r _ r o o t _ i d )
                       $andmore = " AND spider_id <> $spider_root_id ";
                     //is this link already in spider ?
                    $query = "SELECT DISTINCT path,file FROM spider WHERE path like
'".$liens[1]['path']."' AND file like '".$liens[1]['file']."' AND site_id=$site_id $andmore";
                    $test_id = mysql_db_query($database,$query,$id_connect);
                    $exists += mysql_num_rows($test_id);
                    m y s q l _ f r e e _ r e s u l t ( $ t e s t _ i d ) ;

                  $ l i e n s [ 1 ] [ ' u r l ' ]                       =      $ u r l ;

                  / / t e s t v a l i d i t y o f t h e n e w l i n k
                  i f         ( $ e x i s t s             <     1 )
                    $cur_link = detect_dir_html($liens[1],$exclude);
                  e                  l               s              e
                    $ c u r _ l i n k [ ' o k ' ]            =    0 ;

                  i f     ( $ c u r _ l i n k [ ' o k ' ] = = 1 )
                                             {
                      $    s _ e r r o r                 =     0 ;
                      p     r   i   n    t        '    +     '   ;
                                             }
                  e                 l               s            e
                      $   s _ e r r o r                  =     1 ;

                      //insert in temp table for next level
                      i f ( $ e x i s t s _ t e m p _ s p i d e r < 1 )
                                                            {
                      $values = "('".$cur_link['path']."',
'".$cur_link['file']."',".($level+1).",$site_id,$s_error)";
                      $query = "INSERT INTO tempspider (path, file, level, site_id, error)
V A L U E S                                         $ v a l u e s " ;
                      mysql_db_query($database,$query,$id_connect);
                                                            }
                //display something to avoid browser-side timeout
                f        l          u          s          h     (         )        ;
                                                     }
                                                     }
             e       c       h          o                    $    b         r      ;
                                                   }
                                                 }
       $ f o r c e _ f i r s t _ r e i n d e x                           =     0 ;
       print $br."temps : ".gmdate("H:i:s",time() -$debut).$br;
                                              }
   e                        l                               s                      e
                                              {
     p r i n t         p m s g ( ' n o _ t e m p ' ) . $ b r ;
     b              r                 e                 a           k              ;
                                              }
   m y s q l _ f r e e _ r e s u l t ( $ r e s u l t _ i d ) ;
                                            }
i f        ( $ r u n _ m o d e                         = =     ' h t t p ' )
                                          {
/ / r e s u l t s - i n - h t t p - m o d e - - - - - - - - - - - - - - - - -
$query = "SELECT DISTINCT path,file FROM tempspider WHERE site_id=$site_id AND error
= 0 AND indexed = 1 ORDER by path,file";
$result_id = mysql_db_query($database,$query,$id_connect);
$n_ li nk s = my sq l_n um _ ro ws ($ re s u l t_ id );

print "<hr><h3>".msg('links_found')." : $n_links</h3>";

while ($liens = mysql_fetch_row($result_id))
                                 {
  print "<a href=\"$url".$liens[0].$liens[1]."\" target=\"_blank\"
>".urldecode($liens[0].$liens[1])."</a><br>\n";
                                 }
                                  }
e                     l                      s                      e
                         {
    print msg('links_found')." : ".$n_links.$br;
                         }

                         }
                         }
/ / d i s p l a y   e n d    o f   i n d e x i n g
p   m   s  g   (  '   i  d   _   e  n   d  '   )  ;
p h p d i g _ f t p _ c l o s e ( $ f t p _ i d ) ;

i f     ( $ r u n _ m o d e               = =      ' h t t p ' )
{                                            ?                      >
<                     h                      r                      >
<A href="index.php" >[<? pmsg('back') ?>]</A> <? pmsg('to_admin') ?>.
<          /          b          o           d          y           >
<          /          h           t          m           l          >
<          ?                      }                     ?           >
update.php
<?php
/*
 * update.php - PhpDig admin page listing the indexed directory tree of a site.
 *
 * Inputs (read as plain globals; presumably request parameters imported by
 * register_globals - verify against the deployment):
 *   $site_id  id of the site to display; cast to integer below
 *   $path     optional URL-encoded path prefix selecting one branch
 *   $sup      when set, delete the index entries of the selected branch
 *   $exp      when set, queue the selected branch in tempspider and
 *             redirect to spider.php
 *
 * $database, $id_connect, pmsg(), msg(), phpdig_ftp_connect(),
 * phpdig_ftp_close() and delete_text_content() are not defined here; they
 * are expected to come from the included files.
 */
$relative_script_path = '..';
include "$relative_script_path/includes/config.php";
include "$relative_script_path/libs/auth.php";
include "$relative_script_path/admin/robot_functions.php";

set_time_limit(3600);
srand(time());

// Optional SQL fragment restricting the queries below to one branch.
// It is always initialised here so it can never be supplied from outside.
$andpath = '';
if (!empty($path)) {
    // Undo magic_quotes_gpc (when active) so the value is escaped exactly once.
    if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
        $path = stripslashes($path);
    }
    $path = urldecode($path);
    $andpath = "AND path like '".addslashes($path)."%'";
}

settype($site_id, "integer");
if ($site_id == 0) {
    // No site selected: go back to the admin index and stop here.
    header("location:index.php");
    exit;
} elseif (!empty($sup)) {
    // Delete every spider record of the branch together with its engine rows.
    $query = "SELECT spider_id FROM spider WHERE site_id=$site_id $andpath";
    $result_id = mysql_db_query($database, $query, $id_connect);

    if (mysql_num_rows($result_id) > 0) {
        $ftp_id = phpdig_ftp_connect();
        // Build "IN (0,id1,id2,...)"; the leading 0 keeps the list valid.
        $in = "IN (0";
        while (list($spider_id) = mysql_fetch_row($result_id)) {
            delete_text_content($relative_script_path, $spider_id, $ftp_id);
            $in .= ",$spider_id";
        }
        $in .= ")";
        phpdig_ftp_close($ftp_id);

        $query = "DELETE FROM engine WHERE spider_id $in";
        $result_id = mysql_db_query($database, $query, $id_connect);

        $query = "DELETE FROM spider WHERE site_id=$site_id $andpath";
        $result_id = mysql_db_query($database, $query, $id_connect);
    }
} elseif (!empty($exp)) {
    // Re-index: replace the already indexed temp rows by the branch's pages,
    // then hand over to the spider.
    $query = "DELETE FROM tempspider WHERE site_id=$site_id and indexed = 1";
    mysql_db_query($database, $query, $id_connect);
    $query = "INSERT INTO tempspider (site_id,file,path) SELECT site_id,file,path FROM spider WHERE site_id=$site_id $andpath";
    mysql_db_query($database, $query, $id_connect);

    header("location:spider.php?site_id=$site_id");
    exit;
}

?>
<html>
<head>
<title>PhpDig : <?php pmsg('update') ?> </title>
<?php
include "$relative_script_path/includes/style.php";
// One row per distinct path of the site; the spider_id column is used below
// as the anchor name of the row.
$query = "SELECT path,spider_id FROM spider WHERE site_id=$site_id GROUP BY path ORDER by path";
$result_id = mysql_db_query($database, $query, $id_connect);
$num = mysql_num_rows($result_id);
if ($num < 1)
    mysql_free_result($result_id);
?>
</head>
<body bgcolor="white">
<a name="AAA">
<img src="../phpdiglogo.gif" width="246" height="77" alt="PhpDig <?php print $phpdig_version ?>"><br>
<?php pmsg('update_mess') ?>
<hr>
<h3><?php pmsg('tree_found') ?> : </h3>
<P style='background-color:#CCDDFF;'>
<?php pmsg('update_help') ?><br>
<B><?php pmsg('warning') ?> </B><?php pmsg('update_warn') ?>
</P>
<P>
<?php
$aname = "AAA";
for ($n = 0; $n < $num; $n++) {
    // $aname2 is the anchor of the previous row ("AAA" for the first one);
    // the delete link jumps back to it after the page reloads.
    $aname2 = $aname;
    list($path_name, $aname) = mysql_fetch_row($result_id);
    print "<A NAME='$aname'>\n";

    // Build the indented label: one "- -" step per directory level, then
    // the name of the last directory of the path.
    $paths = explode("/", rawurldecode($path_name));
    $m = count($paths);
    $path_name_aff = "-";
    for ($i = $m - 2; $i > 0; $i = $i - 1) {
        $path_name_aff .= "&nbsp;-&nbsp;-";
    }
    $path_name_aff .= $paths[$m - 2];
    if ($m < 3)
        $path_name_aff = "<B>$path_name_aff</B>";
    elseif ($m < 4)
        $path_name_aff = "<I>$path_name_aff</I>";

    print "<A HREF='update.php?site_id=$site_id&path=".urlencode($path_name)."&sup=1#$aname2' target='_self'><img src='no.gif' width='10' height='10' border='0' align='middle' alt='".msg('delete')."'></A>&nbsp;\n";
    print "<A HREF='update.php?path=".urlencode($path_name)."&site_id=$site_id&exp=1' target='_top' ><img src='yes.gif' width='10' height='10' border='0' align='middle' alt='".msg('reindex')."'></A>&nbsp;\n";
    // The empty path is the root of the site.
    if ($path_name == "")
        $path_name_aff = "<I><B style='color:red;'>Racine</B></I>";
    print $path_name_aff."&nbsp; <A HREF='files.php?path=".urlencode($path_name)."&site_id=$site_id' target='files' ><img src='details.gif' width='10' height='10' border='0' align='middle' alt='".msg('files')."'></A><BR>\n";
}
?>
</P>
<hr>
<A href="index.php" target="_top">[<?php pmsg('back') ?>]</A> <?php pmsg('to_admin') ?>.
</body>
</html>
update_frame.php
<?php
/*
 * update_frame.php - PhpDig admin frameset for one site (tree on the left,
 * file list on the right).
 *
 * Inputs (read as plain globals; presumably request parameters imported by
 * register_globals - verify against the deployment):
 *   $site_id  id of the site; cast to integer below
 *   $delete   when set, remove the site with all of its indexed content and
 *             redirect to index.php
 *   $recup    passed through unchanged to update.php
 *
 * $database, $id_connect, phpdig_ftp_connect(), phpdig_ftp_close() and
 * delete_text_content() are expected to come from the included files.
 */
$relative_script_path = '..';
include "$relative_script_path/includes/config.php";
include "$relative_script_path/libs/auth.php";
include "$relative_script_path/admin/robot_functions.php";

// Cast once, up front: the id is used in SQL and printed into the frame URL.
settype($site_id, 'integer');

if (!empty($delete)) {
    $query = "SELECT spider_id FROM spider WHERE site_id=$site_id";
    $result_id = mysql_db_query($database, $query, $id_connect);

    if (mysql_num_rows($result_id) > 0) {
        // Build "IN (0,id1,id2,...)"; the leading 0 keeps the list valid.
        $in = "IN (0";
        $ftp_id = phpdig_ftp_connect();
        while (list($spider_id) = mysql_fetch_row($result_id)) {
            delete_text_content($relative_script_path, $spider_id, $ftp_id);
            $in .= ",$spider_id";
        }
        phpdig_ftp_close($ftp_id);
        $in .= ")";
        $query = "DELETE FROM engine WHERE spider_id $in";
        $result_id = mysql_db_query($database, $query, $id_connect);

        $query = "DELETE FROM spider WHERE site_id=$site_id";
        $result_id = mysql_db_query($database, $query, $id_connect);
    }
    $query = "DELETE FROM sites WHERE site_id=$site_id";
    $result_id = mysql_db_query($database, $query, $id_connect);

    // Stop after the redirect so the frameset is not sent as well.
    header("location:index.php");
    exit;
}
if (!$site_id) {
    header("location:index.php");
    exit;
}
?>
<html>
<head>
</head>
<FRAMESET COLS="50%,50%" BORDER="0" FRAMESPACING="0">
<FRAME SRC="update.php?site_id=<?php print $site_id ?>&recup=<?php print isset($recup) ? urlencode($recup) : '' ?>" NAME="tree" NORESIZE frameborder="NO">
<FRAME SRC="files.php" NAME="files" RESIZE frameborder="NO">
</FRAMESET>
<noframes>
</noframes>
</html>
files.php
<?php
/*
 * files.php - PhpDig admin page listing the indexed files of one directory.
 *
 * Inputs (read as plain globals; presumably request parameters imported by
 * register_globals - verify against the deployment):
 *   $site_id    id of the site; cast to integer below
 *   $path       directory whose files are listed
 *   $spider_id  id of one indexed page; when given, site_id/path/file are
 *               reloaded from the spider table
 *   $spider     with $spider_id: queue that page in tempspider and redirect
 *               to spider.php
 *   $sup        with $spider_id: delete that page from the index
 *
 * $database, $id_connect, pmsg(), phpdig_ftp_connect(), phpdig_ftp_close()
 * and delete_spider_reccord() are expected to come from the included files.
 */
$relative_script_path = '..';
include "$relative_script_path/includes/config.php";
include "$relative_script_path/libs/auth.php";
include "$relative_script_path/admin/robot_functions.php";

// Normalise the incoming values: ids become integers; the strings are
// brought back to their raw form (undoing magic_quotes_gpc when active) so
// that addslashes() below escapes them exactly once, whether they came from
// the request or were reloaded from the database.
$magic_quotes = function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
$path = isset($path) ? ($magic_quotes ? stripslashes($path) : $path) : '';
$file = isset($file) ? ($magic_quotes ? stripslashes($file) : $file) : '';
settype($spider_id, 'integer');
settype($site_id, 'integer');

if ($spider_id) {
    $query = "SELECT site_id,path,file FROM spider where spider_id=$spider_id";
    $result_id = mysql_db_query($database, $query, $id_connect);
    if (mysql_num_rows($result_id))
        list($site_id, $path, $file) = mysql_fetch_row($result_id);

    if (!empty($spider)) {
        // Re-index this single page: it becomes the only tempspider entry
        // of the site, then the spider takes over.
        $query = "DELETE FROM tempspider WHERE site_id=$site_id";
        $result_id = mysql_db_query($database, $query, $id_connect);
        $query = "INSERT INTO tempspider SET site_id=$site_id,path='".addslashes($path)."',file='".addslashes($file)."'";
        $result_id = mysql_db_query($database, $query, $id_connect);
        header("location:spider.php?site_id=$site_id&mode=small&spider_root_id=$spider_id");
        exit;
    }
    if (!empty($sup)) {
        $ftp_id = phpdig_ftp_connect();
        delete_spider_reccord($database, $id_connect, $spider_id, $ftp_id);
        phpdig_ftp_close($ftp_id);
    }
}
if ($site_id) {
    $query = "SELECT site_url FROM sites WHERE site_id=$site_id";
    $result_id = mysql_db_query($database, $query, $id_connect);
    list($url) = @mysql_fetch_row($result_id);
    $query = "SELECT file,spider_id FROM spider WHERE site_id=$site_id AND path like '".addslashes($path)."' ORDER by file";
    $result_id = mysql_db_query($database, $query, $id_connect);
    $num = mysql_num_rows($result_id);
    if ($num < 1)
        mysql_free_result($result_id);
}
?>
<html>
<head>
<title><?php pmsg('files') ?></title>
<?php include "$relative_script_path/includes/style.php"; ?>
</head>
<body bgcolor="white">
<img src="fill.gif" width="246" height="77"><br>
<?php pmsg('branch_start') ?>
<hr>
<?php if (!$site_id) { ?>
<P style='background-color:#CCDDFF;'>
<?php pmsg('branch_help1') ?><BR>
</P>
<?php } else { ?>
<a name="AAA">
<h3><?php print $num ?> pages</h3>
<P style='background-color:#CCDDFF;'>
<?php pmsg('branch_help2'); ?><BR>
<B><?php pmsg('warning') ?> </B><?php pmsg('branch_warn') ?>
</P>
<P>
<?php
for ($n = 0; $n < $num; $n++) {
    // $aname2 is the anchor of the previous row ("AAA", the top of the list,
    // for the first row); the delete link jumps back to it after the reload.
    $aname2 = $spider_id;
    if ($n == 0) {
        $aname2 = "AAA";
    }
    list($file_name, $spider_id) = mysql_fetch_row($result_id);
    // Each row is anchored by its own spider_id so that the next row's
    // "#$aname2" target actually exists.
    print "<A NAME='$spider_id'>\n";
    $href = $url.$path.$file_name;
    print "<A HREF='files.php?spider_id=$spider_id&sup=1#$aname2'><img src='no.gif' width='10' height='10' border='0' align='middle'></A>&nbsp; \n";
    print "<A HREF='files.php?spider_id=$spider_id&spider=1' target='_top' ><img src='yes.gif' width='10' height='10' border='0' align='middle'></A>&nbsp; \n";
    print "<A HREF='$href' target='_blank'>".rawurldecode($file_name)."&nbsp;</A><BR>\n";
}
?>
</P>
<hr>
<?php } ?>
</body>
</html>
                Directory                  includes
config.php
<?php
//-------------CONFIGURATION FILE-------
// Central PhpDig settings: version/language, admin authentication, template
// and display constants, spider/indexing defaults, text-content storage and
// FTP parameters.
// NOTE(review): the admin and FTP credentials below are hard-coded in this
// file - keep it out of public listings and version control.
$phpdig_version = "1.4.1";
$phpdig_language = "en"; // options: en - fr, and more if available
define('PHPDIG_ADM_AUTH','1'); //turn on or off the http auth in admin. 1 is on
define('PHPDIG_ADM_USER','3spiders');
define('PHPDIG_ADM_PASS','pat');

//template file and style
$template = './templates/phpdig.html';
define('HIGHLIGHT_BACKGROUND','yellow');
define('WEIGHT_IMGSRC','./tpl_img/weight.gif');
define('WEIGHT_HEIGHT','5');
define('WEIGHT_WIDTH','50');

//---------DEFAULT VALUES
$search_default_limit = 10; //results per page

$spider_max_limit = 12; //max recurse levels in spider
$spider_default_limit = 1; //default value
$respider_limit = 5; //recurse limit for update

$limit_days = 7; //default days before reindexing a page
$small_words_size = 2; //words to not index
$max_words_size = 30; //max word size

$title_weight = 3; //relative title weight
$chunk_size = 8000; //chunk size for regex processing
$summary_length = 500; //length of results summary

define('TEXT_CONTENT_PATH','text_content/'); //dir of textual content (relative to phpdig path)
define('CONTENT_TEXT',1); //enable or not text content

define('FTP_ENABLE',0); //enable ftp content for distant PhpDig
define('FTP_HOST','vclass.mgt.psu.ac.th'); //if distant PhpDig, ftp host;
define('FTP_PORT',21); //ftp port
define('FTP_PASV',1); //passive mode
define('FTP_PATH','/home/staff/parinya/public_html/project2001/search_engine'); //distant path from the ftp root
define('FTP_TEXT_PATH','text_content'); //path for text files (default)
define('FTP_USER','parinya');
define('FTP_PASS','tumtum');

//regular expression to ban useless external links in indexing
//prevents adding those !%*$£ urls to the keywords
$banned = '^ad\.|banner|doubleclick';

//----------HTML ENTITIES
// Maps an HTML entity (used as a case-insensitive pattern, optional ';' appended by the
// caller) to its plain-text replacement.
// The accented replacements were destroyed when this listing was transcoded; they are
// rebuilt here from the entity names as ISO-8859-1 bytes written with hex escapes, so
// the table no longer depends on the encoding of this source file.
// NOTE(review): entries whose replacement is " " appear blank in the listing and are
// kept as a single space - confirm against a pristine PhpDig config.php.
$spec = array( "&amp"     => "&",
               "&agrave"  => "\xE0",
               "&egrave"  => "\xE8",
               "&ugrave"  => "\xF9",
               "&oacute;" => "\xF3",
               "&eacute"  => "\xE9",
               "&icirc"   => "\xEE",
               "&ocirc"   => "\xF4",
               "&ucirc"   => "\xFB",
               "&ecirc"   => "\xEA",
               "&ccedil"  => "\xE7",
               "&#156"    => "oe",
               "&gt"      => " ",
               "&lt"      => " ",
               "&deg"     => " ",
               "&apos"    => "'",
               "&quot"    => " ",
               "&acirc"   => "\xE2",
               "&iuml"    => "\xEF",
               "&euml"    => "\xEB",
               "&auml"    => "\xE4",
               "&ouml"    => "\xF6",
               "&nbsp"    => " ",
               "&iacute;" => "\xED",
               "&reg"     => " ",
               "&copy"    => " "
               );
//month names in iso dates (lower-case three-letter abbreviation => month number)
$month_names = array ( 'jan' => 1,
                       'feb' => 2,
                       'mar' => 3,
                       'apr' => 4,
                       'may' => 5,
                       'jun' => 6,
                       'jul' => 7,
                       'aug' => 8,
                       'sep' => 9,
                       'oct' => 10,
                       'nov' => 11,
                       'dec' => 12
                       );
//apache multi indexes parameters (sort-order query strings of directory listings)
$apache_indexes = array ( "?N=A" => 1,
                          "?N=D" => 1,
                          "?M=A" => 1,
                          "?M=D" => 1,
                          "?S=A" => 1,
                          "?S=D" => 1,
                          "?D=A" => 1,
                          "?D=D" => 1 );



//includes language file (falls back to english when the requested locale is missing)
// NOTE(review): $phpdig_language ends up in an include path - make sure it can never
// be set from request data.
if (is_file("$relative_script_path/locales/{$phpdig_language}-language.php"))
  {include "$relative_script_path/locales/{$phpdig_language}-language.php";}
else
  {include "$relative_script_path/locales/en-language.php";}

//connection to database (connect.php is optional: it is only included when present)
if (is_file("$relative_script_path/includes/connect.php"))
  {include "$relative_script_path/includes/connect.php";}
//includes of libraries
include "$relative_script_path/libs/phpdig_functions.php";
include "$relative_script_path/libs/function_phpdig_form.php";
include "$relative_script_path/libs/mysql_functions.php";
?>
connect.php
<?
//connection to the MySql server
// NOTE(review): credentials are hard-coded, and '@' hides a failed connection
// ($id_connect is then false). The mysql_* extension no longer exists in PHP 7+;
// moving to mysqli must be done together with libs/mysql_functions.php.
$id_connect = @mysql_connect("localhost","3spiders","pat");
//here the name of your database where are phpdig tables
$database = "search";
?>
Directory libs
auth.php
<?
// Credentials the administrator must supply through HTTP Basic authentication.
$user = PHPDIG_ADM_USER;
$pwd = PHPDIG_ADM_PASS;

// Sends a "401 Unauthorized" Basic-auth challenge, prints an error message
// and stops the script. Never returns.
function auth() {
  $realm = "Administration PhpDig";
  // The realm is a quoted-string: it must be wrapped in double quotes.
  Header("WWW-Authenticate: Basic realm=\"".$realm."\"");
  Header("HTTP/1.0 401 Unauthorized");
  // Message rebuilt from the mangled listing; accents are written as entities so
  // the output does not depend on the encoding of this file.
  echo "Vous ne pouvez acc&eacute;der &agrave; cette page";
  // redirecting is not possible here,
  // but an html error page could be included instead
  exit;
}

if (PHPDIG_ADM_AUTH == 1)
{
  // Read the credentials from $_SERVER: the bare $PHP_AUTH_USER / $PHP_AUTH_PW
  // globals only exist with register_globals, so every request would be refused.
  $given_user = isset($_SERVER['PHP_AUTH_USER']) ? $_SERVER['PHP_AUTH_USER'] : null;
  $given_pwd  = isset($_SERVER['PHP_AUTH_PW'])   ? $_SERVER['PHP_AUTH_PW']   : null;

  // Strict comparison: '==' would let numeric-looking strings compare equal.
  if ($given_user === null || $given_pwd === null
      || $given_user !== (string)$user || $given_pwd !== (string)$pwd) {
    // ask for identification again
    auth();
  }
  // otherwise the rest of the script is executed
}
?>
function_phpdig_form.php
<?
//form for the search query.
//
// $query_string is the previous query if exists
// $option is the search option ('start', 'exact' or 'any')
// $limite is the number of results per page (10, 30 or 100)
// $result_page is the path to the search.php script
// $site is the site to limit the results
// $path has the same purpose
// $mode: 'classic' prints a ready-made html form and returns nothing;
//        any other value returns the form parts as an array with the keys
//        form_head, form_foot, form_title, form_field, form_select,
//        form_button and form_radio.
function phpdig_form($query_string="",$option="start",$limite=10,$result_page="search.php",$site="",$path="",$mode='classic')
{
    // Every caller-supplied value lands inside a quoted html attribute, so both
    // quote styles are escaped. The charset is given explicitly because the newer
    // utf-8 default returns an empty string for non-utf-8 input.
    // NOTE(review): ISO-8859-1 is assumed from the crawler's Accept-Charset header - confirm.
    $charset    = 'ISO-8859-1';
    $safe_page  = htmlspecialchars($result_page, ENT_QUOTES, $charset);
    $safe_site  = htmlspecialchars($site, ENT_QUOTES, $charset);
    $safe_path  = htmlspecialchars($path, ENT_QUOTES, $charset);
    $safe_query = htmlentities(stripslashes($query_string), ENT_QUOTES, $charset);

    $result = array();
    $result['form_head'] = "<form action='$safe_page' method='post'>\n"
        ."<input type='hidden' name='site' value='$safe_site'>\n"
        ."<input type='hidden' name='path' value='$safe_path'>\n"
        ."<input type='hidden' name='result_page' value='$safe_page'>\n";
    $result['form_foot'] = "</form>";
    $result['form_title'] = msg('search');
    $result['form_field'] = "<input type='text' name='query_string' size='24' value='".$safe_query."'>";

    // Page-size selector: the entry equal to $limite is pre-selected.
    $options = "";
    foreach (array(10, 30, 100) as $size) {
        $selected = ((string)$limite === (string)$size) ? "selected" : "";
        $options .= "  <option ".$selected.">".$size."</option>\n";
    }
    $result['form_select'] = msg('display')."\n"
        ."  <select name='limite'>\n"
        .$options
        ."  </select>\n"
        ."  ".msg('results')."\n ";

    $result['form_button'] = "<input type='submit' name='search' value='Go...'>";

    // Search-mode radio buttons: the one equal to $option is pre-checked.
    $radios = array('start' => 'w_begin', 'exact' => 'w_whole', 'any' => 'w_part');
    $result['form_radio'] = "";
    foreach ($radios as $value => $label) {
        $checked = ((string)$option === $value) ? "checked" : "";
        $result['form_radio'] .= "<input type=\"radio\" name=\"option\" value=\"".$value."\" "
            .$checked.">".msg($label)."&nbsp;\n ";
    }

    if ($mode != 'classic') {
        return $result;
    }

    // Classic mode: print the complete form.
    print $result['form_head']."\n"
        ."<table border=\"0\" cellspacing='1' cellpadding='2' bgcolor=\"#000000\">\n"
        ." <tr>\n"
        ."  <td align=\"center\" bgcolor='#AACCFF'>\n"
        ."  <B style=\"font-size:12;\" >".$result['form_title']."</B>\n"
        ."  </td>\n"
        ." </tr>\n"
        ." <tr>\n"
        ."  <td align=\"left\" bgcolor='#CCCCCC'>\n"
        ."  ".$result['form_field']."\n"
        ."  ".$result['form_button']."\n"
        ."  ".$result['form_select']."\n"
        ."  </td>\n"
        ." </tr>\n"
        ." <tr>\n"
        ."  <td align=\"center\" bgcolor='#CCCCCC'>\n"
        ."  ".$result['form_radio']."\n"
        ."  </td>\n"
        ." </tr>\n"
        ."</table>\n"
        .$result['form_foot']."\n";
}
//replaces every <phpdig:name/> marker of $text by $values[name]
//(markers without a value are removed). Helper of parse_phpdig_template().
function phpdig_template_fill($text,$values)
{
    if (preg_match_all('/<phpdig:([a-z_]*)\/>/',$text,$found)) {
        foreach (array_unique($found[1]) as $name) {
            $value = (is_array($values) && isset($values[$name])) ? (string)$values[$name] : '';
            $text = str_replace("<phpdig:$name/>",$value,$text);
        }
    }
    return $text;
}

//parse a phpdig template and print the resulting page.
//
// $template      path of the template file
// $t_strings     array marker name => text, for the markers outside the results loop
// $table_results array id => array(marker name => text); the part of the template
//                between <phpdig:results> and </phpdig:results> is printed once per
//                entry, with the extra marker <phpdig:n/> holding the entry id
//
// returns 0 (after printing a message) when the template cannot be read,
// nothing otherwise.
function parse_phpdig_template($template,$t_strings,$table_results)
{
    if (!is_file($template)) {
        print "No template file found !";
        return 0;
    }
    $f_handler = fopen($template,'r');
    if (!$f_handler) {
        print "No template file found !";
        return 0;
    }

    $in_loop = 0;
    $loop_part = '';
    // '!== false' so that a line containing only "0" does not end the parsing
    while (($line = fgets($f_handler,4096)) !== false) {
        if ($in_loop == 0 && preg_match('/^(.*)<phpdig:results>(.*)$/s',$line,$regs)) {
            // text before the marker belongs to the page, text after it to the loop
            print phpdig_template_fill($regs[1],$t_strings);
            $line = $regs[2];
            $loop_part = '';
            $in_loop = 1;
        }
        if ($in_loop == 1) {
            if (!preg_match('/^(.*)<\/phpdig:results>(.*)$/s',$line,$regs)) {
                // still inside the loop: keep collecting
                $loop_part .= $line;
                continue;
            }
            $loop_part .= $regs[1];
            $line = $regs[2];
            $in_loop = 0;
            //parse the loop
            if (is_array($table_results)) {
                foreach ($table_results as $id => $result) {
                    $result['n'] = $id;
                    print phpdig_template_fill($loop_part,$result);
                }
            }
        }
        print phpdig_template_fill($line,$t_strings);
    }
    fclose($f_handler);
}
?>
mysql_functions.php
<?
//executes a select and returns a whole resultset
//
// $database     name of the database to query
// $id_connect   link identifier returned by mysql_connect()
// $query_select sql text; only statements starting with SELECT are accepted
//
// returns -1 when the statement is not a select,
//          0 when the query fails or returns no row,
//          otherwise a 0-indexed array of associative rows.
//
// NOTE(review): the mysql_* extension no longer exists in PHP 7+; moving to mysqli
// must be done together with includes/connect.php, which creates $id_connect.
function mysql_result_select($database,$id_connect,$query_select)
{
    if (!preg_match('/^[^a-z]*select/i',$query_select)) {
        return -1;
    }

    // mysql_db_query() is deprecated: select the database, then run the query
    if (!mysql_select_db($database,$id_connect)) {
        return 0;
    }
    $res_id = mysql_query($query_select,$id_connect);
    if (!$res_id) {
        // failed query: nothing to count or fetch
        return 0;
    }

    $result = array();
    while ($res_datas = mysql_fetch_array($res_id,MYSQL_ASSOC)) {
        $result[] = $res_datas;
    }
    mysql_free_result($res_id);

    if (count($result) > 0) {
        return $result;
    }
    return 0;
}
?>
phpdig_functions.php
<?
//-------------STRING FUNCTIONS

//=================================================
// converts an http date ("Sun, 06 Nov 1994 08:49:37 GMT") to a mysql
// timestamp string "YYYYMMDDHHMMSS".
// The clock time is copied as written; the time zone name is not interpreted.
// returns null when $date does not have that format.
function http_to_sqldate($date)
{
    global $month_names;
    // use the table from the configuration when it is loaded, else a built-in copy
    if (is_array($month_names)) {
        $months = $month_names;
    } else {
        $months = array('jan'=>1,'feb'=>2,'mar'=>3,'apr'=>4,'may'=>5,'jun'=>6,
                        'jul'=>7,'aug'=>8,'sep'=>9,'oct'=>10,'nov'=>11,'dec'=>12);
    }

    if (!preg_match('/([a-z]{3}), ([0-9]{1,2}) ([a-z]+) ([0-9]{4}) ([0-9:]{8}) ([a-z]+)/i',$date,$regs)) {
        return null;
    }

    // only the first three letters identify the month ("September" => "sep")
    $key   = substr(strtolower($regs[3]),0,3);
    $month = sprintf('%02d',isset($months[$key]) ? $months[$key] : 0);
    $year  = sprintf('%04d',(int)$regs[4]);
    $day   = sprintf('%02d',(int)$regs[2]);
    $hour  = sprintf('%06d',(int)str_replace(':','',$regs[5]));
    return "$year$month$day$hour";
}
//=================================================
//returns a localized string: the entry $string of the global $phpdig_mess
//table, with line breaks converted to <br />.
//An unknown key gives an empty string.
function msg($string='')
{
    global $phpdig_mess;
    if (!is_array($phpdig_mess) || !isset($phpdig_mess[$string])) {
        return '';
    }
    return nl2br($phpdig_mess[$string]);
}

//print a localized string: the entry $string of the global $phpdig_mess
//table, with line breaks converted to <br />.
//An unknown key prints nothing.
function pmsg($string='')
{
    global $phpdig_mess;
    if (is_array($phpdig_mess) && isset($phpdig_mess[$string])) {
        print nl2br($phpdig_mess[$string]);
    }
}
//=================================================
//load the common words in an array
//
// $file  text file with one word per line
// returns an array word => 1 (words are trimmed); when the file cannot be
// read the array only holds the placeholder key 'aaaa'.
function common_words($file='')
{
    $common = array();

    // file('') throws on recent PHP and '@' does not catch exceptions, so the
    // empty default is filtered first. '@' keeps an unreadable file quiet:
    // the word list is optional.
    $lines = ($file !== '' && $file !== null) ? @file($file) : false;

    if (is_array($lines)) {
        foreach ($lines as $word) {
            $common[trim($word)] = 1;
        }
    } else {
        $common['aaaa'] = 1;
    }
    return $common;
}

//=================================================
//highlight a string part
//
// $word    regular expression (not a literal) of the text to highlight,
//          matched without regard to case
// $string  text to decorate
// returns $string with every match wrapped in a <B> tag using the
// HIGHLIGHT_BACKGROUND colour; $string is returned unchanged when $word is
// empty or is not a valid expression.
function highlight($word="",$string="")
{
    if (!$word) {
        return $string;
    }
    // '/' is the delimiter, so it has to be escaped inside the expression
    $pattern = '/('.str_replace('/','\/',$word).')/i';
    // '@': $word may be an invalid expression; fall back to the plain text
    $marked = @preg_replace($pattern,"<B style='background-color:".HIGHLIGHT_BACKGROUND.";'>\\1</B>",$string);
    if ($marked === null) {
        return $string;
    }
    return $marked;
}

//=================================================
//replace all characters with an accent by their unaccented ascii letter
//
// $chaine is expected to be ISO-8859-1 text: the replacement works byte by byte.
// The accented alphabet is written with hex escapes; the literal characters of the
// listing were destroyed by a charset conversion and would be again if this file
// were re-encoded. It was rebuilt from the 53-letter replacement string and the
// bytes that survived.
// NOTE(review): confirm against a pristine PhpDig phpdig_functions.php.
function stripaccents($chaine)
{
    $accented =
         "\xC0\xC1\xC2\xC3\xC4\xC5"   // A grave..ring
        ."\xE0\xE1\xE2\xE3\xE4\xE5"   // a grave..ring
        ."\xD2\xD3\xD4\xD5\xD6\xD8"   // O grave..stroke
        ."\xF2\xF3\xF4\xF5\xF6\xF8"   // o grave..stroke
        ."\xC8\xC9\xCA\xCB"           // E
        ."\xE8\xE9\xEA\xEB"           // e
        ."\xC7\xE7"                   // C, c cedilla
        ."\xCC\xCD\xCE\xCF"           // I
        ."\xEC\xED\xEE\xEF"           // i
        ."\xD9\xDA\xDB\xDC"           // U
        ."\xF9\xFA\xFB\xFC"           // u
        ."\xFF\xD1\xF1";              // y diaeresis, N tilde, n tilde
    return strtr($chaine,$accented,
        "AAAAAAaaaaaaOOOOOOooooooEEEEeeeeCcIIIIiiiiUUUUuuuuyNn");
}

//=================================================
//epure a string from all non alnum words (words can contain & and _ character)
//
// $text             text to clean
// $min_word_length  words of that many characters or fewer are dropped
// returns the lower-cased, accent-free text reduced to words separated by single
// spaces, without pure numbers and without short words.
function epure_text($text,$min_word_length=2)
{
    $min_word_length = (int)$min_word_length;

    // Accents are removed before lower-casing: strtolower() leaves accented capitals
    // alone, and stripaccents() would then turn them into ascii capitals.
    $text = strtolower(stripaccents($text));

    // every run of other characters becomes one space; the padding lets the first
    // and the last word be treated like the others
    $text = ' '.preg_replace('/[^[:alnum:]_&]+/',' ',$text).' ';

    // The closing space is only looked at, not consumed, so that neighbouring
    // numbers / short words are all removed (consuming it skipped every second one).
    $text = preg_replace('/ [0-9]+(?= )/','',$text);
    if ($min_word_length > 0) {
        $text = preg_replace('/ [^ ]{1,'.$min_word_length.'}(?= )/','',$text);
    }

    return trim(preg_replace('/ +/',' ',$text));
}

//=================================================
//replacement callback of html_to_plain_text(): turns the numeric entity &#NNN;
//into the matching single byte followed by a space. Codes that do not fit in one
//byte only leave the space.
function phpdig_numeric_entity($match)
{
    $code = (int)$match[1];
    return (($code > 0 && $code < 256) ? chr($code) : '').' ';
}

//advanced striptags function.
//returns text and title
//
// $text  html source of a page
// returns array('content' => plain text of the page, 'title' => text of <title>)
// Named entities are translated with the global $spec table (entity => text).
function html_to_plain_text($text)
{
    //html entities
    global $spec;

    //replace blank characters by spaces
    $text = preg_replace('/[\r\n\t]+/',' ',$text);

    //extracts title
    $title = '';
    if (preg_match('/<title *>([^<>]*)<\/title *>/i',$text,$regs)) {
        $title = $regs[1];
    }

    //delete content of head, script, and style tags
    //(non-greedy: a greedy match also deletes the text between two scripts)
    $text = preg_replace('/<head\b[^<>]*>.*?<\/head>/is',' ',$text);
    $text = preg_replace('/<script\b[^>]*>.*?<\/script>/is',' ',$text);
    $text = preg_replace('/<style\b[^>]*>.*?<\/style>/is',' ',$text);
    //a space after each simple tag keeps neighbouring words apart
    $text = preg_replace('/(<\/?[a-z0-9 ]+>)/i','\\1 ',$text);

    //tries to replace htmlentities by ascii equivalent
    if (is_array($spec)) {
        foreach ($spec as $entity => $plain) {
            $pattern = '/'.preg_quote($entity,'/').';?/i';
            // '\' and '$' would be read as back-references in the replacement
            $plain = addcslashes($plain,'\\$');
            $text  = preg_replace($pattern,$plain,$text);
            $title = preg_replace($pattern,$plain,$title);
        }
    }
    // chr('\1') used to be evaluated before the replacement ran, so every numeric
    // entity became a NUL byte; a callback sees the matched number.
    $text = preg_replace_callback('/&#([0-9]+);/','phpdig_numeric_entity',$text);

    //replace blank characters by spaces
    $text = preg_replace('/[\r\n\t]+/',' ',$text);
    $text = preg_replace('/<\/[a-z0-9]+>/i',' ',$text);
    $text = preg_replace('/--|[{}();"]+/',' ',$text);

    //replace any group of blank characters by an unique space
    $text = preg_replace('/[[:blank:]]+/',' ',strip_tags($text));

    $retour = array();
    $retour['content'] = $text;
    $retour['title'] = $title;
    return $retour;
}

//=================================================
//purify urls from relative components like ./ or ../ and return an array
//
// $eval  link to clean (path with an optional query string)
// returns -1 when the link must be ignored: mailto:, javascript: or news: link,
//         unparsable url, relative path climbing above its starting directory,
//         or redir.php3 script;
//         otherwise an array with
//           'path'     directory part without leading slash and, unless empty,
//                      with a trailing slash
//           'file'     file name ('' when none) followed by "?query" when present
//           'as_query' set to 1 only when the link has a query string
function url_purify($eval)
{
    //delete special links
    if (preg_match('/(mailto|javascript|news):/i',$eval)) {
        return -1;
    }

    $url = @parse_url($eval);
    if (!is_array($url)) {
        return -1;
    }
    $path  = isset($url['path'])  ? $url['path']  : '';
    $query = isset($url['query']) ? $url['query'] : '';
    $absolute = ($path !== '' && $path[0] == '/');

    // The path is resolved segment by segment. Removing the file name with
    // str_replace() also removed a directory of the same name, and pattern based
    // removal of "dir/../" turned "../../x" into "x".
    $segments = explode('/',$path);
    $file = array_pop($segments);
    if ($file == '.' || $file == '..') {
        // a trailing dot segment is a directory reference, not a file
        $segments[] = $file;
        $file = '';
    }

    $clean = array();
    foreach ($segments as $segment) {
        if ($segment === '' || $segment === '.') {
            continue;
        }
        if ($segment === '..') {
            if (count($clean) > 0) {
                array_pop($clean);
            } elseif (!$absolute) {
                //path outside site tree
                return -1;
            }
            // an absolute path cannot climb above the root: stay at the root
            continue;
        }
        $clean[] = $segment;
    }

    $retour = array();
    $retour['path'] = (count($clean) > 0) ? implode('/',$clean).'/' : '';

    if ($query !== '') {
        $file .= "?".$query;
        $retour['as_query'] = 1;
    }
    $retour['file'] = $file;

    if (preg_match('/^redir[.]php3/',$file)) {
        return -1;
    }
    return $retour;
}

/ / - - - - - - - - - - - - - H T T P                                                      F U N C T I O N S
/ / T e s t p r e s e n c e a n d t                                                     y p e o f a n u r l
f u n c t i o n t e s t _ u r l ( $ u r l , $                                           m o d e = ' s i m p l e ' )
                                    {
g l o b a l           $ p h p d i g                                                     _       v e r s i o n                             ;
$ c o m p o n e n t s = p a r s                                                         e     _ u r l ( $ u r l )                         ;
$ h o s t      =    $ c o m p o n e n                                                     t    s [ " h o s t " ]                          ;
$ p o r t = ( i n t ) $ c o m p o n                                                     e     n t s [ " p o r t " ]                       ;
$ p a t h      =    $ c o m p o n e n                                                     t    s [ " p a t h " ]                          ;
$ q u e r y = $ c o m p o n e n                                                         t     s [ " q u e r y " ]                         ;

i               f                           (                   !       $           p             o               r           t           )
                                                                            {
        $           c           p           o                   r   t                    =                            8           0       ;
                                                                            }
e                                                   l                                         s                                           e
                                                                            {
    $           c           p       o           r           t               =                 $           p       o       r           t   ;
                                                                            }

$ f p =                         f s o c k o p e n ( $ h o s t , $ c p o r t ) ;
i     f                                (      $     p     o      r     t      )
   $ p o                        r t         =     " : " . $ p o r t ;
e                                       l                 s                   e
  $     p                         o       r     t          =     "     "      ;

i             f         (                                 !   $     f                           p                 )           {
            / / h o s t                                 d o m a i n                           n o t                   f o u n d
        $   s       t       a   t           u   s           =           "       N   O       H   O       S       T       "   ;
                                                                    }
e                       l                           s                       e                                               {

    i     f       (                               $             q   u    e  r   y   )
        $ p a t h                               . =             " ? " . $ q u e r y ;

/   /   s                                m          a           l           l                   g               e            t
/     *                                 $               r               e               q                                   =
" G E T                                 $ p         a t h                   H       T       T   P       /       1       .   1
H o s t                             :               $ h o                   s       t       $       p       o       r        t

"                                                                                                                           ;
*                                                                                                                           /
$               r                       e               q                               =                                   "
"                                                                                                                           ;

/   /   c   o  m   p     l   e     t     e        g   e   t
$     r     e    q     u       e       s      t          =
" G E T       $ p a t h          H T T P / 1 . 1
H o s t :           $ h o s t $ p o r t
A     c   c    e     p     t     :              *   /    *
A c c e p t - C h a r s e t :        i s o - 8 8 5 9 - 1
A c c e p t - E n c o d i n g :            i d e n t i t y
User-Agent: PhpDig/$phpdig_version (PHP; MySql)

"                                                                                                               ;
    f p u t s ( $ f p , $ r e q u e                                                                      s t ) ;
    $ a n s w e r =  f g e t s ( $ f p , 4                                                              0 9 6 ) ;
    / / t e s t     r e t u r n            c                                                                o d e
    w h     i l e     (   $ a n      s  w                                                                 e   r )
                           {
if (ereg("HTTP/[0-9.]+ ([0-9])[0-9]{2}", $answer,$regs)                                         )
                               {
  i f ( $ r e g s [ 1 ] = = 2 | | $ r e g s [ 1 ] = = 3                                         )
    $ c o d e            =        $ r e g s [ 1 ]                                               ;
  e                    l               s                                                        e
                                 {
    $ s t a t u s            =     " N O F I L E "                                              ;
    b           r          e         a          k                                               ;
                                 }
                               }

i           f               (           $           r           e           q           1       )
                                                {
    $ c u r _ r e q                                     =           $ r e q 1                    ;
    u      n    s    e     t     (     $                 r      e     q     1   )                ;
    / / c l o s e , a n d o p e n                      a n e w c o n e c t i o                  n
    / / o n         t h e          n e                w        l o c a t i o                    n
    f      c     l     o     s      e                 (      $      f     p    )                 ;
    $ f p = f s o c k o p e n                       ( $ h o s t , $ c p o r t )                 ;
    i       f            (     !       $                   f      p     )                       {
          / / h o s t        d o m a                 i n        n o t      f o u n              d
          $ s t a t u s              =                  " N O H O S T "                          ;
          b           r            e                         a            k                      ;
                                                     }
                                                }
e                           l                                   s                               e
    $   c       u   r   _       r   e       q               =           $       r   e       q   ;



f p u t s ( $ f p , $ c u r _ r e q ) ;
$ a n s w e r = f g e t s ( $ f p , 4 0 9 6 ) ;
    /       /           d       e      b      u      g
    / / e c h o        n l 2 b r ( $ c u r _ r e q ) ;
    / / e c h o          $ a n s w e r . ' < b r > ' ;

    / / p a r s e        h e a d e r        l    o c a t i o n
    if (ereg("Location: *(.*)",$answer,$regs)     && $ code == 3)
                                   {
      $     r     e    d     i     r    s             +               +           ;
      i f        ( $ r e d i r s                      >                   4       )
                                     {
        $     a  n   s    w    e      r      =                "           "       ;
        $ s t a t u s              =      " L     O       O       P           "   ;
                                     }
      $ n e w p a t h      =    t r i m ( $ r    e g s [ 1 ] ) ;
      $ n e w u r l = p a r s e _ u r l ( $      n e w p a t h ) ;

     //search if relocation is absolute or relative
     i f ( ! e r e g ( ' ^ / ' , $ n e w u r l [ " p a t h " ] ) )
                                    {
        $path = dirname($path).'/'.$newurl["path"];
                                    }
     e                  l                   s                    e
       $ p a t h     =    $ n e w u r l [ " p a t h " ] ;

     if (!$newurl['host'] || $host == $newurl['host'])
     $ r e q 1 = " G E T $ p a t h H T T P / 1 . 1
H o s t :            $ h o s t $ p o r t
A     c     c   e     p     t   :           *   /    *
A c c e p t - C h a r s e t :      i s o - 8 8 5 9 - 1
A c c e p t - E n c o d i n g :        i d e n t i t y
User-Agent: PhpDig/$phpdig_version (PHP; MySql)
"                                                                    ;
                                    }
    / / P a r s e      c o n t e n t - t y p e            h e a d e r
    elseif (eregi("Content-Type: *(text/[a-z]*)",$answer,$regs))
                                    {
     i f ( $ r e g s [ 1 ] = = " t e x t / h t m l " )
                                      {
       $ s t a t u s                =          " H T M L " ;
                                      }
     e l s e i f ( $ r e g s [ 1 ] = = " t e x t / " )
                                      {
        $      b   o    u    c    l        e          =         0    ;
        w h i l e ( $ b o u c l e                           <      3 )
                                        {
           f p u t s ( $ f p , $ r e q ) ;
           $ a n s w e r = f g e t s ( $ f p , 4 0 9 6 ) ;
           //test presence of <html> tag at the begining
           i f ( e r e g i ( " < h t m l " , $ a n s w e r ) )
                                          {
             $ s t a t u s              =        " H T M L " ;
             $   b   o    u    c      l      e          =        3   ;
                                          }
           $     b     o     u      c        l      e     +    +     ;
                                        }
                                      }
     e l s e i f ( $ r e g s [ 1 ] = = " t e x t / p l a i n " )
                                      {
        eregi('\.([a-z0-9]{1,4})$',$path,$extregs);
        / / e x t e n s i o n t x t o r o t h e r ?
        if (is_array($extregs) && !eregi('txt',$extregs[1]))
          $ s t a t u s               =         " T E X T " ;
        e                  l                      s                  e
                   $ s t a t u s       =     " P L A I N T E X T " ;
                                                }
           e                       l                   s                   e
                                                {
               $     s t a t u s               =     " T E X T " ;
                                                }
                                              }
          elseif     (eregi('Last-Modified: *([a-z0-9,: ]+)',$answer,$regs))
                                                 {
               / /    s e a r c h l a s t - m o d i f i e d h e a d e r
               $      d a t e          =         $ r e g s [ 1 ] ;
                                                 }
    i f     ( !      e r e g i ( ' [ a - z 0 - 9 ] + ' , $ a n s w e r ) )
          $   a          n    s   w     e     r        =         "    "    ;
                                           }

f         c          l    o     s      e       (   $             f       p       )       ;
                                           }

/ / r e t u r n s      v a r i a b l               e              o r a r r a y
i f      ( $ m o      d e        = =                            ' d a t e ' )
                               {
   $ r e t u r n [ ' s t a t u s ' ]                        =       $ s t a t u s ;
   $ r e t u r n [ ' l m _ d a t e '                    ]          =    $ d a t e ;
   r    e   t u   r   n        $   r                   e         t    u    r  n   ;
                               }
e                   l                                      s                             e
  r    e   t  u   r   n        $   s                   t         a   t       u       s   ;
                             }

//=================================================
/**
 * Retrieve the links found in a downloaded page.
 *
 * Every line of $tempfile is scanned for frame sources, href attributes,
 * meta-refresh targets and window.location / window.open() calls. A link is
 * kept when it names no host, or when 'http://<host>/' equals $url; it is
 * then passed through url_purify(), and only array results are retained.
 *
 * @param string $tempfile Local file holding the page content.
 * @param string $url      Site root, compared against 'http://<host>/'.
 * @param string $path     Prefix prepended to links that do not start with '/'.
 * @param string $file     Not used by this function; kept for the callers.
 *
 * @return array|int List of url_purify() results (empty when the page holds
 *                   no usable link), or -1 when $tempfile cannot be read.
 */
function explore($tempfile,$url,$path="",$file ="")
{
    if (!is_file($tempfile)) {
        return -1;
    }
    $file_content = file($tempfile);
    if (!is_array($file_content)) {
        return -1;
    }

    // PCRE transcription of the former POSIX expression. The capture groups
    // keep their numbers: 5 = host[:port], 8 = path, 9 = anchor.
    // In a POSIX bracket expression a backslash is a literal character, so it
    // is spelled out explicitly here to keep the same character sets.
    $blank  = '[[:blank:]]*';
    $quote  = '[\\\\\'"]?';
    $prefix = '(<frame[^>]*src' . $blank . '='
            . '|href' . $blank . '='
            . '|http-equiv=[\'"]refresh[\'"] *content=[\'"][0-9]+;url' . $blank . '='
            . '|window[.]location' . $blank . '='
            . '|window[.]open' . $blank . '[(])';
    $target = '((([\\[a-z]{3,5}://)+(([.a-zA-Z0-9-])+(:[0-9]+)*))*'
            . '([%:/?=&;\\\\,._a-zA-Z0-9-]*))';
    $anchor = '(#[.a-zA-Z0-9-]*)?';
    $link_pattern = '~' . $prefix . $blank . $quote . $target . $anchor . $quote . '~i';

    $links = array();
    foreach ($file_content as $eval) {
        // search hrefs and frames src, one match at a time
        while (preg_match($link_pattern, $eval, $regs)) {
            // remove the matched text so the next pass finds the next link
            $eval = str_replace($regs[0], "", $eval);

            $host   = isset($regs[5]) ? $regs[5] : '';
            $target_path = isset($regs[8]) ? $regs[8] : '';

            // keep only links without host, or on the same site
            if ($host != "" && $url != 'http://'.$host.'/') {
                continue;
            }

            if (substr($target_path, 0, 1) == "/") {
                $candidate = url_purify($target_path);
            } else {
                $candidate = url_purify($path.$target_path);
            }
            if (is_array($candidate)) {
                $links[] = $candidate;
            }
        }
    }
    return $links;
}
//=================================================
/**
 * Test a link: find out whether it is a file or a directory, then apply the
 * robots.txt exclusions.
 *
 * @param array $link    Link parts; the keys 'url', 'path' and 'file' are read.
 * @param mixed $exclude Exclusion table (paths as keys, optional '@ALL@'
 *                       entry), or '' when there is none.
 *
 * @return array $link with 'ok' set to 1 (usable) or 0. When the link turns
 *               out to be a directory, 'path' receives the directory path
 *               with a single trailing slash and 'file' is emptied.
 */
function detect_dir_html($link,$exclude='')
{
    $location = $link['url'].$link['path'].$link['file'];
    $test = test_url($location);

    if ($test == 'HTML' or $test == 'PLAINTEXT') {
        // file
        $link['ok'] = 1;
    } elseif (!preg_match('/[.][a-z]{1,4}$/iD', $link['path'].$link['file'])
              && test_url($location.'/') == "HTML") {
        // dir (names ending with an extension are not retried as directories)
        $link['path'] = preg_replace('~/+$~D', '/', $link['path'].$link['file'].'/');
        $link['file'] = "";
        $link['ok'] = 1;
    } else {
        // none
        $link['ok'] = 0;
    }

    // test the exclude with robots.txt; only an array can hold exclusions
    if (is_array($exclude)
        && (test_robots($exclude,$link['path']) == 1
            || (isset($exclude['@ALL@']) && $exclude['@ALL@'] == 1))) {
        $link['ok'] = 0;
    }

    return $link;
}
//=================================================
/**
 * Search the robots.txt of a site and build the exclusion table that applies
 * to this crawler.
 *
 * Only "User-agent" and "Disallow" lines are read. The rules of the 'phpdig'
 * user-agent win over those of '*'.
 *
 * @param string $site Site root URL; it must end with a slash because
 *                     'robots.txt' is appended to it as is.
 *
 * @return array Exclusion table: keys are disallowed paths without leading
 *               slash and with a trailing slash, or '@ALL@' when "/" is
 *               disallowed. array('@NONE@' => 1) when no rule applies.
 */
function test_robots_txt($site)
{
    $exclude = array();

    if (test_url($site.'robots.txt') == 'PLAINTEXT') {
        $robots = file($site.'robots.txt');

        if (is_array($robots)) {
            // Disallow lines met before any User-agent line belong to nobody.
            $user_agent = null;

            foreach ($robots as $line) {
                if (preg_match('/^user-agent:[ ]*([a-z0-9*]+)/', strtolower($line), $regs)) {
                    $user_agent = $regs[1];
                }
                if ($user_agent !== null
                    && preg_match('~^disallow:[ ]*(/([a-z0-9_/-]*))~i', $line, $regs)) {
                    if ($regs[1] == '/') {
                        // "Disallow: /" closes the whole site
                        $exclude[$user_agent]['@ALL@'] = 1;
                    } else {
                        $rule = $regs[2];
                        if (substr($rule, -1) != '/') {
                            $rule .= '/';
                        }
                        $exclude[$user_agent][$rule] = 1;
                    }
                }
            }
        }

        if (isset($exclude['phpdig']) && is_array($exclude['phpdig'])) {
            return $exclude['phpdig'];
        }
        if (isset($exclude['*']) && is_array($exclude['*'])) {
            return $exclude['*'];
        }
    }

    // Nothing for us: return a clean marker, without the rules that were
    // collected for other user-agents.
    return array('@NONE@' => 1);
}
//=================================================
/**
 * Tell whether a path falls under one of the exclusions of a robots table.
 *
 * @param mixed  $exclude Exclusion table whose keys are path prefixes;
 *                        anything that is not an array excludes nothing.
 * @param string $path    Path to test. Only paths made of [a-z0-9_/-] and
 *                        ending with '/' are checked.
 *
 * @return int 1 when $path starts with one of the keys of $exclude,
 *             0 otherwise.
 */
function test_robots($exclude,$path)
{
    if (!is_array($exclude) || !preg_match('~^[a-z0-9_/-]+/$~D', $path)) {
        return 0;
    }

    foreach (array_keys($exclude) as $path_exclude) {
        // keys are plain path prefixes, so compare them literally
        $path_exclude = (string) $path_exclude;
        if (strncmp($path, $path_exclude, strlen($path_exclude)) == 0) {
            return 1;
        }
    }

    return 0;
}
//=================================================
/**
 * Read the robots directives out of a list of meta tags.
 *
 * The first entry whose name contains "robots" (any case) decides the result.
 *
 * @param mixed $tags Meta tags as name => content; ignored when not an array.
 *
 * @return int|null Bit field built from the content: 1 = nofollow,
 *                  2 = noindex, 4 = none (0 when none of them is present).
 *                  Test it bitwise: & 5 means nofollow, & 6 means noindex.
 *                  Null when $tags holds no robots entry.
 */
function test_robots_tags($tags)
{
    if (!is_array($tags)) {
        return null;
    }

    foreach ($tags as $id => $content) {
        if (!preg_match('/robots/i', (string) $id)) {
            continue;
        }

        $directive = 0;
        if (preg_match('/nofollow/i', $content)) {
            $directive += 1;
        }
        if (preg_match('/noindex/i', $content)) {
            $directive += 2;
        }
        if (preg_match('/none/i', $content)) {
            $directive += 4;
        }
        return $directive;
    }

    return null;
}
//=================================================
?>

Directory locales
en-language.php

<?
// English messages for PhpDig
// 'keyword' => 'translation'
// Multi-line messages keep their line breaks inside the string.
$phpdig_mess = array(
    // administration
    'yes'          => 'yes',
    'no'           => 'no',
    'delete'       => 'delete',
    'reindex'      => 'Re-index',
    'back'         => 'Back',
    'files'        => 'files',
    'admin'        => 'Administration',
    'warning'      => 'Warning !',
    'index_uri'    => 'which URI would you index ?',
    'spider_depth' => 'Search depth',
    'spider_warn'  => "Please ensure that no one else is updating the same site.\nA locking mechanism will be included in a later version.",
    'site_update'  => "Update a site or one of its branches",
    'clean'        => 'Clean',
    't_index'      => "index",
    't_dic'        => 'dictionary',
    't_stopw'      => 'common words',

    // update screens
    'update'       => 'Update',
    'tree_found'   => 'Found tree',
    'update_mess'  => 'Re-index or delete a tree ',
    'update_warn'  => "Exclude is permanent",
    'update_help'  => "Click on the cross to delete the branch\nClick on the green sign to update it",
    'branch_start' => 'Select the folder to display on the left side',
    'branch_help1' => 'Select there documents to update individually',
    'branch_help2' => "Click on the cross to delete a document\nClick on the green sign to reindex it\nThe arrow launches a spidering",
    'redepth'      => 'levels depth',
    'branch_warn'  => "Erase is permanent",
    'to_admin'     => "to admin interface",

    // search form
    'search'       => 'Search',
    'results'      => 'results',
    'display'      => 'display',
    'w_begin'      => 'words begin',
    'w_whole'      => 'exact words',
    'w_part'       => 'any words part',

    // search results
    'limit_to'     => 'limit to',
    'this_path'    => 'this path',
    'total'        => 'total',
    'seconds'      => 'seconds',
    'w_common'     => 'are very common words and were ignored.',
    'w_short'      => 'are too short words and were ignored.',
    's_results'    => 'search results',
    'previous'     => 'Previous',
    'next'         => 'Next',
    'on'           => 'on',

    // indexing reports
    'id_start'     => 'Site indexing',
    'id_end'       => 'Indexing complete !',
    'id_recent'    => 'Was recently indexed',
    'num_words'    => 'Num words',
    'time'         => 'time',
    'error'        => 'Error',
    'no_spider'    => 'Spider not launched',
    'no_site'      => 'No such site in database',
    'no_temp'      => 'No link in temporary table',
    'no_toindex'   => 'No content indexed',
    'double'       => 'Duplicate of an existing document',

    // spidering
    'spidering'    => 'Spidering in progress...',
    'links_more'   => 'more new links',
    'level'        => 'level',
    'links_found'  => 'links found',
    'define_ex'    => 'Define exclusions',
    'index_all'    => 'index all',

    'end'          => 'end'
);
?>

Directory sql
init_db.sql
# Structure table 'engine'
# One row per (document, keyword) pair: spider_id refers to spider.spider_id,
# key_id to keywords.key_id; the search sums weight per document.
CREATE TABLE engine (
  spider_id mediumint(9) NOT NULL,
  key_id mediumint(9) NOT NULL,
  weight smallint(4) NOT NULL,
  KEY spider_id (spider_id),
  KEY key_id (key_id)
);



# --------------------------------------------------------
#
# Structure table 'keywords'
# One row per distinct indexed word; the search looks keyword up with LIKE
# and uses key_id to join with engine.
#

CREATE TABLE keywords (
  key_id mediumint(9) NOT NULL auto_increment,
  keyword varchar(64) NOT NULL,
  UNIQUE key_id_2 (key_id),
  UNIQUE keyword (keyword)
);



# --------------------------------------------------------
#
# Structure table 'sites'
# One row per site; site_id is referenced by spider.site_id and
# tempspider.site_id.
#
# upddate is declared as plain TIMESTAMP: the former display width
# "timestamp(14)" is rejected by MySQL 5.6.4 and later, where the number in
# parentheses is a fractional-seconds precision (0 to 6).
#

CREATE TABLE sites (
  site_id mediumint(9) NOT NULL auto_increment,
  site_url varchar(127) NOT NULL,
  upddate timestamp,
  UNIQUE site_id (site_id)
);



# --------------------------------------------------------
#
# Structure table 'spider'
# One row per indexed document (path + file within a site); engine rows point
# to it through spider_id.
#
# The two date columns are declared as plain TIMESTAMP: the former display
# width "timestamp(14)" is rejected by MySQL 5.6.4 and later.
# NOTE(review): on servers running in strict mode the second TIMESTAMP column
# (last_modified) may need an explicit DEFAULT - confirm on the target server.
#

CREATE TABLE spider (
  spider_id mediumint(9) NOT NULL auto_increment,
  file varchar(127) NOT NULL,
  first_words text NOT NULL,
  upddate timestamp,
  md5 varchar(50),
  site_id mediumint(9) NOT NULL,
  path varchar(127) NOT NULL,
  num_words int(11) DEFAULT '1' NOT NULL,
  last_modified timestamp,
  PRIMARY KEY (spider_id),
  KEY site_id (site_id)
);



# --------------------------------------------------------
#
# Structure table 'tempspider'
# Working table of the spider: one row per link, with its level and its
# indexed / error flags (presumably the queue of links still to visit -
# confirm against spider.php).
#
# upddate is declared as plain TIMESTAMP: the former display width
# "timestamp(14)" is rejected by MySQL 5.6.4 and later.
#

CREATE TABLE tempspider (
  file text NOT NULL,
  id mediumint(11) NOT NULL auto_increment,
  level tinyint(6) NOT NULL,
  path text NOT NULL,
  site_id mediumint(9) NOT NULL,
  indexed tinyint(1) NOT NULL,
  error tinyint DEFAULT '0' NOT NULL,
  upddate timestamp,
  UNIQUE id (id)
);
User Interface (search)
index.php
<?
// Base directory used further down to build include paths
// (for instance "$relative_script_path/includes/common_words.txt").
$relative_script_path = '.';

// microtime() returns "<fraction> <seconds>"; adding the two parts gives the
// start time as a single number of seconds.
$mtime = explode(' ', microtime());
$start_time = array_sum($mtime);

include "./includes/config.php";

// Search mode used when none is given.
if (empty($option)) {
    $option = 'start';
}

i       f                   (       $           q        u       e       r       y       _           s       t           r    i    n           g    )
                                     {
$common_words = common_words("$relative_script_path/includes/common_words.txt");

// LIKE wildcards put before ($like_start) and after ($like_end) a search
// word, indexed by the search mode held in $option:
//   "start" : word%   - keywords beginning with the word
//   "any"   : %word%  - keywords containing the word
//   "exact" : word    - the keyword itself
$like_start = array(
    "start" => "",
    "any"   => "%",
    "exact" => ""
);
$like_end = array(
    "start" => "%",
    "any"   => "%",
    "exact" => ""
);

// SQL filters appended to the search query. They always start empty so that
// only the code below can fill them.
// NOTE(review): request parameters appear to reach this script as plain
// global variables; confirm how $refine, $site and $path are populated.
$wheresite = '';
$wherepath = '';

if ($refine)
{
    $query_string = urldecode($query_string);

    // site_id is a numeric column: force an integer into the SQL
    $wheresite = "AND spider.site_id = ".intval($site)." ";

    if ($path)
    {
        // escape the path once: skip addslashes() when the value was already
        // escaped by magic quotes (the function no longer exists in PHP 8)
        $already_escaped = function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
        $sql_path = $already_escaped ? $path : addslashes($path);
        $wherepath = "AND spider.path like '".$sql_path."' ";
    }

    $refine_url = "&refine=1&site=$site&path=$path";
}

if ($browse)
{
    $query_string = urldecode($query_string);
}

// Number of results per page: a positive integer, else the configured default.
$limite = (int) $limite;
if ($limite <= 0)
{
    $limite = $search_default_limit;
}

// Offset of the first result: never negative.
$lim_start = (int) $lim_start;
if ($lim_start < 0)
{
    $lim_start = 0;
}

// Number of space-separated terms in the raw query.
$n_words = count(explode(" ",$query_string));
$ncrit = 0;
$tin = "0";

// Normalise the query before it is split into words.
$query_to_parse = $query_string;
// - drop double quotes, backslashes and the LIKE wildcard '%'
$query_to_parse = str_replace(array('"', '\\', '%'), '', $query_to_parse);
// - quotes become separators, then lower-case and strip the accents
$query_to_parse = stripaccents(strtolower(preg_replace('/["\']+/', ' ', $query_to_parse)));
// - split hyphenated words ("foo-bar" gives "foo bar"); a '-' that follows a
//   space is left alone because it marks a word to exclude.
//   NOTE(review): the listing printed this pattern with a space before '-',
//   which would erase exactly those exclusion marks; treated as a print
//   artifact - confirm against the original source.
$query_to_parse = preg_replace('/([^ ])-([^ ])/', '$1 $2', $query_to_parse);
// - '_' is a LIKE wildcard too: escape it
$query_to_parse = str_replace("_","\\_",$query_to_parse);
// - collapse runs of spaces
$query_to_parse = trim(preg_replace('/ +/', ' ', $query_to_parse));

// Collect the words of at most $small_words_size characters: they are not
// searched, only reported to the user.
$test_short = $query_to_parse;
$ignore = '';

// a short word between two spaces, at the end, or at the beginning
$short_word = '([^ ]{1,'.(int) $small_words_size.'})';
$short_pattern = '/ '.$short_word.' | '.$short_word.'$|^'.$short_word.' /D';

while (preg_match($short_pattern, $test_short, $regs))
{
    for ($n = 1; $n <= 3; $n++)
    {
        // compare with '' and not by truthiness: the word "0" must be
        // removed as well, otherwise this loop never ends
        if (isset($regs[$n]) && $regs[$n] !== '')
        {
            $ignore .= "\"".$regs[$n]."\", ";
            $test_short = trim(str_replace($regs[$n],"",$test_short));
        }
    }
}

if ($ignore !== '')
{
    $ignore_message = $ignore.' '.msg('w_short');
}

// Split the query into search criteria. For every word longer than
// $small_words_size:
//   - a common word is only reported ($ignore_common);
//   - otherwise criterion number $ncrit is created: $strings[] holds the word
//     ($exclude[] is set instead when it is prefixed by '-') and $in[] the
//     comma-separated ids of the matching keywords, or 0 when there is none.
// The accumulators start empty so that nothing but this loop can put text
// into $in, which is placed in an SQL IN (...) list later on.
$in = array();
$exclude = array();
$strings = null;
$ignore_common = '';

$word_pattern = '/(-)?([^ ]{'.((int) $small_words_size + 1).',}).*/';

while (preg_match($word_pattern, $query_to_parse, $regs))
{
    $word = $regs[2];
    $query_to_parse = trim(str_replace($word,"",$query_to_parse));

    if (isset($common_words[$word]) && $common_words[$word] == 1)
    {
        $ignore_common .= "\"".$word."\", ";
        continue;
    }

    $spider_in = "";
    if ($regs[1] == '-')
    {
        $exclude[$ncrit] = 1;
    }
    else
    {
        $strings[$ncrit] = $word;
    }

    // $word holds no quote nor backslash other than the '\_' escape: they
    // are removed when the query is normalised above
    $query = "SELECT key_id FROM keywords WHERE keyword like '"
           .$like_start[$option].$word.$like_end[$option]."'";
    $tempresult = mysql_db_query($database,$query,$id_connect);

    $key_ids = array();
    if ($tempresult)
    {
        while ($row = mysql_fetch_row($tempresult))
        {
            $key_ids[] = (int) $row[0];
        }
    }
    $in[$ncrit] = $key_ids ? implode(',', $key_ids) : 0;

    $ncrit++;
}

if ($ignore_common !== '')
{
    $ignore_common_message = $ignore_common.' '.msg('w_common');
}

$spiders = "";

// Working tables of the search, all indexed by criterion number:
//   $num_res / $s_weight      - hit count and per-document weight of the
//                               criteria to find;
//   $num_exclude / $s_exclude - hit count and documents of the '-' criteria.
// $final_result (document id => weight) is only created when something is
// found; later code tests it with is_array().
$num_res = array();
$num_exclude = array();
$s_weight = array();
$s_exclude = array();
unset($final_result);

if ($ncrit && is_array($strings))
{
    // "SET OPTION ..." is no longer accepted by recent MySQL servers; the
    // OPTION keyword was optional
    $query = "SET SQL_BIG_SELECTS = 1";
    mysql_db_query($database,$query,$id_connect);

    // one query per criterion: documents holding one of its keywords
    for ($n = 0; $n < $ncrit; $n++)
    {
        $query = "SELECT spider.spider_id,sum(weight) as weight, spider.site_id
        FROM engine,spider
        WHERE engine.key_id IN (".$in[$n].")
        AND engine.spider_id = spider.spider_id $wheresite $wherepath
        GROUP BY spider.spider_id";
        $result = mysql_db_query($database,$query,$id_connect);
        $num_res_temp = $result ? mysql_num_rows($result) : 0;
        $is_excluded = isset($exclude[$n]) && $exclude[$n] == 1;

        if ($num_res_temp > 0)
        {
            if (!$is_excluded)
            {
                $num_res[$n] = $num_res_temp;
                while ($row = mysql_fetch_row($result))
                {
                    $s_weight[$n][$row[0]] = $row[1];
                }
            }
            else
            {
                $num_exclude[$n] = $num_res_temp;
                while ($row = mysql_fetch_row($result))
                {
                    $s_exclude[$n][$row[0]] = 1;
                }
            }
        }
        elseif (!$is_excluded)
        {
            // a criterion without any hit empties the whole result
            $num_res[$n] = 0;
            $s_weight[$n][0] = 0;
        }

        if ($result)
        {
            mysql_free_result($result);
        }
    }

    if ($num_res)
    {
        // start from the criterion with the fewest documents; a document is
        // kept when it appears in every criterion, its final weight being
        // the product of its weights
        asort($num_res);
        reset($num_res);
        $id_most = key($num_res);

        foreach ($s_weight[$id_most] as $spider_id => $weight)
        {
            $weight_tot = 1;
            foreach (array_keys($num_res) as $n)
            {
                $weight_tot *= isset($s_weight[$n][$spider_id]) ? $s_weight[$n][$spider_id] : 0;
            }
            if ($weight_tot > 0)
            {
                $final_result[$spider_id] = $weight_tot;
            }
        }
    }

    // remove the documents matching an excluded word
    foreach (array_keys($num_exclude) as $id)
    {
        foreach (array_keys($s_exclude[$id]) as $spider_id)
        {
            unset($final_result[$spider_id]);
        }
    }
}

i f   ( i s _ a r r a y ( $ f i n a                l _ r e s u l t ) )
                            {
 $ n u m _ t o t = c o u n t ( $ f i              n a l _ r e s u l t ) ;
 a r s o r t ( $ f i n a l _                       r e s u l t ) ;
 $ n _ s t a r t      =    $ l i m                _ s t a r t + 1 ;
  i f        ( $ n _ s t a r t + $ l i m i t e - 1 < $ n u m _ t o t )
                                          {
        $    n _ e n d = ( $ l i m _ s t a r t + $ l i m i t e ) ;
        $      m o r e _ r e s u l t s                    =      1 ;
                                          }
  e                           l                    s                 e
                                          {
        $     n _ e n d              =      $ n u m _ t o t ;
        $     m o r e _ r e s u l t s                     =      0 ;
                                          }

  / / f i l l          t h e   r e s u l t s             t a b l e
  $ r e g _ s t r i n g s = i m p l o d e ( ' | ' , $ s t r i n g s ) ;

  r e s e t ( $ f i n a l _ r e s u l t ) ;
  f o r ( $ n = 1 ; $ n < = $ n u m _ t o t ; $ n + + )
                                                      {
     list($spider_id,$s_weight) = each($final_result);
     i f                ( ! $ m a x w e i g h t )
             $ m a x w e i g h t                           =       $ s _ w e i g h t ;
     if ($n >= $n_start && $n <= $n_end)
                                                        {
         $query = "SELECT sites.site_url,
spider.path,spider.file,spider.first_words,sites.site_id,spider.spider_id FROM spider,sites
WHERE spider.spider_id=$spider_id AND sites.site_id = spider.site_id";
         $result = mysql_db_query($database,$query,$id_connect);
         $content = mysql_fetch_array($result,MYSQL_ASSOC);
         m y s q l _ f r e e _ r e s u l t ( $ r e s u l t ) ;

            $weight = sprintf ("%01.2f", (100*$s_weight)/$maxweight);
        $ u r l = e r e g i _ r e p l a c e ( " ( [ a - z 0 -
9])[/]+","\\1/",$content['site_url'].$content['path'].$content['file']);
        $ l _ s i t e = " < a s t y l e = ' f o n t - s i z e : 1 0 ; '
href='index.php?refine=1&query_string=".urlencode($query_string)."&site=".$content['site_id']."
&limite=$limite&option=$option'>".$content['site_url']."</A>";
        i f          ( $ c o n t e n t [ ' p a t h ' ] )
           $l_path=",".msg('this_path').":<astyle='font-size:10;'
href='index.php?refine=1&query_string=".urlencode($query_string)."&site=".$content['site_id']."
&path=".$content['path']."&limite=$limite&option=$option' >".$content['path']."</A>";
        e                          l                           s                              e
           $       l      _       p        a       t      h        =       "        "         ;

       $first_words = $content['first_words'];
       $first_words = htmlentities($first_words);

       / / i f t h e c o n t e n t - t e x t i s s e t t o 1
       i f      ( C O N T E N T _ T E X T                         = =       1 )
                                           {
         $ e x t r a                    c      t        =            " " ;
         $ c o n t e n t _ f i l e =
$relative_script_path.'/'.TEXT_CONTENT_PATH.$content['spider_id'].'.txt';
         i f     ( i s _ f i l e ( $ c o n t e n t _ f i l e ) )
                                             {
           $ n u m _ e x t r a c t s                               =       0 ;
           $f_handler = fopen($content_file,'r');
           while($num_extracts < 5 && $extract_content = fgets($f_handler,1024))
                                               {
              if(eregi($reg_strings,$extract_content))
                                                {
                $extract .= ' ...'.trim($extract_content).'... ';
                $ n u m _ e x t r a c t s + + ;
                                                }
                                                          }
              f c l o s e ( $ f _ h a n d l e r ) ;
                                  }
                                }

        r e s e t          ( $ s t r i n g s ) ;
        while (list($key, $value) = each($strings))
                                {
          $first_words = highlight($value,$first_words);
          i   f      (   $   e  x   t   r   a   c  t   )
            $extract = highlight($value,$extract);
                                }



    list($title,$text) = explode("\n",$first_words);
    $table_results[$n]['weight'] = $weight;
    $img_width = ceil(WEIGHT_WIDTH*$weight/100);
    $table_results[$n]['img_tag'] = '<IMG BORDER="0" SRC="'.WEIGHT_IMGSRC.'"
WIDTH="'.$img_width.'" HEIGHT="'.WEIGHT_HEIGHT.'">';
    $table_results[$n]['page_link'] = "<A HREF='$url' target='_blank' >$title</A>";
    $table_results[$n]['limit_links'] = msg('limit_to') ." ".$l_site.$l_path;
    i      f              (     $      e     x   t      r      a      c      t    )
      $table_results[$n]['text'] = $extract;
    e                            l                      s                         e
      $ t a b l e _ r e s u l t s [ $ n ] [ ' t e x t ' ] = $ t e x t ;
                                             }
                                           }

  $       n      a        v       _   b       a       r               =           '   '       ;

  i     f         (   $       l   i   m   _       s   t       a   r       t   >           0   )
      $nav_bar .= "
<a
href='index.php?browse=1&query_string=".urlencode($query_string)."$refine_url&lim_start=".(
$lim_start-$limite)."&limite=$limite&option=$option'
>&lt;&lt;".msg('previous')."</a>&nbsp;&nbsp;&nbsp; \n";

   $ t o t _ p a g e s = c e i l ( $ n u m _ t o t / $ l i m i t e ) ;
   $ac tua l_pa ge = $li m_ sta r t/ $l i mi te + 1;
   $ p a g e _ i n f = m a x ( 1 , $ a c t u a l _ p a g e - 4 ) ;
   $page_sup = min($tot_pages,max($actual_page+5,10));
   for ($page = $page_inf; $page <= $page_sup; $page++)
                                                {
    i f        ( $ p a g e            = =         $ a c t u a l _ p a g e )
                                                  {
        $nav_bar .= " <b style='background-color:#000066;color:white'>$page</b> \n";
        $link_actual =
"index.php?browse=1&query_string=".urlencode($query_string)."$refine_url&lim_start=".(($pag
e-1)*$limite)."&limite=$limite&option=$option";
                                                  }
    e                            l                           s                             e
       $nav_bar .= " <a
href='index.php?browse=1&query_string=".urlencode($query_string)."$refine_url&lim_start=".((
$page-1)*$limite)."&limite=$limite&option=$option' >$page</a> \n";
                                                }

   if ($more_results == 1)
      $nav_bar .= " &nbsp;&nbsp;&nbsp;<a
href='index.php?browse=1&query_string=".urlencode($query_string)."$refine_url&lim_start=".(
$lim_start+$limite)."&limite=$limite&option=$option' >".msg('next')."&gt;&gt;</a>\n";

  $ m t i m e = e x p l o d e ( ' ' , m i c r o t i m e ( ) ) ;
  $search_time = sprintf('%01.2f',$mtime[0]+$mtime[1] -$start_time);
  $result_message = stripslashes(ucfirst(msg('results'))." $n_start -$n_end, $num_tot
".msg('total').", ".msg('on')." \"$query_string\" ($search_time ".msg('seconds').")");
                                             }
e                             l                          s                           e
                                             {
  $      n        u     m       _      t      o     t           =             0      ;
  $ r e s u l t _ m e s s a g e = ' N o r e s u l t s ' ;
                                             }

i     f       ( $ t e m p r e s u l t )
    m y s q l _ f r e e _ r e s u l t ( $ t e m p r e s u l t ) ;

$ t i t l e _ m e s s a g e                 =       m s g ( ' s _ r e s u l t s ' ) ;
                                                }
e             l             s             e
$title_message = 'PhpDig '.$phpdig_version;

// Render the results page: hand everything to the template parser when a
// template file exists, otherwise print the built-in HTML layout below.
if (is_file($template))
{
    // compact() picks up the message variables by name so the template can
    // refer to them by the same names.
    $t_mstrings = compact('title_message','phpdig_version','result_message','nav_bar','ignore_message','ignore_common_message');
    // With the extra 'template' argument phpdig_form() is used for its return
    // value here, which is merged into the template variables.
    $t_fstrings = phpdig_form($query_string,$option,$limite,"index.php",$site,$path,'template');
    $t_strings = array_merge($t_mstrings,$t_fstrings);
    parse_phpdig_template($template,$t_strings,$table_results);
}
else
{
?>
<html>
<head>
<title>3spiders.mgt.psu.ac.th</title>
<?php
include "$relative_script_path/includes/style.php";
?>
</head>
<body bgcolor="white">
<div align="center"><img src="/phpdiglogo.gif" width="600" height="143"> <BR>
<?php
// Search form at the top of the page.
phpdig_form($query_string,$option,$limite,"index.php",$site,$path);
?>
  <h3><b style='background-color:#000066;color:white'><?php print $result_message; ?></b>
<br><?php print $ignore_message; ?>
<br><?php print $ignore_common_message; ?>
</h3>
</div>
<?php
// One paragraph (rank, weight, link, refine links) plus one text excerpt
// per result row.
if (is_array($table_results))
{
    foreach ($table_results as $n => $t_result)
    {
        print "<P style='background-color:#CCDDFF;'>\n";
        print "<B>$n. <FONT style='font-size:10;'> [".$t_result['weight']." %]</font>&nbsp;&nbsp;".$t_result['page_link']."</B> \n<br>\n";
        print "<FONT style='font-size:10;background-color:#BBCCEE;'>".$t_result['limit_links']."</font> \n<br>\n";
        print "</P>\n";
        print "<BLOCKQUOTE style='background-color:#EEEEEE;font-size:10;'>\n";
        print $t_result['text'];
        print "</BLOCKQUOTE>\n";
    }
}
// Page navigation bar.
print "<P style='text-align:center;background-color:#CCDDFF;font-weight:bold'>\n";
print $nav_bar;
print "</P>\n";
?>
<hr>
<div align="center">
<?php
// Repeat the search form at the bottom only when a query was submitted.
if ($query_string)
    phpdig_form($query_string,$option,$limite,"index.php",$site,$path);
?>
</div>
<div align='center'> <img src="/phpdigpowered.gif" width="250" height="70"></div>
</body>
</html>
<?php
}
?>
test1.php — Search keyword: looks up the entered keyword in the Thai–English table and links each English match to the search page.
<?php
/*
 * test1.php - look up a keyword in the `thai` column of the Thai/English
 * table and print each matching `english` value as a link back into the
 * main search page (index.php?query_string=...).
 *
 * Input:  $query_string - the keyword to search for.
 * Output: HTML written directly to the page.
 */

// Connection settings.
$hostname = "localhost"; // database server
$user = "3spiders";      // database user
$password = "pat";       // database password
$dbname = "search";      // database name
$tblname = "thai_eng";   // table holding the thai / english columns

// The script was written for register_globals; when that is off, read the
// keyword from the request instead so the script still receives it.
if (!isset($query_string))
{
    $query_string = isset($_REQUEST['query_string']) ? $_REQUEST['query_string'] : '';
}
// Undo magic_quotes_gpc so the keyword is escaped exactly once below.
if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc())
{
    $query_string = stripslashes($query_string);
}

// Connect to the server and select the database; stop with a readable
// message instead of a blank page when either step fails.
mysql_connect($hostname, $user, $password) or die("Could not connect to the database server.");
mysql_select_db($dbname) or die("Could not select the database.");

// Escape the keyword before it goes into the LIKE pattern (it comes from
// the request). '%' and '_' typed by the user still act as wildcards.
$sql = "select * from $tblname where thai like '%" . mysql_real_escape_string($query_string) . "%'";

// The database is already selected, so a plain mysql_query() is enough.
$dbquery = mysql_query($sql);
if (!$dbquery)
{
    die("The search query failed.");
}

// ISO-8859-1 is passed explicitly so that only the ASCII markup characters
// are rewritten and the Thai bytes pass through untouched.
// NOTE(review): confirm the page charset; this is meant to be safe for both
// TIS-620 and UTF-8 pages.
$query_html = htmlspecialchars($query_string, ENT_QUOTES, 'ISO-8859-1');

// Print one "Search Result" entry per matching row.
$found = 0;
while ($result = mysql_fetch_array($dbquery))
{
    $english = $result['english'];
    $english_html = htmlspecialchars($english, ENT_QUOTES, 'ISO-8859-1');

    echo "<center><Table bgcolor=skyblue ></center>";
    echo "<center><h2><font color=skyblue>Search Result</font></h2></center>";
    // urlencode() keeps spaces and '&' in the English term from breaking the link.
    echo "<center><font size=5 color=red><a href=\"index.php?query_string=" . urlencode($english) . "\">" . $english_html . "</a></font></center>";
    echo "</Table> ";
    $found++;
}

// No row matched: tell the user and offer a way back to the search form.
if ($found <= 0)
{
    echo "<center><h2><font color='#FF0000'>Nothing Found! Try another keyword.</font></h2></center>";
    echo "<center><h2><font color='#FF0000'>Could you tell me " . $query_html . " in english? </font></h2></center>";
    echo "<br><font size=4 color=red><a href=\"index.html\">Back to Search</a></font>";
}

mysql_free_result($dbquery);
mysql_close(); // close the database connection
?>

				
DOCUMENT INFO
Shared By:
Categories:
Tags:
Stats:
views:3
posted:1/22/2013
language:Thai
pages:95