SEARCH.php

Go to the documentation of this file.
00001 <?php
00002 
00003 /*
00004  * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
00005  * Copyright (C) 2003-2007 The Nucleus Group
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License
00009  * as published by the Free Software Foundation; either version 2
00010  * of the License, or (at your option) any later version.
00011  * (see nucleus/documentation/index.html#license for more info)
00012  */
/**
 * Turns a free-text weblog search query into SQL fragments.
 *
 * The query may use the boolean words "and", "or" and "not" (case-insensitive)
 * or the short forms " " (AND), "," (OR) and "-" (NOT). Matching is done with
 * plain LIKE '%term%' conditions rather than MySQL fulltext search, so that
 * multi-byte (e.g. Japanese) text can be searched.
 */
class SEARCH {

    /** Query text after the constructor's character stripping. */
    var $querystring;

    /** Not assigned anywhere in this class; kept so existing readers of the property keep working. */
    var $marked;

    /** Query reduced to its non-excluded terms, separated by spaces (see boolean_inclusive_atoms). */
    var $inclusive;

    /** Integer blog numbers of every blog whose bincludesearch flag is 1. */
    var $blogs;

    /** Query in the short "term ,term -term" form consumed by boolean_sql_where(). */
    var $jpmarked;

    /** PCRE fragment: one byte in the range 0x00-0x7F. */
    var $ascii = '[\x00-\x7F]';

    /** PCRE fragment: a lead byte 0xC0-0xDF followed by one byte 0x80-0xBF. */
    var $two = '[\xC0-\xDF][\x80-\xBF]';

    /** PCRE fragment: a lead byte 0xE0-0xEF followed by two bytes 0x80-0xBF. */
    var $three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';

    /**
     * Sanitises the query, pre-computes its parsed forms and loads the list
     * of searchable blogs.
     *
     * Defined before SEARCH() so that PHP 5+ picks this as the constructor;
     * PHP 4 ignores it and uses SEARCH(), which delegates here.
     *
     * @param string $text raw search query
     */
    function __construct($text) {
        // The byte sequence E3 80 80 (a full-width space in UTF-8) is treated as a normal space.
        $text = str_replace("\xE3\x80\x80", ' ', $text);

        // Drop characters that are not wanted in a search term.
        // NOTE(review): a backslash is NOT part of this set, so backslashes
        // survive into the query; they are only escaped later by addslashes().
        $text = preg_replace('/[<>=?!#^()[\]:;%]/', '', $text);

        $this->jpmarked    = $this->boolean_mark_atoms_jp($text);
        $this->querystring = $text;
        $this->inclusive   = $this->boolean_inclusive_atoms($text);
        $this->blogs       = array();

        // Collect every blog flagged as searchable. The current blog is not
        // added here; callers that need it must add it themselves.
        // NOTE(review): mysql_fetch_object() does not exist on PHP 7+; no
        // replacement wrapper is visible in this file, so the call is unchanged.
        $res = sql_query('SELECT bnumber FROM '.sql_table('blog').' WHERE bincludesearch=1 ');
        while ($obj = mysql_fetch_object($res)) {
            $this->blogs[] = intval($obj->bnumber);
        }
    }

    /**
     * PHP 4-style constructor, kept for backward compatibility.
     *
     * @param string $text raw search query
     */
    function SEARCH($text) {
        $this->__construct($text);
    }

    /**
     * Relevance-score expression for the SELECT list.
     *
     * The fulltext-based implementation is disabled, so this method has an
     * empty body and always yields null.
     *
     * @param string $match comma-separated column names (unused)
     * @return null
     */
    function boolean_sql_select($match) {
    }

    /**
     * Reduces a query to the terms that are not excluded.
     *
     * Excluded atoms ("-term" and "-(term,...)") are removed; parentheses and
     * commas are turned into spaces. The result may contain leading, trailing
     * or repeated spaces.
     *
     * @param string $string search query
     * @return string space-separated remaining terms
     */
    function boolean_inclusive_atoms($string) {
        $result = $this->_shorten_operators($string);

        /* drop unnecessary spaces around operators, and all '+' signs */
        $result = str_replace(array(' ,', ', ', '- ', '+'), array(',', ',', '-', ''), $result);

        // An atom starts with an ASCII alphanumeric or a 2/3-byte sequence and
        // may continue with those plus "-", ".", "_" and ",".
        $head = '(?:[A-Za-z0-9]|' . $this->two . '|' . $this->three . ')+';
        $tail = '(?:[A-Za-z0-9\-._,]|' . $this->two . '|' . $this->three . ')*';

        /* strip exclusive atoms: first the parenthesised form, then the bare form */
        $result = preg_replace('/-\(' . $head . $tail . '\)/', '', $result);
        $result = preg_replace('/-' . $head . $tail . '/', '', $result);

        return str_replace(array('(', ')', ','), ' ', $result);
    }

    /**
     * Builds the WHERE condition for the query given to the constructor.
     *
     * @param string $match comma-separated column names to search in
     * @return string parenthesised SQL condition
     */
    function boolean_sql_where($match) {
        return $this->boolean_sql_where_jp_short($this->jpmarked, $match);
    }

    /**
     * Identity function: returns its argument unchanged.
     *
     * @param mixed $foo
     * @return mixed
     */
    function copyvalue($foo) {
        return $foo;
    }

    /**
     * Rewrites a query into the short form "term term ,term -term":
     * a bare term means AND, a leading "," means OR, a leading "-" means NOT.
     *
     * @param string $string search query
     * @return string terms separated by single spaces, each OR-term prefixed with ","
     */
    function boolean_mark_atoms_jp($string) {
        $result = $this->_shorten_operators($string);

        /* strip excessive whitespace around operators, and all '+' signs */
        $result = str_replace(array(', ', ' ,', '- ', '+'), array(',', ',', '-', ''), $result);

        // Put one space in front of every comma so that splitting on ' '
        // later yields the OR-terms as separate ",term" tokens.
        return str_replace(',', ' ,', $result);
    }

    /**
     * Converts a short-form query (see boolean_mark_atoms_jp) into a SQL
     * condition made of LIKE '%term%' tests on columns of table alias "i".
     *
     * Each term is tested against every column. A term prefixed with ","
     * is joined with OR, a term prefixed with "-" with AND NOT, any other
     * term with AND. Terms are escaped with addslashes().
     *
     * NOTE(review): the first term is always treated as a plain positive
     * term, even when it starts with "-" or ",".
     *
     * @param string $string short-form query, terms separated by single spaces
     * @param string $match  comma-separated column names
     * @return string parenthesised SQL condition
     */
    function boolean_sql_where_jp_short($string, $match) {
        $fields = explode(',', $match);
        $terms  = explode(' ', $string);

        $like = '(' . implode(' or ', $this->_like_clauses($fields, $terms[0], '')) . ')';

        $total = count($terms);
        for ($n = 1; $n < $total; $n++) {
            $term   = $terms[$n];
            $marker = substr($term, 0, 1);

            if ($marker === ',') {
                // OR-term: any column may contain it
                $clauses = $this->_like_clauses($fields, substr($term, 1), ' ');
                $like .= ' OR (' . implode(' or ', $clauses) . ')';
            } elseif ($marker !== '-') {
                // AND-term: any column may contain it
                $clauses = $this->_like_clauses($fields, $term, ' ');
                $like .= ' AND (' . implode(' or ', $clauses) . ')';
            } else {
                // NOT-term: no column may contain it
                $clauses = $this->_like_clauses($fields, substr($term, 1), ' NOT');
                $like .= ' AND (' . implode(' and ', $clauses) . ')';
            }
        }

        return '(' . $like . ')';
    }

    /**
     * Helper: trims the query, collapses whitespace runs to one space and
     * converts the words "not", "and", "or" (surrounded by spaces, any case)
     * to " -", " " and ",".
     *
     * Uses preg_replace() with the "i" flag instead of eregi_replace(),
     * which is unavailable on PHP 7 and later.
     *
     * @param string $string search query
     * @return string normalised query
     */
    function _shorten_operators($string) {
        $result = preg_replace('/([[:space:]]{2,})/', ' ', trim($string));

        return preg_replace(
            array('/ not /i', '/ and /i', '/ or /i'),
            array(' -', ' ', ','),
            $result
        );
    }

    /**
     * Helper: builds one "(i.<column> LIKE '%<term>%') " test per column.
     *
     * @param array  $fields column names
     * @param string $term   search term (escaped here with addslashes())
     * @param string $prefix text placed in front of each test, e.g. ' NOT'
     * @return array list of SQL fragments, one per column
     */
    function _like_clauses($fields, $term, $prefix) {
        $escaped = addslashes($term);
        $clauses = array();
        foreach ($fields as $field) {
            $clauses[] = $prefix . "(i.$field LIKE '%" . $escaped . "%') ";
        }
        return $clauses;
    }
}
00275 ?>



Generated on Wed Jun 25 17:25:59 2008 by  doxygen 1.5.5