<?php

/*
 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
 * Copyright (C) 2003-2007 The Nucleus Group
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * (see nucleus/documentation/index.html#license for more info)
 */

/**
 * Turns a user-entered search string into SQL fragments.
 *
 * The query may contain the boolean words "and", "or" and "not" (any case)
 * or their short forms " ", "," and "-". This variant builds its WHERE clause
 * from LIKE comparisons on the columns named by the caller; the older
 * MATCH ... AGAINST (fulltext) implementation has been removed from this file.
 */
class SEARCH {

	/** Query text after unwanted punctuation has been stripped. */
	var $querystring;
	/** Kept for backward compatibility; never assigned by this class. */
	var $marked;
	/** Space separated atoms that are not excluded (see boolean_inclusive_atoms). */
	var $inclusive;
	/** Blog numbers (int) of all blogs flagged with bincludesearch=1. */
	var $blogs;

	/* Byte-level PCRE fragments used to recognise UTF-8 encoded characters. */
	var $ascii = '[\x00-\x7F]';
	var $two   = '[\xC0-\xDF][\x80-\xBF]';
	var $three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';

	/** Query in shorthand form, as produced by boolean_mark_atoms_jp(). */
	var $jpmarked;

	/**
	 * Parses the query and loads the list of searchable blogs.
	 *
	 * @param string $text raw search text as entered by the visitor
	 */
	function __construct($text) {
		// an ideographic space (U+3000) separates words just like a plain space
		$text = str_replace("\xE3\x80\x80", ' ', $text);

		// strip < > = ? ! # ^ ( ) [ ] : ; %
		// NOTE(review): inside the double-quoted string "\\%" becomes \% and so
		// only matches '%'; backslashes are not removed here - confirm that is
		// intended.
		$text = preg_replace("/[<>=?!#^()[\]:;\\%]/", "", $text);

		$this->jpmarked    = $this->boolean_mark_atoms_jp($text);
		$this->querystring = $text;
		$this->inclusive   = $this->boolean_inclusive_atoms($text);
		$this->blogs       = array();

		// collect every blog that is flagged as searchable; the current blog
		// gets no special treatment here
		$res = sql_query('SELECT bnumber FROM ' . sql_table('blog') . ' WHERE bincludesearch=1 ');
		if ($res) {
			while ($obj = mysql_fetch_object($res)) {
				$this->blogs[] = intval($obj->bnumber);
			}
		}
	}

	/**
	 * PHP4-style constructor name, kept as an alias so that existing code
	 * calling SEARCH() explicitly keeps working.
	 *
	 * @param string $text raw search text
	 */
	function SEARCH($text) {
		$this->__construct($text);
	}

	/**
	 * Relevance expression for the SELECT list.
	 *
	 * The LIKE based search computes no score, so nothing is returned.
	 *
	 * @param string $match comma separated column names (unused)
	 * @return null
	 */
	function boolean_sql_select($match) {
	}

	/**
	 * Returns the atoms of the query that are NOT excluded with "-"/"not",
	 * separated by spaces.
	 *
	 * @param string $string query text
	 * @return string
	 */
	function boolean_inclusive_atoms($string) {
		$result = $this->_boolean_shorthand($string);

		/* drop unnecessary spaces */
		$result = str_replace(' ,', ',', $result);
		$result = str_replace(', ', ',', $result);
		$result = str_replace('- ', '-', $result);
		$result = str_replace('+', '', $result);

		/* strip exclusive atoms; an atom may hold ASCII alphanumerics as well as
		   two and three byte UTF-8 sequences */
		$mb = $this->two . '|' . $this->three;

		// "-(atom,atom)" form
		$result = preg_replace(
			'(\-\(([A-Za-z0-9]|' . $mb . '){1,}([A-Za-z0-9\-\.\_\,]|' . $mb . '){0,}\))',
			'',
			$result);

		// "-atom" form
		$result = preg_replace(
			'(\-([A-Za-z0-9]|' . $mb . '){1,}([A-Za-z0-9\-\.\_\,]|' . $mb . '){0,})',
			'',
			$result);

		/* whatever grouping/OR syntax is left becomes a plain separator */
		$result = str_replace('(', ' ', $result);
		$result = str_replace(')', ' ', $result);
		$result = str_replace(',', ' ', $result);

		return $result;
	}

	/**
	 * Builds the WHERE condition for the query given to the constructor.
	 *
	 * @param string $match comma separated column names of the item table
	 * @return string parenthesised SQL condition
	 */
	function boolean_sql_where($match) {
		return $this->boolean_sql_where_jp_short($this->jpmarked, $match);
	}

	/**
	 * Returns its argument unchanged. Nothing in this file calls it any more;
	 * it is kept because it is part of the class interface.
	 *
	 * @param mixed $foo
	 * @return mixed
	 */
	function copyvalue($foo) {
		return $foo;
	}

	/***********************************************
		Make "WHERE" (jp)
	 ***********************************************/

	/**
	 * Converts the query to shorthand form: atoms separated by single spaces,
	 * where an atom starting with "," is an OR term and an atom starting with
	 * "-" is an excluded term.
	 *
	 * @param string $string query text
	 * @return string
	 */
	function boolean_mark_atoms_jp($string) {
		$result = $this->_boolean_shorthand($string);

		/* strip excessive whitespace */
		$result = str_replace(', ', ',', $result);
		$result = str_replace(' ,', ',', $result);
		$result = str_replace('- ', '-', $result);
		$result = str_replace('+', '', $result);

		// put a space in front of every OR marker so that explode(' ') keeps
		// the comma attached to the atom that follows it
		$result = str_replace(',', ' ,', $result);

		return $result;
	}

	/**
	 * Builds a LIKE based SQL condition from a shorthand query.
	 *
	 * The first atom is always taken literally. Every following atom is
	 * combined with OR (leading ","), AND NOT (leading "-") or AND (anything
	 * else). Atoms that are empty once their marker is removed are skipped:
	 * they would otherwise produce LIKE '%%', which makes an OR term match
	 * everything and a NOT term match nothing.
	 *
	 * @param string $string shorthand query (see boolean_mark_atoms_jp)
	 * @param string $match  comma separated column names; each one is
	 *                       referenced as i.<column>
	 * @return string parenthesised SQL condition
	 */
	function boolean_sql_where_jp_short($string, $match) {
		$columns = explode(',', $match);
		$atoms   = explode(' ', $string);

		$like = $this->_like_group($columns, $atoms[0], false, '');

		$total = count($atoms);
		for ($kn = 1; $kn < $total; $kn++) {
			$atom = $atoms[$kn];
			if ($atom === '') {
				continue;
			}

			$marker = substr($atom, 0, 1);
			if ($marker == ',' || $marker == '-') {
				// substr() yields false instead of '' on older PHP versions
				$term = (string) substr($atom, 1);
				if ($term === '') {
					continue;
				}
				if ($marker == ',') {
					$like .= ' OR ' . $this->_like_group($columns, $term, false, ' ');
				} else {
					$like .= ' AND ' . $this->_like_group($columns, $term, true, ' ');
				}
			} else {
				$like .= ' AND ' . $this->_like_group($columns, $atom, false, ' ');
			}
		}

		return '(' . $like . ')';
	}

	/***********************************************/

	/**
	 * Helper: trims the query, collapses runs of whitespace and rewrites the
	 * words " not ", " and ", " or " (case-insensitive) to " -", " " and ",".
	 *
	 * @param string $string query text
	 * @return string
	 */
	function _boolean_shorthand($string) {
		$result = trim($string);
		$result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

		/* convert normal boolean operators to shortened syntax */
		$result = preg_replace('/ not /i', ' -', $result);
		$result = preg_replace('/ and /i', ' ', $result);
		$result = preg_replace('/ or /i', ',', $result);

		return $result;
	}

	/**
	 * Helper: one parenthesised group that tests a single atom against every
	 * column.
	 *
	 * @param array  $columns column names
	 * @param string $atom    search term; escaped here with addslashes()
	 * @param bool   $negate  true: no column may contain the term;
	 *                        false: at least one column must contain it
	 * @param string $lead    text put in front of every comparison
	 * @return string
	 */
	function _like_group($columns, $atom, $negate, $lead) {
		$needle = addslashes($atom);
		$not    = $negate ? 'NOT' : '';

		$parts = array();
		foreach ($columns as $column) {
			$parts[] = $lead . $not . "(i.$column LIKE '%" . $needle . "%') ";
		}

		return '(' . implode($negate ? ' and ' : ' or ', $parts) . ')';
	}
}