<?php

/**
 * Conversions between the named, numeric and literal UTF-8 forms of HTML
 * character references.
 *
 * All methods are static and operate on plain strings. The lookup tables are
 * kept in the global `$_entities` array (keys `cp1251` and `named`), which is
 * populated at the bottom of this file.
 *
 * NOTE(review): the listing this file was recovered from had every character
 * reference inside string literals rendered as the character itself. The
 * literals below were restored from how the code uses them (keys without a
 * trailing ';' in the `named` table, decimal values so that
 * numeric_to_named() can flip the table, normalised '&#xHH;' keys in the
 * `cp1251` table) -- verify against the upstream copy if one is available.
 */
class entity
{
    /**
     * Replaces named references (`&copy;`, `&mdash` ...) with decimal numeric
     * references (`&#169;`, `&#8212;`).
     *
     * A name directly followed by `=` is treated as a query-string parameter
     * and left alone. Names are matched by longest known prefix, so a missing
     * semicolon is tolerated (`&copyright` becomes `&#169;right`).
     * NOTE(review): prefix matching applies to every table entry, including
     * short ones such as `&or`; callers passing raw URLs should be aware.
     *
     * @param string $string Text to convert.
     * @return string|null Converted text, or null if the regex engine fails.
     */
    public static function named_to_numeric($string)
    {
        // Only one terminator is captured; the original `;?=?` also consumed
        // a '=' that followed ';' and then dropped it from the output.
        return preg_replace_callback(
            '/(&[0-9A-Za-z]+)(;|=)?/',
            static function ($match) {
                return self::_named($match[1], isset($match[2]) ? $match[2] : '');
            },
            $string
        );
    }

    /**
     * Rewrites every numeric reference as `&#xHH;` (upper-case hex, no leading
     * zeros, always terminated) and then remaps the references listed in the
     * `cp1251` table to their Unicode equivalents.
     *
     * @param string $string Text to normalise.
     * @return string Normalised text.
     */
    public static function normalize_numeric($string)
    {
        return strtr(self::_normalize_references($string), self::_table('cp1251'));
    }

    /**
     * Replaces every numeric reference (decimal or hex) with the UTF-8
     * encoding of its code point.
     *
     * @param string $string Text to convert.
     * @return string|null Converted text, or null if the regex engine fails.
     */
    public static function numeric_to_utf8($string)
    {
        return preg_replace_callback(
            '/&#x([0-9A-Fa-f]+);/',
            static function ($match) {
                return self::_hex_to_utf8($match[1]);
            },
            self::_normalize_references($string)
        );
    }

    /**
     * Replaces numeric references that have a known name with the named form
     * (`&#169;` and `&#xA9;` both become `&copy;`). Hex references without a
     * known name are left in decimal form.
     *
     * @param string $string Text to convert.
     * @return string|null Converted text, or null if the regex engine fails.
     */
    public static function numeric_to_named($string)
    {
        $string = preg_replace_callback(
            '/&#[Xx]([0-9A-Fa-f]+)/',
            static function ($match) {
                return '&#' . hexdec($match[1]);
            },
            $string
        );

        $names = array_flip(self::_table('named'));

        // Look up the complete digit run. A plain strtr() over '&#NNN' keys
        // also rewrote the front of longer numbers (e.g. '&#19968;').
        return preg_replace_callback(
            '/&#[0-9]+/',
            static function ($match) use ($names) {
                return isset($names[$match[0]]) ? $names[$match[0]] : $match[0];
            },
            $string
        );
    }

    /**
     * Escapes `"`, `&`, `'`, `<` and `>` while leaving existing character
     * references (`&name;`, `&#123;`, `&#xAB;`) untouched.
     *
     * @param string $string Text to escape.
     * @param string $type   'xml' emits `&apos;` for the apostrophe; any other
     *                       value emits `&#39;`.
     * @return string|null Escaped text, or null if the regex engine fails.
     */
    public static function specialchars($string, $type = 'xml')
    {
        $apos = $type === 'xml' ? '&apos;' : '&#39;';
        $escapes = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "'" => $apos,
            '<' => '&lt;',
            '>' => '&gt;',
        );

        // One pass: at each position an existing reference wins over a bare
        // '&', so no placeholder text has to be written into the string.
        return preg_replace_callback(
            '/&#?[Xx]?[0-9A-Za-z]+;|["&\'<>]/',
            static function ($match) use ($escapes, $apos) {
                $token = $match[0];
                if (isset($escapes[$token])) {
                    return $escapes[$token];
                }

                // Existing references pass through; `&apos;` follows $type.
                return $token === '&apos;' ? $apos : $token;
            },
            $string
        );
    }

    /**
     * Encodes a code point, given as a hex string, as UTF-8.
     *
     * Internal helper; kept callable from outside for backward compatibility.
     *
     * @param string $s Hex digits of the code point, without prefix.
     * @return string UTF-8 bytes; U+FFFD for surrogates and values above
     *                U+10FFFF, which have no valid UTF-8 encoding.
     */
    public static function _hex_to_utf8($s)
    {
        $c = hexdec($s); // float when the value exceeds the integer range

        if ($c > 0x10FFFF || ($c >= 0xD800 && $c <= 0xDFFF)) {
            return "\xEF\xBF\xBD";
        }

        $c = (int) $c;

        if ($c < 0x80) {
            return chr($c);
        }

        if ($c < 0x800) {
            return chr(0xC0 | ($c >> 6))
                . chr(0x80 | ($c & 0x3F));
        }

        if ($c < 0x10000) {
            return chr(0xE0 | ($c >> 12))
                . chr(0x80 | (($c >> 6) & 0x3F))
                . chr(0x80 | ($c & 0x3F));
        }

        return chr(0xF0 | ($c >> 18))
            . chr(0x80 | (($c >> 12) & 0x3F))
            . chr(0x80 | (($c >> 6) & 0x3F))
            . chr(0x80 | ($c & 0x3F));
    }

    /**
     * Resolves one `&name` candidate against the `named` table.
     *
     * Internal helper; kept callable from outside for backward compatibility.
     *
     * @param string $entity Candidate including the leading '&', without ';'.
     * @param string $extra  Terminator that followed the candidate: ';', '='
     *                       or ''.
     * @return string Replacement text for candidate plus terminator.
     */
    public static function _named($entity, $extra)
    {
        if ($extra === '=') {
            return $entity . '=';
        }

        $terminator = $extra === ';' ? ';' : '';
        $table = self::_table('named');

        // Longest known prefix wins, so '&sup2' is tried before '&sup'.
        for ($length = strlen($entity); $length > 0; $length--) {
            $prefix = substr($entity, 0, $length);
            if (!isset($table[$prefix])) {
                continue;
            }

            $rest = (string) substr($entity, $length);

            // On a partial match the original ';' belongs to the leftover
            // text and must not be lost.
            return $table[$prefix] . ';' . $rest . ($rest !== '' ? $terminator : '');
        }

        return $entity . $terminator;
    }

    /**
     * Brings every numeric reference into the form `&#xHH;`.
     *
     * @param string $string Text to normalise.
     * @return string|null Normalised text, or null if the regex engine fails.
     */
    private static function _normalize_references($string)
    {
        // Decimal -> hex. Runs longer than nine significant digits cannot be
        // code points and could overflow dechex(); they are left untouched.
        $string = preg_replace_callback(
            '/&#([0-9]+)(;)?/',
            static function ($match) {
                if (strlen(ltrim($match[1], '0')) > 9) {
                    return $match[0];
                }

                return '&#x' . dechex((int) $match[1]) . ';';
            },
            $string
        );

        // Hex: drop leading zeros, upper-case the digits, force the ';'.
        return preg_replace_callback(
            '/&#[Xx]0*([0-9A-Fa-f]+);?/',
            static function ($match) {
                return '&#x' . strtoupper($match[1]) . ';';
            },
            $string
        );
    }

    /**
     * Returns one lookup table from the global `$_entities` array.
     *
     * @param string $name Table key ('cp1251' or 'named').
     * @return array<string, string> The table, or an empty array if missing.
     */
    private static function _table($name)
    {
        if (isset($GLOBALS['_entities'][$name]) && is_array($GLOBALS['_entities'][$name])) {
            return $GLOBALS['_entities'][$name];
        }

        return array();
    }
}


// Bind to the real global even when this file is included from inside a
// function; at top level this statement has no effect.
global $_entities;

// References in the 0x80-0x9F range mapped to the characters they stand for.
// The key name is historical: the mapped characters (euro sign, curly quotes,
// dashes ...) are those of Windows-1252.
$_entities['cp1251'] = array (
    '&#x80;' => '&#x20AC;', '&#x82;' => '&#x201A;', '&#x83;' => '&#x192;',
    '&#x84;' => '&#x201E;', '&#x85;' => '&#x2026;', '&#x86;' => '&#x2020;',
    '&#x87;' => '&#x2021;', '&#x88;' => '&#x2C6;',  '&#x89;' => '&#x2030;',
    '&#x8A;' => '&#x160;',  '&#x8B;' => '&#x2039;', '&#x8C;' => '&#x152;',
    '&#x8E;' => '&#x17D;',  '&#x91;' => '&#x2018;', '&#x92;' => '&#x2019;',
    '&#x93;' => '&#x201C;', '&#x94;' => '&#x201D;', '&#x95;' => '&#x2022;',
    '&#x96;' => '&#x2013;', '&#x97;' => '&#x2014;', '&#x98;' => '&#x2DC;',
    '&#x99;' => '&#x2122;', '&#x9A;' => '&#x161;',  '&#x9B;' => '&#x203A;',
    '&#x9C;' => '&#x153;',  '&#x9E;' => '&#x17E;',  '&#x9F;' => '&#x178;',
);

// Named reference => decimal numeric reference, both without the trailing ';'
// (entity::_named() and entity::numeric_to_named() rely on that).
$_entities['named'] = array (
    '&nbsp'     => '&#160',  '&iexcl'   => '&#161',  '&cent'    => '&#162',
    '&pound'    => '&#163',  '&curren'  => '&#164',  '&yen'     => '&#165',
    '&brvbar'   => '&#166',  '&sect'    => '&#167',  '&uml'     => '&#168',
    '&copy'     => '&#169',  '&ordf'    => '&#170',  '&laquo'   => '&#171',
    '&not'      => '&#172',  '&shy'     => '&#173',  '&reg'     => '&#174',
    '&macr'     => '&#175',  '&deg'     => '&#176',  '&plusmn'  => '&#177',
    '&sup2'     => '&#178',  '&sup3'    => '&#179',  '&acute'   => '&#180',
    '&micro'    => '&#181',  '&para'    => '&#182',  '&middot'  => '&#183',
    '&cedil'    => '&#184',  '&sup1'    => '&#185',  '&ordm'    => '&#186',
    '&raquo'    => '&#187',  '&frac14'  => '&#188',  '&frac12'  => '&#189',
    '&frac34'   => '&#190',  '&iquest'  => '&#191',  '&Agrave'  => '&#192',
    '&Aacute'   => '&#193',  '&Acirc'   => '&#194',  '&Atilde'  => '&#195',
    '&Auml'     => '&#196',  '&Aring'   => '&#197',  '&AElig'   => '&#198',
    '&Ccedil'   => '&#199',  '&Egrave'  => '&#200',  '&Eacute'  => '&#201',
    '&Ecirc'    => '&#202',  '&Euml'    => '&#203',  '&Igrave'  => '&#204',
    '&Iacute'   => '&#205',  '&Icirc'   => '&#206',  '&Iuml'    => '&#207',
    '&ETH'      => '&#208',  '&Ntilde'  => '&#209',  '&Ograve'  => '&#210',
    '&Oacute'   => '&#211',  '&Ocirc'   => '&#212',  '&Otilde'  => '&#213',
    '&Ouml'     => '&#214',  '&times'   => '&#215',  '&Oslash'  => '&#216',
    '&Ugrave'   => '&#217',  '&Uacute'  => '&#218',  '&Ucirc'   => '&#219',
    '&Uuml'     => '&#220',  '&Yacute'  => '&#221',  '&THORN'   => '&#222',
    '&szlig'    => '&#223',  '&agrave'  => '&#224',  '&aacute'  => '&#225',
    '&acirc'    => '&#226',  '&atilde'  => '&#227',  '&auml'    => '&#228',
    '&aring'    => '&#229',  '&aelig'   => '&#230',  '&ccedil'  => '&#231',
    '&egrave'   => '&#232',  '&eacute'  => '&#233',  '&ecirc'   => '&#234',
    '&euml'     => '&#235',  '&igrave'  => '&#236',  '&iacute'  => '&#237',
    '&icirc'    => '&#238',  '&iuml'    => '&#239',  '&eth'     => '&#240',
    '&ntilde'   => '&#241',  '&ograve'  => '&#242',  '&oacute'  => '&#243',
    '&ocirc'    => '&#244',  '&otilde'  => '&#245',  '&ouml'    => '&#246',
    '&divide'   => '&#247',  '&oslash'  => '&#248',  '&ugrave'  => '&#249',
    '&uacute'   => '&#250',  '&ucirc'   => '&#251',  '&uuml'    => '&#252',
    '&yacute'   => '&#253',  '&thorn'   => '&#254',  '&yuml'    => '&#255',
    // '&oelig' previously carried the code point of a-ring and '&ensp' was
    // misspelled '&esnp'; both corrected here.
    '&OElig'    => '&#338',  '&oelig'   => '&#339',  '&Scaron'  => '&#352',
    '&scaron'   => '&#353',  '&Yuml'    => '&#376',  '&circ'    => '&#710',
    '&tilde'    => '&#732',  '&ensp'    => '&#8194', '&emsp'    => '&#8195',
    '&thinsp'   => '&#8201', '&zwnj'    => '&#8204', '&zwj'     => '&#8205',
    '&lrm'      => '&#8206', '&rlm'     => '&#8207', '&ndash'   => '&#8211',
    '&mdash'    => '&#8212', '&lsquo'   => '&#8216', '&rsquo'   => '&#8217',
    '&sbquo'    => '&#8218', '&ldquo'   => '&#8220', '&rdquo'   => '&#8221',
    '&bdquo'    => '&#8222', '&dagger'  => '&#8224', '&Dagger'  => '&#8225',
    '&permil'   => '&#8240', '&lsaquo'  => '&#8249', '&rsaquo'  => '&#8250',
    '&euro'     => '&#8364', '&fnof'    => '&#402',  '&Alpha'   => '&#913',
    '&Beta'     => '&#914',  '&Gamma'   => '&#915',  '&Delta'   => '&#916',
    '&Epsilon'  => '&#917',  '&Zeta'    => '&#918',  '&Eta'     => '&#919',
    '&Theta'    => '&#920',  '&Iota'    => '&#921',  '&Kappa'   => '&#922',
    '&Lambda'   => '&#923',  '&Mu'      => '&#924',  '&Nu'      => '&#925',
    '&Xi'       => '&#926',  '&Omicron' => '&#927',  '&Pi'      => '&#928',
    '&Rho'      => '&#929',  '&Sigma'   => '&#931',  '&Tau'     => '&#932',
    '&Upsilon'  => '&#933',  '&Phi'     => '&#934',  '&Chi'     => '&#935',
    '&Psi'      => '&#936',  '&Omega'   => '&#937',  '&alpha'   => '&#945',
    '&beta'     => '&#946',  '&gamma'   => '&#947',  '&delta'   => '&#948',
    '&epsilon'  => '&#949',  '&zeta'    => '&#950',  '&eta'     => '&#951',
    '&theta'    => '&#952',  '&iota'    => '&#953',  '&kappa'   => '&#954',
    '&lambda'   => '&#955',  '&mu'      => '&#956',  '&nu'      => '&#957',
    '&xi'       => '&#958',  '&omicron' => '&#959',  '&pi'      => '&#960',
    '&rho'      => '&#961',  '&sigmaf'  => '&#962',  '&sigma'   => '&#963',
    '&tau'      => '&#964',  '&upsilon' => '&#965',  '&phi'     => '&#966',
    '&chi'      => '&#967',  '&psi'     => '&#968',  '&omega'   => '&#969',
    '&thetasym' => '&#977',  '&upsih'   => '&#978',  '&piv'     => '&#982',
    '&bull'     => '&#8226', '&hellip'  => '&#8230', '&prime'   => '&#8242',
    '&Prime'    => '&#8243', '&oline'   => '&#8254', '&frasl'   => '&#8260',
    '&weierp'   => '&#8472', '&image'   => '&#8465', '&real'    => '&#8476',
    '&trade'    => '&#8482', '&alefsym' => '&#8501', '&larr'    => '&#8592',
    '&uarr'     => '&#8593', '&rarr'    => '&#8594', '&darr'    => '&#8595',
    '&harr'     => '&#8596', '&crarr'   => '&#8629', '&lArr'    => '&#8656',
    '&uArr'     => '&#8657', '&rArr'    => '&#8658', '&dArr'    => '&#8659',
    '&hArr'     => '&#8660', '&forall'  => '&#8704', '&part'    => '&#8706',
    '&exist'    => '&#8707', '&empty'   => '&#8709', '&nabla'   => '&#8711',
    '&isin'     => '&#8712', '&notin'   => '&#8713', '&ni'      => '&#8715',
    '&prod'     => '&#8719', '&sum'     => '&#8721', '&minus'   => '&#8722',
    '&lowast'   => '&#8727', '&radic'   => '&#8730', '&prop'    => '&#8733',
    '&infin'    => '&#8734', '&ang'     => '&#8736', '&and'     => '&#8743',
    '&or'       => '&#8744', '&cap'     => '&#8745', '&cup'     => '&#8746',
    '&int'      => '&#8747', '&there4'  => '&#8756', '&sim'     => '&#8764',
    '&cong'     => '&#8773', '&asymp'   => '&#8776', '&ne'      => '&#8800',
    '&equiv'    => '&#8801', '&le'      => '&#8804', '&ge'      => '&#8805',
    '&sub'      => '&#8834', '&sup'     => '&#8835', '&nsub'    => '&#8836',
    '&sube'     => '&#8838', '&supe'    => '&#8839', '&oplus'   => '&#8853',
    '&otimes'   => '&#8855', '&perp'    => '&#8869', '&sdot'    => '&#8901',
    '&lceil'    => '&#8968', '&rceil'   => '&#8969', '&lfloor'  => '&#8970',
    '&rfloor'   => '&#8971', '&lang'    => '&#9001', '&rang'    => '&#9002',
    '&loz'      => '&#9674', '&spades'  => '&#9824', '&clubs'   => '&#9827',
    '&hearts'   => '&#9829', '&diams'   => '&#9830',
);