entity.php

Go to the documentation of this file.
00001 <?php
00002 
/**
 * Helpers for converting HTML/XML character references between their named
 * (&copy;), numeric (&#169; / &#xA9;) and literal UTF-8 forms.
 *
 * Lookup tables are read from the global $_entities array:
 *   - $_entities['named']  maps '&name'  => '&#NNN'   (no trailing semicolons)
 *   - $_entities['cp1251'] maps '&#xHH;' => '&#xHHHH;'
 *
 * All methods are static. The class already invoked its own helpers as
 * entity::method(), which requires static methods on PHP 8; calling them
 * through an instance ($e->method()) keeps working.
 *
 * The regular expressions no longer use the /e (eval) modifier, which was
 * removed in PHP 7.0; preg_replace_callback() is used instead.
 */
class entity
{
    /**
     * Replace named references (&copy;, &copy) with decimal numeric ones (&#169;).
     *
     * A name directly followed by "=" and no semicolon (e.g. "?a=1&copy=2") is
     * left untouched. When the name is terminated by ";" a following "=" is
     * kept in the output ("&copy;=" becomes "&#169;=").
     *
     * @param string $string Text to convert.
     * @return string|null Converted text, or null if the regex engine fails.
     */
    public static function named_to_numeric($string)
    {
        return preg_replace_callback(
            '/(&[0-9A-Za-z]+)(;?)(=?)/',
            static function ($m) {
                // "&name=" without a semicolon: keep the original text.
                if ($m[2] === '' && $m[3] === '=') {
                    return $m[0];
                }

                return entity::_named($m[1], $m[2]) . $m[3];
            },
            $string
        );
    }

    /**
     * Rewrite every numeric reference as "&#xHEX;" (upper-case hex digits, no
     * leading zeros, always terminated by ";") and then apply the
     * $_entities['cp1251'] replacement table to the result.
     *
     * @param string $string Text to normalise.
     * @return string|null Normalised text, or null if the regex engine fails.
     */
    public static function normalize_numeric($string)
    {
        global $_entities;

        $string = self::_hex_refs($string);

        // Skip the table step when it is missing rather than passing null to strtr().
        if (isset($_entities['cp1251']) && is_array($_entities['cp1251']) && is_string($string)) {
            $string = strtr($string, $_entities['cp1251']);
        }

        return $string;
    }

    /**
     * Replace every numeric reference (decimal or hex) with the UTF-8 bytes of
     * the code point it names. The cp1251 table is not applied here.
     *
     * @param string $string Text to convert.
     * @return string|null Converted text, or null if the regex engine fails.
     */
    public static function numeric_to_utf8($string)
    {
        return preg_replace_callback(
            '/&#x([0-9A-Fa-f]+);/',
            static function ($m) {
                return entity::_hex_to_utf8($m[1]);
            },
            self::_hex_refs($string)
        );
    }

    /**
     * Replace numeric references with named ones where $_entities['named']
     * has a name for that code point. Hex references are first rewritten to
     * decimal; references without a known name stay in decimal form.
     *
     * The whole digit run is looked up, so "&#1600;" is not mistaken for
     * "&#160" followed by "0".
     *
     * @param string $string Text to convert.
     * @return string|null Converted text, or null if the regex engine fails.
     */
    public static function numeric_to_named($string)
    {
        global $_entities;

        $string = preg_replace_callback(
            '/&#[Xx]([0-9A-Fa-f]+)/',
            static function ($m) {
                return '&#' . hexdec($m[1]);
            },
            $string
        );

        if (empty($_entities['named']) || !is_array($_entities['named'])) {
            return $string;
        }

        // '&#NNN' => '&name'; the trailing ";" of the input is left in place.
        $names = array_flip($_entities['named']);

        return preg_replace_callback(
            '/&#[0-9]+/',
            static function ($m) use ($names) {
                return isset($names[$m[0]]) ? $names[$m[0]] : $m[0];
            },
            $string
        );
    }

    /**
     * Escape the characters " & ' < > while leaving anything that already
     * looks like a character reference (&name; &#NNN; &#xHEX;) untouched.
     *
     * @param string $string Text to escape.
     * @param string $type   'xml' escapes the apostrophe as &apos;, any other
     *                       value escapes it as &#39;.
     * @return string|null Escaped text, or null if the regex engine fails.
     */
    public static function specialchars($string, $type = 'xml')
    {
        $map = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "'" => $type === 'xml' ? '&apos;' : '&#39;',
            '<' => '&lt;',
            '>' => '&gt;',
        );

        // The reference alternative is tried first, so its "&" is never
        // escaped. Doing it in one pass avoids a placeholder round-trip that
        // could rewrite literal input text.
        return preg_replace_callback(
            '/&#?[Xx]?[0-9A-Za-z]+;|[&<>"\']/',
            static function ($m) use ($map) {
                return isset($map[$m[0]]) ? $map[$m[0]] : $m[0];
            },
            $string
        );
    }

    /**
     * Encode a code point, given as a string of hex digits, as UTF-8.
     *
     * Values above U+10FFFF and surrogates (U+D800..U+DFFF) cannot be encoded
     * as valid UTF-8; U+FFFD (replacement character) is returned for them.
     *
     * @param string $s Hex digits without any prefix, e.g. "20AC".
     * @return string UTF-8 byte sequence.
     */
    public static function _hex_to_utf8($s)
    {
        // hexdec() returns a float for very large inputs; the range check
        // below rejects those before any bit operation is applied.
        $c = hexdec($s);

        if ($c > 0x10FFFF || ($c >= 0xD800 && $c <= 0xDFFF)) {
            return "\xEF\xBF\xBD";
        }
        if ($c < 0x80) {
            return chr($c);
        }
        if ($c < 0x800) {
            return chr(0xC0 | ($c >> 6))
                . chr(0x80 | ($c & 0x3F));
        }
        if ($c < 0x10000) {
            return chr(0xE0 | ($c >> 12))
                . chr(0x80 | (($c >> 6) & 0x3F))
                . chr(0x80 | ($c & 0x3F));
        }

        return chr(0xF0 | ($c >> 18))
            . chr(0x80 | (($c >> 12) & 0x3F))
            . chr(0x80 | (($c >> 6) & 0x3F))
            . chr(0x80 | ($c & 0x3F));
    }

    /**
     * Resolve one "&name" token against $_entities['named'].
     *
     * The longest prefix of $entity that is a known name wins; the rest of the
     * token is appended after the numeric reference ("&notit" becomes
     * "&#172;it"). Unknown names are returned unchanged, keeping their ";".
     *
     * @param string $entity Token including the leading "&", without ";".
     * @param string $extra  Text that followed the token: "", ";" or "=".
     * @return string Replacement text.
     */
    public static function _named($entity, $extra)
    {
        global $_entities;

        if ($extra === '=') {
            return $entity . '=';
        }

        $table = (isset($_entities['named']) && is_array($_entities['named']))
            ? $_entities['named']
            : array();

        for ($length = strlen($entity); $length > 0; $length--) {
            $check = substr($entity, 0, $length);
            if (isset($table[$check])) {
                return $table[$check] . ';' . substr($entity, $length);
            }
        }

        return $entity . ($extra === ';' ? ';' : '');
    }

    /**
     * Bring every numeric reference into the canonical "&#xHEX;" form:
     * decimal references are converted to hex, leading zeros are dropped, hex
     * digits are upper-cased and a terminating ";" is added when missing.
     *
     * Decimal references with more than nine significant digits are left
     * untouched so the value always fits in an int before dechex() is called.
     *
     * @param string $string Text to rewrite.
     * @return string|null Rewritten text, or null if the regex engine fails.
     */
    private static function _hex_refs($string)
    {
        $string = preg_replace_callback(
            '/&#([0-9]+)(;)?/',
            static function ($m) {
                $digits = ltrim($m[1], '0');
                if (strlen($digits) > 9) {
                    return $m[0];
                }

                return '&#x' . dechex((int) $digits) . ';';
            },
            $string
        );

        if (!is_string($string)) {
            return $string;
        }

        return preg_replace_callback(
            '/&#[Xx]0*([0-9A-Fa-f]+);?/',
            static function ($m) {
                return '&#x' . strtoupper($m[1]) . ';';
            },
            $string
        );
    }
}
00085 
00086 
// Replacement table applied by entity::normalize_numeric() after references
// have been rewritten to the "&#xHEX;" form. Each key is a reference in the
// 0x80-0x9F range; each value is the reference it is replaced with.
// NOTE(review): despite the 'cp1251' key, the targets look like the
// Windows-1252 assignments for these bytes (0x80 => U+20AC euro sign) - confirm.
$_entities['cp1251'] = array(
    '&#x80;' => '&#x20AC;',
    '&#x82;' => '&#x201A;',
    '&#x83;' => '&#x192;',
    '&#x84;' => '&#x201E;',
    '&#x85;' => '&#x2026;',
    '&#x86;' => '&#x2020;',
    '&#x87;' => '&#x2021;',
    '&#x88;' => '&#x2C6;',
    '&#x89;' => '&#x2030;',
    '&#x8A;' => '&#x160;',
    '&#x8B;' => '&#x2039;',
    '&#x8C;' => '&#x152;',
    '&#x8E;' => '&#x17D;',
    '&#x91;' => '&#x2018;',
    '&#x92;' => '&#x2019;',
    '&#x93;' => '&#x201C;',
    '&#x94;' => '&#x201D;',
    '&#x95;' => '&#x2022;',
    '&#x96;' => '&#x2013;',
    '&#x97;' => '&#x2014;',
    '&#x98;' => '&#x2DC;',
    '&#x99;' => '&#x2122;',
    '&#x9A;' => '&#x161;',
    '&#x9B;' => '&#x203A;',
    '&#x9C;' => '&#x153;',
    '&#x9E;' => '&#x17E;',
    '&#x9F;' => '&#x178;',
);
00098         
// Named character references and their decimal numeric equivalents.
// Keys and values carry no trailing semicolon. entity::_named() looks names
// up in this table and entity::numeric_to_named() uses it flipped, so every
// value must be unique.
//
// Corrections to the original table:
//   - '&oelig' pointed at '&#229' (the code of '&aring'); it is '&#339'.
//   - '&esnp' was a misspelling of '&ensp'.
$_entities['named'] = array(
    '&nbsp'     => '&#160',   '&iexcl'    => '&#161',   '&cent'     => '&#162',
    '&pound'    => '&#163',   '&curren'   => '&#164',   '&yen'      => '&#165',
    '&brvbar'   => '&#166',   '&sect'     => '&#167',   '&uml'      => '&#168',
    '&copy'     => '&#169',   '&ordf'     => '&#170',   '&laquo'    => '&#171',
    '&not'      => '&#172',   '&shy'      => '&#173',   '&reg'      => '&#174',
    '&macr'     => '&#175',   '&deg'      => '&#176',   '&plusmn'   => '&#177',
    '&sup2'     => '&#178',   '&sup3'     => '&#179',   '&acute'    => '&#180',
    '&micro'    => '&#181',   '&para'     => '&#182',   '&middot'   => '&#183',
    '&cedil'    => '&#184',   '&sup1'     => '&#185',   '&ordm'     => '&#186',
    '&raquo'    => '&#187',   '&frac14'   => '&#188',   '&frac12'   => '&#189',
    '&frac34'   => '&#190',   '&iquest'   => '&#191',   '&Agrave'   => '&#192',
    '&Aacute'   => '&#193',   '&Acirc'    => '&#194',   '&Atilde'   => '&#195',
    '&Auml'     => '&#196',   '&Aring'    => '&#197',   '&AElig'    => '&#198',
    '&Ccedil'   => '&#199',   '&Egrave'   => '&#200',   '&Eacute'   => '&#201',
    '&Ecirc'    => '&#202',   '&Euml'     => '&#203',   '&Igrave'   => '&#204',
    '&Iacute'   => '&#205',   '&Icirc'    => '&#206',   '&Iuml'     => '&#207',
    '&ETH'      => '&#208',   '&Ntilde'   => '&#209',   '&Ograve'   => '&#210',
    '&Oacute'   => '&#211',   '&Ocirc'    => '&#212',   '&Otilde'   => '&#213',
    '&Ouml'     => '&#214',   '&times'    => '&#215',   '&Oslash'   => '&#216',
    '&Ugrave'   => '&#217',   '&Uacute'   => '&#218',   '&Ucirc'    => '&#219',
    '&Uuml'     => '&#220',   '&Yacute'   => '&#221',   '&THORN'    => '&#222',
    '&szlig'    => '&#223',   '&agrave'   => '&#224',   '&aacute'   => '&#225',
    '&acirc'    => '&#226',   '&atilde'   => '&#227',   '&auml'     => '&#228',
    '&aring'    => '&#229',   '&aelig'    => '&#230',   '&ccedil'   => '&#231',
    '&egrave'   => '&#232',   '&eacute'   => '&#233',   '&ecirc'    => '&#234',
    '&euml'     => '&#235',   '&igrave'   => '&#236',   '&iacute'   => '&#237',
    '&icirc'    => '&#238',   '&iuml'     => '&#239',   '&eth'      => '&#240',
    '&ntilde'   => '&#241',   '&ograve'   => '&#242',   '&oacute'   => '&#243',
    '&ocirc'    => '&#244',   '&otilde'   => '&#245',   '&ouml'     => '&#246',
    '&divide'   => '&#247',   '&oslash'   => '&#248',   '&ugrave'   => '&#249',
    '&uacute'   => '&#250',   '&ucirc'    => '&#251',   '&uuml'     => '&#252',
    '&yacute'   => '&#253',   '&thorn'    => '&#254',   '&yuml'     => '&#255',
    '&OElig'    => '&#338',   '&oelig'    => '&#339',   '&Scaron'   => '&#352',
    '&scaron'   => '&#353',   '&Yuml'     => '&#376',   '&circ'     => '&#710',
    '&tilde'    => '&#732',   '&ensp'     => '&#8194',  '&emsp'     => '&#8195',
    '&thinsp'   => '&#8201',  '&zwnj'     => '&#8204',  '&zwj'      => '&#8205',
    '&lrm'      => '&#8206',  '&rlm'      => '&#8207',  '&ndash'    => '&#8211',
    '&mdash'    => '&#8212',  '&lsquo'    => '&#8216',  '&rsquo'    => '&#8217',
    '&sbquo'    => '&#8218',  '&ldquo'    => '&#8220',  '&rdquo'    => '&#8221',
    '&bdquo'    => '&#8222',  '&dagger'   => '&#8224',  '&Dagger'   => '&#8225',
    '&permil'   => '&#8240',  '&lsaquo'   => '&#8249',  '&rsaquo'   => '&#8250',
    '&euro'     => '&#8364',  '&fnof'     => '&#402',   '&Alpha'    => '&#913',
    '&Beta'     => '&#914',   '&Gamma'    => '&#915',   '&Delta'    => '&#916',
    '&Epsilon'  => '&#917',   '&Zeta'     => '&#918',   '&Eta'      => '&#919',
    '&Theta'    => '&#920',   '&Iota'     => '&#921',   '&Kappa'    => '&#922',
    '&Lambda'   => '&#923',   '&Mu'       => '&#924',   '&Nu'       => '&#925',
    '&Xi'       => '&#926',   '&Omicron'  => '&#927',   '&Pi'       => '&#928',
    '&Rho'      => '&#929',   '&Sigma'    => '&#931',   '&Tau'      => '&#932',
    '&Upsilon'  => '&#933',   '&Phi'      => '&#934',   '&Chi'      => '&#935',
    '&Psi'      => '&#936',   '&Omega'    => '&#937',   '&alpha'    => '&#945',
    '&beta'     => '&#946',   '&gamma'    => '&#947',   '&delta'    => '&#948',
    '&epsilon'  => '&#949',   '&zeta'     => '&#950',   '&eta'      => '&#951',
    '&theta'    => '&#952',   '&iota'     => '&#953',   '&kappa'    => '&#954',
    '&lambda'   => '&#955',   '&mu'       => '&#956',   '&nu'       => '&#957',
    '&xi'       => '&#958',   '&omicron'  => '&#959',   '&pi'       => '&#960',
    '&rho'      => '&#961',   '&sigmaf'   => '&#962',   '&sigma'    => '&#963',
    '&tau'      => '&#964',   '&upsilon'  => '&#965',   '&phi'      => '&#966',
    '&chi'      => '&#967',   '&psi'      => '&#968',   '&omega'    => '&#969',
    '&thetasym' => '&#977',   '&upsih'    => '&#978',   '&piv'      => '&#982',
    '&bull'     => '&#8226',  '&hellip'   => '&#8230',  '&prime'    => '&#8242',
    '&Prime'    => '&#8243',  '&oline'    => '&#8254',  '&frasl'    => '&#8260',
    '&weierp'   => '&#8472',  '&image'    => '&#8465',  '&real'     => '&#8476',
    '&trade'    => '&#8482',  '&alefsym'  => '&#8501',  '&larr'     => '&#8592',
    '&uarr'     => '&#8593',  '&rarr'     => '&#8594',  '&darr'     => '&#8595',
    '&harr'     => '&#8596',  '&crarr'    => '&#8629',  '&lArr'     => '&#8656',
    '&uArr'     => '&#8657',  '&rArr'     => '&#8658',  '&dArr'     => '&#8659',
    '&hArr'     => '&#8660',  '&forall'   => '&#8704',  '&part'     => '&#8706',
    '&exist'    => '&#8707',  '&empty'    => '&#8709',  '&nabla'    => '&#8711',
    '&isin'     => '&#8712',  '&notin'    => '&#8713',  '&ni'       => '&#8715',
    '&prod'     => '&#8719',  '&sum'      => '&#8721',  '&minus'    => '&#8722',
    '&lowast'   => '&#8727',  '&radic'    => '&#8730',  '&prop'     => '&#8733',
    '&infin'    => '&#8734',  '&ang'      => '&#8736',  '&and'      => '&#8743',
    '&or'       => '&#8744',  '&cap'      => '&#8745',  '&cup'      => '&#8746',
    '&int'      => '&#8747',  '&there4'   => '&#8756',  '&sim'      => '&#8764',
    '&cong'     => '&#8773',  '&asymp'    => '&#8776',  '&ne'       => '&#8800',
    '&equiv'    => '&#8801',  '&le'       => '&#8804',  '&ge'       => '&#8805',
    '&sub'      => '&#8834',  '&sup'      => '&#8835',  '&nsub'     => '&#8836',
    '&sube'     => '&#8838',  '&supe'     => '&#8839',  '&oplus'    => '&#8853',
    '&otimes'   => '&#8855',  '&perp'     => '&#8869',  '&sdot'     => '&#8901',
    '&lceil'    => '&#8968',  '&rceil'    => '&#8969',  '&lfloor'   => '&#8970',
    '&rfloor'   => '&#8971',  '&lang'     => '&#9001',  '&rang'     => '&#9002',
    '&loz'      => '&#9674',  '&spades'   => '&#9824',  '&clubs'    => '&#9827',
    '&hearts'   => '&#9829',  '&diams'    => '&#9830',
);
00184 
00185 
00186 ?>



Generated on Wed Jun 25 17:25:58 2008 by  doxygen 1.5.5