<?php
/* mbstring emulator for Japanese by Andy
 * email : webmaster@matsubarafamily.com
 *
 * license based on GPL(GNU General Public License)
 *
 * Ver.0.84 (2006/1/20)
 */


define('MB_CASE_UPPER', 0);
define('MB_CASE_LOWER', 1);
define('MB_CASE_TITLE', 2);

// NOTE(review): convert.table presumably fills the conversion tables in
// $mbemu_internals that the converters below read -- confirm against that file.
include dirname(__FILE__).'/convert.table';
// include dirname(__FILE__).'/sjistouni.table';
// include dirname(__FILE__).'/unitosjis.table';


// Settings are read from mb-emulator.ini, located next to this file.
$mbemu_internals['ini_file'] = parse_ini_file(dirname(__FILE__).'/mb-emulator.ini');

$mbemu_internals['language'] = $mbemu_internals['ini_file']['language'];
$mbemu_internals['internal_encoding'] = $mbemu_internals['ini_file']['internal_encoding'];

// Language names accepted by mb_language().
$mbemu_internals['lang_array'] = array (
    'Japanese', 'ja','jp', 'English', 'en', 'uni'
);

// Encoding name (upper case) => numeric encoding id. Aliases share an id;
// the same ids index $mbemu_internals['regex'].
$mbemu_internals['encoding'] = array (
    'AUTO' => 0xFF,
    'ASCII' => 0,
    'EUC-JP' => 1,
    'EUC' => 1,
    'SJIS' => 2,
    'SHIFT-JIS' => 2,
    'SHIFT_JIS' => 2,
    'SJIS-WIN' => 2,
    'JIS' => 3,
    'ISO-2022-JP' => 3,
    'UTF-8' => 4,
    'UTF8' => 4,
    'UTF-16'=>5,
    'ISO-8859-1' => 6
);


/**
 * Get or set the encoding detection order.
 *
 * @param mixed $encoding_list Empty to read the current order; otherwise an
 *                             array of encoding names or a comma separated
 *                             string (the string form is upper-cased first).
 * @return mixed When setting: TRUE on success, FALSE if any name is not a key
 *               of $mbemu_internals['encoding'] (the stored order is then left
 *               untouched). When reading: the stored array, or FALSE if no
 *               order has been stored yet.
 */
function mb_detect_order($encoding_list = '')
{
    global $mbemu_internals;

    if ($encoding_list) {
        if (is_string($encoding_list)) {
            $encoding_list = strtoupper($encoding_list);
            // split() no longer exists since PHP 7.0; preg_split() with the
            // same pattern yields the same pieces.
            $encoding_list = preg_split('/, */', $encoding_list);
        }
        foreach ($encoding_list as $encode) {
            if (!array_key_exists($encode, $mbemu_internals['encoding'])) {
                return FALSE;
            }
        }
        $mbemu_internals['detect_order'] = $encoding_list;
        return TRUE;
    }

    // Reading before anything was stored used to raise an undefined-index
    // notice and yield NULL; report FALSE explicitly instead.
    if (!isset($mbemu_internals['detect_order'])) {
        return FALSE;
    }
    return $mbemu_internals['detect_order'];
}

// Take the detection order from the ini file; fall back to a built-in default
// when the setting is missing, empty, or names an unknown encoding.
if (!isset($mbemu_internals['ini_file']['detect_order'])
    || !mb_detect_order($mbemu_internals['ini_file']['detect_order'])) {
    $mbemu_internals['detect_order'] = array ("ASCII", "JIS", "UTF-8", "EUC-JP", "SJIS");
}

$mbemu_internals['substitute_character'] = $mbemu_internals['ini_file']['substitute_character'];

// One "single character" pattern per numeric encoding id (see
// $mbemu_internals['encoding']). Callers wrap these in '/.../' themselves.
$mbemu_internals['regex'] = array(
    0 => "[\x01-\x7F]", // for ASCII
    1 => "[\xA1-\xFE]([\xA1-\xFE])|[\x01-\x7F]|\x8E([\xA0-\xDF])", // for EUC-JP
    2 => "[\x81-\x9F\xE0-\xFC]([\x40-\xFC])|[\x01-\x7F]|[\xA0-\xDF]", // for Shift_JIS
    3 => "(?:^|\x1B\(\x42)([\x01-\x1A,\x1C-\x7F]*)|(?:\x1B\\$\x42([\x01-\x1A,\x1C-\x7F]*))|(?:\x1B\(I([\x01-\x1A,\x1C-\x7F]*))", // for JIS
    4 => "[\x01-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF][\x80-\xBF]", // for UTF-8
    // "." never matches a 0x0A byte, which silently dropped newlines and
    // misaligned UTF-16 code units containing 0x0A; [\s\S] matches any byte.
    5 => '[\s\S][\s\S]', // for UTF-16
    6 => '[\s\S]' // for ISO-8859-1
);



/**
 * Get or set the current language.
 *
 * @param string $language Empty to read; otherwise one of the names listed in
 *                         $mbemu_internals['lang_array'].
 * @return mixed Reading: the stored language, or FALSE if it is empty.
 *               Setting: TRUE if the name is known, FALSE otherwise.
 */
function mb_language($language='')
{
    global $mbemu_internals;

    if ($language =='') {
        if ($mbemu_internals['language'] == '') return FALSE;
        else return $mbemu_internals['language'];
    } else {
        foreach ($mbemu_internals['lang_array'] as $element) {
            if ($element == $language) {
                $mbemu_internals['language'] = $language;
                return TRUE;
            }
        }
        return FALSE;
    }
}


/**
 * Get or set the internal encoding.
 *
 * @param string $encoding Empty to read; any other value is stored as given
 *                         (it is not validated here).
 * @return mixed Reading: the stored encoding, or FALSE if it is empty.
 *               Setting: always TRUE.
 */
function mb_internal_encoding($encoding = '')
{
    global $mbemu_internals;

    if ($encoding =='') {
        if ($mbemu_internals['internal_encoding'] == '') return FALSE;
        else return $mbemu_internals['internal_encoding'];
    } else {
        $mbemu_internals['internal_encoding'] = $encoding;
        return TRUE;
    }
}

/**
 * Report emulator settings.
 *
 * @param string $type 'all', 'internal_encoding', 'http_output', 'http_input'
 *                     or 'func_overload' (case-insensitive).
 * @return mixed For 'all' an array with those four keys, otherwise the single
 *               value; FALSE for an unknown type. 'http_input' and
 *               'func_overload' are always reported as 'pass'.
 */
function mb_get_info($type = 'all')
{
    switch(strtolower($type)) {
    case 'all' :
        $a['internal_encoding'] = mb_internal_encoding();
        $a['http_output'] = mb_http_output();
        $a['http_input'] = 'pass';
        $a['func_overload'] = 'pass';
        return $a;
    case 'internal_encoding' :
        return mb_internal_encoding();
    case 'http_output' :
        return mb_http_output();
    case 'http_input' :
        return 'pass';
    case 'func_overload' :
    case 'func_overloard' : // historical misspelling, kept so old callers still work
        return 'pass';
    }
    return FALSE;
}

/**
 * Get or set the substitute character.
 *
 * @param mixed $subchar Empty to read; an integer, or the strings 'none' /
 *                       'long' (case-insensitive), to set.
 * @return mixed Reading: the stored value. Setting: TRUE when the value was
 *               accepted, FALSE when it was rejected.
 */
function mb_substitute_character($subchar='')
{
    global $mbemu_internals;

    if (!$subchar) return $mbemu_internals['substitute_character'];
    if (is_int($subchar)) {
        $mbemu_internals['substitute_character'] = $subchar;
        return TRUE;
    }
    $subchar = strtolower($subchar);
    switch ($subchar) {
    case 'none' :
    case 'long' :
        $mbemu_internals['substitute_character'] = $subchar;
        return TRUE;
    }
    return FALSE;
}


/**
 * Convert $str to $to_encoding.
 *
 * The source encoding is whatever mb_detect_encoding($str, $from_encoding)
 * reports. Conversions exist between EUC-JP, SJIS, JIS, UTF-8 and UTF-16;
 * any other source encoding, and any unsupported target, returns the input
 * unchanged (a UTF-16 source with an unsupported target is round-tripped
 * through UTF-8).
 *
 * @param string $str
 * @param string $to_encoding   Target encoding name (case-insensitive).
 * @param mixed  $from_encoding Passed straight to mb_detect_encoding().
 * @return string
 */
function mb_convert_encoding( $str, $to_encoding, $from_encoding = '')
{
    global $mbemu_internals;

    $to_encoding = strtoupper($to_encoding);
    $from_encoding = mb_detect_encoding($str, $from_encoding);

    // Unknown names map to -1, which matches no case below and therefore
    // takes the same default branches an undefined index used to take,
    // without raising a notice.
    $ids = $mbemu_internals['encoding'];
    $from = isset($ids[$from_encoding]) ? $ids[$from_encoding] : -1;
    $to = isset($ids[$to_encoding]) ? $ids[$to_encoding] : -1;

    switch ($from) {
    case 1: //euc-jp
        switch($to) {
        case 2: //sjis
            return _euctosjis($str);
        case 3: //jis
            $str = _euctosjis($str);
            return _sjistojis($str);
        case 4: //utf8
            return _euctoutf8($str);
        case 5: //utf16
            $str = _euctoutf8($str);
            return _utf8toutf16($str);
        default:
            return $str;
        }
    case 2: //sjis
        switch($to) {
        case 1: //euc-jp
            return _sjistoeuc($str);
        case 3: //jis
            return _sjistojis($str);
        case 4: //utf8
            return _sjistoutf8($str);
        case 5: //utf16
            $str = _sjistoutf8($str);
            return _utf8toutf16($str);
        default:
            return $str;
        }
    case 3: //jis
        switch($to) {
        case 1: //euc-jp
            $str = _jistosjis($str);
            return _sjistoeuc($str);
        case 2: //sjis
            return _jistosjis($str);
        case 4: //utf8
            $str = _jistosjis($str);
            return _sjistoutf8($str);
        case 5: //utf16
            $str = _jistosjis($str);
            $str = _sjistoutf8($str);
            return _utf8toutf16($str);
        default:
            return $str;
        }
    case 4: //utf8
        switch($to) {
        case 1: //euc-jp
            return _utf8toeuc($str);
        case 2: //sjis
            return _utf8tosjis($str);
        case 3: //jis
            $str = _utf8tosjis($str);
            return _sjistojis($str);
        case 5: //utf16
            return _utf8toutf16($str);
        default:
            return $str;
        }
    case 5: //utf16
        $str = _utf16toutf8($str);
        switch($to) {
        case 1: //euc-jp
            return _utf8toeuc($str);
        case 2: //sjis
            return _utf8tosjis($str);
        case 3: //jis
            $str = _utf8tosjis($str);
            return _sjistojis($str);
        case 4: //utf8
            return $str;
        default:
            return _utf8toutf16($str);
        }
    default:
        return $str;
    }
}



/**
 * Shift_JIS -> EUC-JP.
 *
 * Uses the 'sjistoeuc_byte1', 'sjistoeuc_byte1_shift' and 'sjistoeuc_byte2'
 * tables of $mbemu_internals (defined outside this section).
 *
 * @param string $str Shift_JIS bytes (not modified).
 * @return string EUC-JP bytes.
 */
function _sjistoeuc(&$str)
{
    global $mbemu_internals;

    $max = preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $allchars); // split into characters
    $str_EUC = '';
    for ($i = 0; $i < $max; ++$i) {
        $num = ord($allchars[0][$i]); // first byte of the character
        if ($num2 = ord($allchars[1][$i])) { // a second byte exists
            $shift = $mbemu_internals['sjistoeuc_byte1_shift'][$num2];
            $str_EUC .= chr($mbemu_internals['sjistoeuc_byte1'][$num] + $shift)
                       .chr($mbemu_internals['sjistoeuc_byte2'][$shift][$num2]);
        } elseif ($num <= 0x7F) { // ASCII
            $str_EUC .= chr($num);
        } else { // half-width kana: prefix with 0x8E
            $str_EUC .= chr(0x8E).chr($num);
        }
    }
    return $str_EUC;
}


/**
 * EUC-JP -> Shift_JIS.
 *
 * Uses the 'euctosjis_byte1' and 'euctosjis_byte2' tables of
 * $mbemu_internals (defined outside this section).
 *
 * @param string $str EUC-JP bytes (not modified).
 * @return string Shift_JIS bytes.
 */
function _euctosjis(&$str)
{
    global $mbemu_internals;

    $max = preg_match_all('/'.$mbemu_internals['regex'][1].'/', $str, $allchars); // split into characters
    $str_SJIS = '';
    for ($i = 0; $i < $max; ++$i) {
        $num = ord($allchars[0][$i]); // first byte of the character
        if ($num2 = ord($allchars[1][$i])) { // double-byte character
            $str_SJIS .= chr($mbemu_internals['euctosjis_byte1'][$num]);
            // the parity of the first byte selects the second-byte table
            if ($num & 1)
                $str_SJIS .= chr($mbemu_internals['euctosjis_byte2'][0][$num2]);
            else
                $str_SJIS .= chr($mbemu_internals['euctosjis_byte2'][1][$num2]);
        } elseif ($num3 = ord($allchars[2][$i])) { // half-width kana: drop the 0x8E prefix
            $str_SJIS .= chr($num3);
        } else { // ASCII
            $str_SJIS .= chr($num);
        }
    }
    return $str_SJIS;
}

/**
 * Shift_JIS -> JIS (7-bit with escape sequences).
 *
 * Emits ESC $ B before double-byte runs, ESC ( I before half-width kana runs
 * and ESC ( B before ASCII runs, and always ends in ASCII mode.
 *
 * @param string $str Shift_JIS bytes (not modified).
 * @return string JIS bytes.
 */
function _sjistojis(&$str)
{
    global $mbemu_internals;

    $max = preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $allchars); // split into characters
    $str_JIS = '';
    $mode = 0; // 0: ASCII, 1: double-byte, 2: half-width kana
    for ($i = 0; $i < $max; ++$i) {
        $num = ord($allchars[0][$i]); // first byte of the character
        if ($num2 = ord($allchars[1][$i])) { // a second byte exists
            if ($mode != 1) {
                $mode = 1;
                $str_JIS .= chr(0x1b).'$B';
            }
            $shift = $mbemu_internals['sjistoeuc_byte1_shift'][$num2];
            // same tables as the EUC conversion, with the high bit cleared
            $str_JIS .= chr(($mbemu_internals['sjistoeuc_byte1'][$num] + $shift) & 0x7F)
                       .chr($mbemu_internals['sjistoeuc_byte2'][$shift][$num2] & 0x7F);
        } elseif ($num > 0x80) { // half-width kana
            if ($mode != 2) {
                $mode = 2;
                $str_JIS .= chr(0x1B).'(I';
            }
            $str_JIS .= chr($num & 0x7F);
        } else { // ASCII
            if ($mode != 0) {
                $mode = 0;
                $str_JIS .= chr(0x1B).'(B';
            }
            $str_JIS .= chr($num);
        }
    }
    if ($mode != 0) {
        $str_JIS .= chr(0x1B).'(B';
    }
    return $str_JIS;
}

/**
 * preg_replace_callback() helper for _jistosjis(): converts one 7-bit
 * double-byte JIS character to Shift_JIS.
 *
 * @param array $match $match[0] is the two-byte character, $match[1] its
 *                     second byte.
 * @return string Two Shift_JIS bytes.
 */
function _sub_jtosj($match)
{
    global $mbemu_internals;

    $num = ord($match[0]);
    $num2 = ord($match[1]);
    // setting the high bit turns the JIS bytes into the EUC bytes the tables expect
    $s = chr($mbemu_internals['euctosjis_byte1'][$num | 0x80]);
    if ($num & 1) {
        $s .= chr($mbemu_internals['euctosjis_byte2'][0][$num2 | 0x80]);
    } else {
        $s .= chr($mbemu_internals['euctosjis_byte2'][1][$num2 | 0x80]);
    }
    return $s;
}

/**
 * JIS (7-bit with escape sequences) -> Shift_JIS.
 *
 * @param string $str JIS bytes (not modified).
 * @return string Shift_JIS bytes.
 */
function _jistosjis(&$str)
{
    global $mbemu_internals;

    // split into chunks, one per character-set run
    $max = preg_match_all('/'.$mbemu_internals['regex'][3].'/', $str, $allchunks, PREG_SET_ORDER);
    $st = '';
    for ($i = 0; $i < $max; ++$i) {
        $chunk = $allchunks[$i];
        // With PREG_SET_ORDER trailing groups that did not take part in the
        // match are absent, hence the isset() guards.
        if (isset($chunk[1]) && ord($chunk[1])) { // ASCII run
            $st .= $chunk[1];
        } elseif (isset($chunk[2]) && ord($chunk[2])) { // double-byte run
            $tmp = substr($chunk[0], 3, strlen($chunk[0])); // strip the 3-byte escape sequence
            $st .= preg_replace_callback("/.(.)/","_sub_jtosj", $tmp);
        } elseif (isset($chunk[3]) && ord($chunk[3])) { // half-width kana run
            // Set the high bit of every byte. This replaces a preg_replace()
            // call that relied on the /e modifier (gone since PHP 7.0) and
            // evaluated a malformed expression.
            $kana = $chunk[3];
            $len = strlen($kana);
            for ($j = 0; $j < $len; ++$j) {
                $st .= chr(ord($kana[$j]) | 0x80);
            }
        }
    }
    return $st;
}


/**
 * Encode one code point (up to 16 bits) as UTF-8.
 *
 * @param int $uni Code point.
 * @return string One to three UTF-8 bytes.
 */
function _ucs2utf8($uni)
{
    if ($uni <= 0x7f)
        return chr($uni);
    elseif ($uni <= 0x7ff) {
        $y = ($uni >> 6) & 0x1f;
        $x = $uni & 0x3f;
        return chr(0xc0 | $y).chr(0x80 | $x);
    } else {
        $z = ($uni >> 12) & 0x0f;
        $y = ($uni >> 6) & 0x3f;
        $x = $uni & 0x3f;
        return chr(0xe0 | $z).chr(0x80 | $y).chr(0x80 | $x);
    }
}

/**
 * Shift_JIS -> UTF-8.
 *
 * Loads sjistouni.table on first use and reads the 'sjistoucs2' table of
 * $mbemu_internals, keyed by (first byte << 8) | second byte.
 *
 * @param string $str Shift_JIS bytes (not modified).
 * @return string UTF-8 bytes.
 */
function _sjistoutf8(&$str)
{
    global $mbemu_internals;
    include_once(dirname(__FILE__).'/sjistouni.table');

    $st = '';
    $max = preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $allchars); // split into characters
    for ($i = 0; $i < $max; ++$i) {
        $num = ord($allchars[0][$i]); // first byte of the character
        if ($num2 = ord($allchars[1][$i])) { // a second byte exists
            $ucs2 = $mbemu_internals['sjistoucs2'][($num << 8) | $num2];
            $st .= _ucs2utf8($ucs2);
        } elseif ($num > 0x80) { // half-width kana: fixed offset (0xA1 -> U+FF61)
            $st .= _ucs2utf8(0xfec0 + $num);
        } else { // ASCII
            $st .= chr($num);
        }
    }
    return $st;
}

/**
 * Decode the first UTF-8 character of $st (one to three bytes) to its code
 * point.
 *
 * @param string $st UTF-8 bytes.
 * @return int Code point.
 */
function _utf8ucs2($st)
{
    $num = ord($st);
    if (!($num & 0x80)) //1byte
        return $num;
    elseif (($num & 0xe0) == 0xc0) { //2bytes
        $num2 = ord(substr($st, 1,1));
        return (($num & 0x1f) << 6) | ($num2 & 0x3f);
    } else { //3bytes
        $num2 = ord(substr($st, 1,1));
        $num3 = ord(substr($st, 2,1));
        return (($num & 0x0f) << 12) | (($num2 & 0x3f) << 6) | ($num3 & 0x3f);
    }
}

/**
 * UTF-8 -> Shift_JIS.
 *
 * Loads unitosjis.table on first use and reads the 'ucs2tosjis' table of
 * $mbemu_internals.
 *
 * @param string $str UTF-8 bytes (not modified).
 * @return string Shift_JIS bytes.
 */
function _utf8tosjis(&$str)
{
    global $mbemu_internals;
    include_once(dirname(__FILE__).'/unitosjis.table');

    $st = '';
    $max = preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, $allchars); // split into characters
    for ($i = 0; $i < $max; ++$i) {
        $num = _utf8ucs2($allchars[0][$i]); // code point of the character
        if ($num < 0x80)
            $st .= chr($num);
        elseif ((0xff61 <= $num) && ($num <= 0xff9f)) // half-width kana
            $st .= chr($num - 0xfec0);
        else {
            $sjis = $mbemu_internals['ucs2tosjis'][$num];
            $st .= chr($sjis >> 8) . chr($sjis & 0xff);
        }
    }
    return $st;
}

/**
 * EUC-JP -> UTF-8 (via the Shift_JIS code of each double-byte character).
 *
 * @param string $str EUC-JP bytes (not modified).
 * @return string UTF-8 bytes.
 */
function _euctoutf8(&$str)
{
    global $mbemu_internals;
    include_once(dirname(__FILE__).'/sjistouni.table');

    $st = '';
    $max = preg_match_all('/'.$mbemu_internals['regex'][1].'/', $str, $allchars); // split into characters
    for ($i = 0; $i < $max; ++$i) {
        $num = ord($allchars[0][$i]); // first byte of the character
        if ($num2 = ord($allchars[1][$i])) { // a second byte exists
            if ($num & 1)
                $sjis = ($mbemu_internals['euctosjis_byte1'][$num] << 8) | $mbemu_internals['euctosjis_byte2'][0][$num2];
            else
                $sjis = ($mbemu_internals['euctosjis_byte1'][$num] << 8) | $mbemu_internals['euctosjis_byte2'][1][$num2];
            $st .= _ucs2utf8($mbemu_internals['sjistoucs2'][$sjis]);
        } elseif ($num3 = ord($allchars[2][$i])) { // half-width kana
            $st .= _ucs2utf8(0xfec0 + $num3);
        } else { // ASCII
            $st .= chr($num);
        }
    }
    return $st;
}

/**
 * UTF-8 -> EUC-JP (via the Shift_JIS code of each non-ASCII character).
 *
 * @param string $str UTF-8 bytes (not modified).
 * @return string EUC-JP bytes.
 */
function _utf8toeuc(&$str)
{
    global $mbemu_internals;
    include_once(dirname(__FILE__).'/unitosjis.table');

    $st = '';
    $max = preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, $allchars); // split into characters
    for ($i = 0; $i < $max; ++$i) {
        $num = _utf8ucs2($allchars[0][$i]); // code point of the character
        if ($num < 0x80)
            $st .= chr($num);
        elseif ((0xff61 <= $num) && ($num <= 0xff9f)) // half-width kana
            $st .= chr(0x8e) . chr($num - 0xfec0);
        else {
            $sjis = $mbemu_internals['ucs2tosjis'][$num];
            $upper = $sjis >> 8;
            $lower = $sjis & 0xff;
            $shift = $mbemu_internals['sjistoeuc_byte1_shift'][$lower];
            $st .= chr($mbemu_internals['sjistoeuc_byte1'][$upper] + $shift)
                  .chr($mbemu_internals['sjistoeuc_byte2'][$shift][$lower]);
        }
    }
    return $st;
}

/**
 * UTF-8 -> UTF-16 (two bytes per character, high byte first, no BOM).
 *
 * @param string $str UTF-8 bytes (not modified).
 * @return string UTF-16 bytes.
 */
function _utf8toutf16(&$str)
{
    global $mbemu_internals;

    $st = '';
    $max = preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, $allchars); // split into characters
    for ($i = 0; $i < $max; ++$i) {
        $num = _utf8ucs2($allchars[0][$i]); // code point of the character
        $st .= chr(($num >> 8) & 0xff).chr($num & 0xff);
    }
    return $st;
}

/**
 * UTF-16 (big-endian 16-bit units) -> UTF-8.
 *
 * @param string $str UTF-16 bytes (not modified).
 * @return string UTF-8 bytes.
 */
function _utf16toutf8(&$str)
{
    global $mbemu_internals;

    $st = '';
    $ar = unpack("n*", $str);
    foreach($ar as $char) {
        $st .= _ucs2utf8($char);
    }
    return $st;
}


/**
 * Full-width -> half-width conversion on an EUC-JP string, in place.
 *
 * Characters captured by group 1 of $match are replaced by chr() of their
 * key in $mbemu_internals['alphanumeric_convert']; everything else is copied.
 *
 * @param string $str   EUC-JP string, rewritten in place.
 * @param string $match Regex fragment whose group 1 selects the characters
 *                      to convert.
 */
function sub_zenhan_EUC(&$str, $match) {
    global $mbemu_internals;

    // Append the generic EUC-JP character alternatives so that every
    // character is consumed as a unit.
    $match = $match . "|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e[\xa0-\xdf]";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($num = ord($chars[1][$i])) // a full-width character matched
            $str .= chr(array_search($chars[1][$i], $mbemu_internals['alphanumeric_convert']));
        else
            $str .= $chars[0][$i];
    }
}

/**
 * Half-width -> full-width conversion on an EUC-JP string, in place.
 *
 * Characters captured by group 1 of $match are replaced by
 * $mbemu_internals['alphanumeric_convert'][byte]; everything else is copied.
 *
 * @param string $str   EUC-JP string, rewritten in place.
 * @param string $match Regex fragment whose group 1 selects the characters
 *                      to convert.
 */
function sub_hanzen_EUC(&$str, $match) {
    global $mbemu_internals;

    $match = $match . "|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e[\xa0-\xdf]";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($num = ord($chars[1][$i])) // a half-width character matched
            $str .= $mbemu_internals['alphanumeric_convert'][$num];
        else
            $str .= $chars[0][$i];
    }
}

/** Full-width letters -> half-width, in place (EUC-JP). */
function alpha_zenhan_EUC(&$str) {
    sub_zenhan_EUC($str, "(\xA3[\xC1-\xFA])");
}

/** Half-width letters A-Z / a-z -> full-width, in place (EUC-JP). */
function alpha_hanzen_EUC(&$str) {
    // The class used to read [\x41-\x5A,\x61-\x7A]; inside a character class
    // the comma is a literal, so "," was treated as a letter as well.
    sub_hanzen_EUC($str, "([\x41-\x5A\x61-\x7A])");
}


/** Full-width digits -> half-width, in place (EUC-JP). */
function num_zenhan_EUC(&$str) {
    sub_zenhan_EUC($str, "(\xA3[\xB0-\xB9])");
}

/** Half-width digits -> full-width, in place (EUC-JP). */
function num_hanzen_EUC(&$str) {
    sub_hanzen_EUC($str, "([\x30-\x39])");
}

/** Full-width letters, digits and listed symbols -> half-width, in place (EUC-JP). */
function alphanum_zenhan_EUC(&$str) {
    sub_zenhan_EUC($str, "(\xa1[\xa4,\xa5,\xa7-\xaa,\xb0,\xb2,\xbf,\xc3,\xca,\xcb,\xce-\xd1,\xdc,\xdd,\xe1,\xe3,\xe4,\xf0,\xf3-\xf7]|\xA3[\xC1-\xFA]|\xA3[\xB0-\xB9])");
}

/** Half-width 0x21, 0x23-0x26, 0x28-0x5B and 0x5D-0x7D -> full-width, in place (EUC-JP). */
function alphanum_hanzen_EUC(&$str) {
    sub_hanzen_EUC($str, "([\\\x21,\\\x23-\\\x26,\\\x28-\\\x5B,\\\x5D-\\\x7D])");
}


/** Full-width space -> half-width space, in place (EUC-JP). */
function space_zenhan_EUC(&$str) {
    sub_zenhan_EUC($str, "(\xA1\xA1)");
}

/** Half-width space -> full-width space, in place (EUC-JP). */
function space_hanzen_EUC(&$str) {
    sub_hanzen_EUC($str, "(\x20)");
}

/**
 * Full-width katakana (and the listed symbols) -> half-width kana, in place.
 *
 * Uses the 'kana_zenhan_convert' and 'special_zenhan_convert' tables of
 * $mbemu_internals, keyed by the second EUC-JP byte.
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function katakana_zenhan_EUC(&$str) {
    global $mbemu_internals;

    $match = "\xa5([\xa1-\xf4])|\xa1([\xa2,\xa3,\xa6,\xab,\xac,\xbc,\xd6,\xd7])|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e[\xa0-\xdf]";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($num = ord($chars[1][$i])) // a katakana matched
            $str .= chr(0x8e) . $mbemu_internals['kana_zenhan_convert'][$num];
        elseif ($num = ord($chars[2][$i])) // a symbol with a half-width form matched
            $str .= chr(0x8e) . $mbemu_internals['special_zenhan_convert'][$num];
        else
            $str .= $chars[0][$i];
    }
}

/**
 * Full-width hiragana (and the listed symbols) -> half-width kana, in place.
 *
 * Same tables as katakana_zenhan_EUC(); only the lead byte of the matched
 * row differs (0xA4 instead of 0xA5).
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function hiragana_zenhan_EUC(&$str) {
    global $mbemu_internals;

    $match = "\xa4([\xa1-\xf4])|\xa1([\xa2,\xa3,\xa6,\xab,\xac,\xbc,\xd6,\xd7])|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e[\xa0-\xdf]";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($num = ord($chars[1][$i])) // a hiragana matched
            $str .= chr(0x8e) . $mbemu_internals['kana_zenhan_convert'][$num];
        elseif ($num = ord($chars[2][$i])) // a symbol with a half-width form matched
            $str .= chr(0x8e) . $mbemu_internals['special_zenhan_convert'][$num];
        else
            $str .= $chars[0][$i];
    }
}

/**
 * Half-width kana -> full-width katakana, in place, merging a following
 * voiced / semi-voiced sound mark into the base character.
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function katakana_hanzen1_EUC(&$str) { // variant that merges sound marks
    global $mbemu_internals;

    // group 1 captures a kana together with its trailing sound mark
    $match = "\x8e((?:[\xb3,\xb6-\xc4,\xca-\xce]\x8e\xde)|(?:[\xca-\xce]\x8e\xdf))|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e([\xa1-\xdf])";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($chars[1][$i]) { // voiced / semi-voiced kana matched
            $str .= chr(0xa5).chr(array_search($chars[1][$i], $mbemu_internals['kana_zenhan_convert']));
        } elseif ($chars[2][$i]) { // any other half-width kana matched
            if ($num = array_search($chars[2][$i], $mbemu_internals['kana_zenhan_convert']))
                $str .= chr(0xa5).chr($num);
            else
                $str .= chr(0xa1).chr(array_search($chars[2][$i], $mbemu_internals['special_zenhan_convert']));
        } else {
            $str .= $chars[0][$i];
        }
    }
}

/**
 * Half-width kana -> full-width hiragana, in place, merging a following
 * voiced / semi-voiced sound mark into the base character.
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function hiragana_hanzen1_EUC(&$str) { // variant that merges sound marks
    global $mbemu_internals;

    // group 1 captures a kana together with its trailing sound mark
    $match = "\x8e((?:[\xb6-\xc4,\xca-\xce]\x8e\xde)|(?:[\xca-\xce]\x8e\xdf))|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e([\xa1-\xdf])";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($chars[1][$i]) { // voiced / semi-voiced kana matched
            $str .= chr(0xa4).chr(array_search($chars[1][$i], $mbemu_internals['kana_zenhan_convert']));
        } elseif ($chars[2][$i]) { // any other half-width kana matched
            if ($num = array_search($chars[2][$i], $mbemu_internals['kana_zenhan_convert']))
                $str .= chr(0xa4).chr($num);
            else
                $str .= chr(0xa1).chr(array_search($chars[2][$i], $mbemu_internals['special_zenhan_convert']));
        } else {
            $str .= $chars[0][$i];
        }
    }
}

/**
 * Half-width kana -> full-width katakana, in place, without merging sound
 * marks.
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function katakana_hanzen2_EUC(&$str) { // variant that does not merge sound marks
    global $mbemu_internals;

    $match = "[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e([\xa1-\xdf])";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($chars[1][$i]) { // a half-width kana matched
            if ($num = array_search($chars[1][$i], $mbemu_internals['kana_zenhan_convert']))
                $str .= chr(0xa5).chr($num);
            else
                $str .= chr(0xa1).chr(array_search($chars[1][$i], $mbemu_internals['special_zenhan_convert']));
        } else {
            $str .= $chars[0][$i];
        }
    }
}

/**
 * Half-width kana -> full-width hiragana, in place, without merging sound
 * marks.
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function hiragana_hanzen2_EUC(&$str) { // variant that does not merge sound marks
    global $mbemu_internals;

    $match = "[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e([\xa1-\xdf])";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($chars[1][$i]) { // a half-width kana matched
            if ($num = array_search($chars[1][$i], $mbemu_internals['kana_zenhan_convert']))
                $str .= chr(0xa4).chr($num);
            else
                $str .= chr(0xa1).chr(array_search($chars[1][$i], $mbemu_internals['special_zenhan_convert']));
        } else {
            $str .= $chars[0][$i];
        }
    }
}

/**
 * Full-width katakana -> full-width hiragana, in place: the lead byte 0xA5
 * becomes 0xA4, the second byte is kept.
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function katakana_hiragana_EUC(&$str) {

    $match = "\xa5([\xa1-\xf3])|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e[\xa0-\xdf]";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($num = ord($chars[1][$i])) // a katakana matched
            $str .= chr(0xa4) . chr($num);
        else
            $str .= $chars[0][$i];
    }
}

/**
 * Full-width hiragana -> full-width katakana, in place: the lead byte 0xA4
 * becomes 0xA5, the second byte is kept.
 *
 * @param string $str EUC-JP string, rewritten in place.
 */
function hiragana_katakana_EUC(&$str) {

    $match = "\xa4([\xa1-\xf4])|[\xa1-\xfe][\xa1-\xfe]|[\x01-\x7f]|\x8e[\xa0-\xdf]";
    $max = preg_match_all("/$match/", $str, $chars);
    $str = '';
    for ($i = 0; $i < $max; ++$i) {
        if ($num = ord($chars[1][$i])) // a hiragana matched
            $str .= chr(0xa5) . chr($num);
        else
            $str .= $chars[0][$i];
    }
}

/**
 * Convert between full-width and half-width forms and between kana types.
 *
 * The string is converted to EUC-JP, each requested conversion is applied in
 * the fixed order below, and the result is converted back to $encoding.
 *
 * Option letters (any combination):
 *   r / R  letters          full->half / half->full
 *   n / N  digits           full->half / half->full
 *   a / A  letters, digits and symbols   full->half / half->full
 *   s / S  space            full->half / half->full
 *   k      full-width katakana -> half-width kana
 *   K      half-width kana -> full-width katakana
 *   H      half-width kana -> full-width hiragana
 *   h      full-width hiragana -> half-width kana
 *   c      full-width katakana -> full-width hiragana
 *   C      full-width hiragana -> full-width katakana
 *   V      with K or H: merge voiced / semi-voiced sound marks
 *
 * @param string $str
 * @param string $option
 * @param string $encoding Defaults to mb_internal_encoding().
 * @return string
 */
function mb_convert_kana( $str, $option='KV', $encoding = '')
{
    if (!$encoding) $encoding = mb_internal_encoding();
    $str = mb_convert_encoding($str, 'EUC-JP', $encoding);

    if (strstr($option, "r")) alpha_zenhan_EUC($str);
    if (strstr($option, "R")) alpha_hanzen_EUC($str);
    if (strstr($option, "n")) num_zenhan_EUC($str);
    if (strstr($option, "N")) num_hanzen_EUC($str);
    if (strstr($option, "a")) alphanum_zenhan_EUC($str);
    if (strstr($option, "A")) alphanum_hanzen_EUC($str);
    if (strstr($option, "s")) space_zenhan_EUC($str);
    if (strstr($option, "S")) space_hanzen_EUC($str);
    if (strstr($option, "k")) katakana_zenhan_EUC($str);
    if (strstr($option, "K")) {
        if (strstr($option, "V"))
            katakana_hanzen1_EUC($str);
        else
            katakana_hanzen2_EUC($str);
    }
    if (strstr($option, "H")) {
        if (strstr($option, "V"))
            hiragana_hanzen1_EUC($str);
        else
            hiragana_hanzen2_EUC($str);
    }
    if (strstr($option, "h")) hiragana_zenhan_EUC($str);
    if (strstr($option, "c")) katakana_hiragana_EUC($str);
    if (strstr($option, "C")) hiragana_katakana_EUC($str);

    $str = mb_convert_encoding($str, $encoding, 'EUC-JP');
    return $str;
}

// if mb_language is uni this function send mail using UTF-8/Base64
// if English or en this function send mail using ISO-8859-1/quoted printable
// if Japanese this function send mail using
ISO-2022-JP 00729 function mb_send_mail($to, $subject, $message , $additional_headers='', $additional_parameter='') 00730 { 00731 switch (mb_language()) { 00732 case 'jp' : 00733 case 'ja' : 00734 case 'Japanese' : 00735 if (!_check_encoding($subject, 3)) //if not JIS encoded 00736 $subject =mb_encode_mimeheader($subject); 00737 else { 00738 $tmp = mb_internal_encoding(); 00739 mb_internal_encoding('iso-2022-jp'); 00740 $subject =mb_encode_mimeheader($subject); 00741 mb_internal_encoding($tmp); 00742 } 00743 if (!_check_encoding($message, 3)) 00744 $message = mb_convert_encoding($message, "iso-2022-jp", mb_internal_encoding()); 00745 $additional_headers .= 00746 "\r\nMime-Version: 1.0\r\nContent-Type: text/plain; charset=ISO-2022-JP\r\nContent-Transfer-Encoding: 7bit"; 00747 mail($to, $subject, $message, $additional_headers, $additional_parameter); 00748 break; 00749 case 'en' : 00750 case 'English' : 00751 $subject =mb_encode_mimeheader($subject, mb_internal_encoding(), 'Q'); 00752 $message = _sub_encode_base64($message, mb_internal_encoding(), 76 , "\r\n"); 00753 $additional_headers .= 00754 "\r\nMime-Version: 1.0\r\nContent-Type: text/plain; charset=". 00755 mb_preferred_mime_name(mb_internal_encoding()). 00756 "\r\nContent-Transfer-Encoding: BASE64"; 00757 mail($to, $subject, $message, $additional_headers, $additional_parameter); 00758 break; 00759 case 'uni' : 00760 $subject =mb_encode_mimeheader($subject, mb_internal_encoding(), 'B'); 00761 $message = _sub_encode_base64($message, mb_internal_encoding(), 76 , "\r\n"); 00762 $additional_headers .= 00763 "\r\nMime-Version: 1.0\r\nContent-Type: text/plain; charset=". 00764 mb_preferred_mime_name(mb_internal_encoding()). 
00765 "\r\nContent-Transfer-Encoding: BASE64"; 00766 mail($to, $subject, $message, $additional_headers, $additional_parameter); 00767 break; 00768 } 00769 00770 } 00771 00772 00773 00774 function _check_encoding($str, $encoding_number) 00775 { 00776 global $mbemu_internals; 00777 return (preg_match('/^('.$mbemu_internals['regex'][$encoding_number].')+$/', $str) == 1); 00778 } 00779 00780 function mb_detect_encoding( $str , $encoding_list = '') 00781 { 00782 global $mbemu_internals; 00783 00784 if ($encoding_list == '') 00785 $encoding_list = mb_detect_order(); 00786 if (!is_array($encoding_list)) { 00787 $encoding_list = strtoupper($encoding_list); 00788 if ($encoding_list == 'AUTO') { 00789 $encoding_list = mb_detect_order(); 00790 } else { 00791 $encoding_list = split(', *', $encoding_list); 00792 } 00793 } 00794 foreach($encoding_list as $encode) { 00795 if (_check_encoding($str, $mbemu_internals['encoding'][$encode])) 00796 return $encode; 00797 } 00798 return $encode; 00799 } 00800 00801 function mb_strlen ( $str , $encoding='') 00802 { 00803 global $mbemu_internals; 00804 00805 $encoding = mb_detect_encoding($str, $encoding); 00806 00807 switch ($e = $mbemu_internals['encoding'][$encoding]) { 00808 case 1 : //euc-jp 00809 case 2 : //shift-jis 00810 case 4 : //utf-8 00811 return preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $str, $arr); 00812 case 5 : //utf-16 00813 return strlen($str) >> 1; 00814 case 0 : //ascii 00815 case 6 : //iso8859-1 00816 return strlen($str); 00817 case 3 : //jis 00818 $str = mb_convert_encoding($str, 'SJIS', 'JIS'); 00819 return preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $arr); 00820 } 00821 } 00822 00823 function mb_strwidth( $str, $encoding='') 00824 { 00825 global $mbemu_internals; 00826 00827 $encoding = mb_detect_encoding($str, $encoding); 00828 switch ($e = $mbemu_internals['encoding'][$encoding]) { 00829 case 4 : //utf-8 00830 $max = $len = preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, 
$arr); 00831 for ($i=0; $i < $max; ++$i) { 00832 $ucs2 = _utf8ucs2($arr[0][$i]); 00833 if (((0x2000 <= $ucs2) && ($ucs2 <= 0xff60)) || (0xffa0 <= $ucs2)) 00834 ++$len; 00835 } 00836 return $len; 00837 case 1 : //euc-jp 00838 case 2 : //shift-jis 00839 $max = $len = preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $str, $arr); 00840 for ($i=0; $i < $max; ++$i) 00841 if ($arr[1][$i]) ++$len; 00842 return $len; 00843 case 5 : //utf-16 00844 $max = $len = preg_match_all('/'.$mbemu_internals['regex'][5].'/', $str, $arr); 00845 for ($i=0; $i < $max; ++$i) { 00846 $ucs2 = (ord($arr[0][$i]) << 8) | ord(substr($arr[0][$i],1,1)); 00847 if (((0x2000 <= $ucs2) && ($ucs2 <= 0xff60)) || (0xffa0 <= $ucs2)) 00848 ++$len; 00849 } 00850 return $len; 00851 case 0 : //ascii 00852 case 6 : //iso8859-1 00853 return strlen($str); 00854 case 3 : //jis 00855 $str = mb_convert_encoding($str, 'SJIS', 'JIS'); 00856 $max = $len = preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $arr); 00857 for ($i=0; $i < $max; ++$i) 00858 if ($arr[1][$i]) ++$len; 00859 return $len; 00860 } 00861 } 00862 00863 function mb_strimwidth( $str, $start, $width, $trimmarker , $encoding = '') 00864 { 00865 global $mbemu_internals; 00866 00867 $encoding = mb_detect_encoding($str, $encoding); 00868 $str = mb_substr($str, $start, 'notnumber', $encoding); 00869 if (($len = mb_strwidth($str,$encoding)) <= $width) 00870 return $str; 00871 $trimwidth = mb_strwidth($trimmarker,$encoding); 00872 $width -= $trimwidth; 00873 if ($width <= 0) return $trimmarker; 00874 00875 switch ($e = $mbemu_internals['encoding'][$encoding]) { 00876 case 0 : //ascii 00877 case 6 : //iso8859-1 00878 return substr($str, 0, $width).$trimmarker; 00879 case 4 : //utf-8 00880 preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, $arr); 00881 $i = 0; 00882 while(TRUE) { 00883 $ucs2 = _utf8ucs2($arr[0][$i]); 00884 if (((0x2000 <= $ucs2) && ($ucs2 <= 0xff60)) || (0xffa0 <= $ucs2)) 00885 $width -= 2; 00886 else 00887 --$width; 00888 
if ($width<0) break;
            ++$i;
        }
        $arr[0] = array_slice($arr[0], 0, $i);
        return implode("", $arr[0]).$trimmarker;
    case 5 : //utf-16
        $arr = unpack("n*", $str);
        $i = 0;
        foreach($arr as $ucs2) {
            if (((0x2000 <= $ucs2) && ($ucs2 <= 0xff60)) || (0xffa0 <= $ucs2))
                $width -= 2;
            else
                --$width;
            if ($width<0) break;
            ++$i;
        }
        $arr[0] = array_slice($arr[0], 0, $i);
        return implode("", $arr[0]).$trimmarker;
    case 1 : //euc-jp
    case 2 : //shift-jis
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $str, $arr);
        $i = 0;
        while(TRUE) {
            if ($arr[1][$i])
                $width -= 2;
            else
                --$width;
            if ($width<0) break;
            ++$i;
        }
        $arr[0] = array_slice($arr[0], 0, $i);
        return implode("", $arr[0]).$trimmarker;
    case 3 : //jis
        $str = mb_convert_encoding($str, 'SJIS', 'JIS');
        $trimmarker = mb_convert_encoding($trimmarker, 'SJIS', 'JIS');
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $arr);
        $i = 0;
        while(TRUE) {
            if ($arr[1][$i])
                $width -= 2;
            else
                --$width;
            if ($width<0) break;
            ++$i;
        }
        $arr[0] = array_slice($arr[0], 0, $i);
        return mb_convert_encoding(implode("", $arr[0]).$trimmarker,'JIS','SJIS');
    }
}
// The lines above are the tail of a function that begins before this section;
// they are reproduced unchanged (only re-indented).


/**
 * Return part of a string, counted in characters instead of bytes.
 *
 * The string is split into characters with the per-encoding regex from
 * $mbemu_internals['regex'] and then sliced with array_slice(), so $start and
 * $length follow array_slice() semantics (negative values count from the end).
 * JIS input is converted to Shift_JIS for splitting and converted back.
 *
 * @param string $str      input string
 * @param int    $start    index of the first character to return
 * @param mixed  $length   number of characters; any non-numeric value (the
 *                         default sentinel) means "to the end of the string"
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return string
 */
function mb_substr ( $str, $start , $length='notnumber' , $encoding='')
{
    global $mbemu_internals;

    $encoding = mb_detect_encoding($str, $encoding);

    switch ($e = $mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
    case 1 : //euc-jp
    case 2 : //shift-jis
    case 4 : //utf-8
    case 5 : //utf-16
    case 6 : //iso-8859-1
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $str, $arr);
        break;
    case 3 : //jis
        $str = mb_convert_encoding($str, 'SJIS', 'JIS');
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $arr);
    }

    // Accept numeric strings/floats as well as ints; previously a length such
    // as "3" was silently ignored because only is_int() values were honoured.
    if (is_numeric($length))
        $arr[0] = array_slice($arr[0], $start, (int)$length);
    else
        $arr[0] = array_slice($arr[0], $start);

    $str = implode("", $arr[0]);
    if ($e == 3)
        $str = mb_convert_encoding($str, 'JIS', 'SJIS');
    return $str;
}

/**
 * Byte-oriented cut that never splits a character.
 *
 * @param array $arr    preg_match_all() result; $arr[0] is the list of characters
 * @param int   $start  byte offset; the cut starts at the character containing it
 * @param int   $length maximum number of bytes to return; 0 means "to the end"
 * @return string
 */
function _sub_strcut($arr, $start, $length) {
    $chars = $arr[0];
    $max = count($chars);
    $counter = 0;
    for ($i = 0; $i < $max; ++$i) {
        $counter += strlen($chars[$i]);
        if ($counter <= $start) continue;

        // $i is the first character that reaches past byte $start.
        if ($length == 0)
            return implode('', array_slice($chars, $i));

        $s = '';
        $len = 0;
        for ($j = $i; $j < $max; ++$j) {
            $len += strlen($chars[$j]);
            if ($len > $length) break;   // later characters cannot fit either
            $s .= $chars[$j];
        }
        return $s;
    }
    return '';
}


/**
 * Emulation of mb_strcut(): cut by byte positions on character boundaries.
 *
 * @param string $str      input string
 * @param int    $start    byte offset
 * @param int    $length   maximum bytes; 0 (default) means "to the end"
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return string
 */
function mb_strcut ( $str, $start , $length=0 , $encoding = '')
{
    global $mbemu_internals;

    $encoding = mb_detect_encoding($str, $encoding);

    switch ($e = $mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
    case 1 : //euc-jp
    case 2 : //shift-jis
    case 4 : //utf-8
    case 5 : //utf-16
    case 6 : //iso-8859-1
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $str, $arr);
        return _sub_strcut($arr, $start, $length);
    case 3 : //jis
        $str = mb_convert_encoding($str, 'SJIS', 'JIS');
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $str, $arr);
        $sub = _sub_strcut($arr, $start, $length);
        return mb_convert_encoding($sub, 'JIS', 'SJIS');
    }
}

/**
 * Find the last occurrence of a character sequence inside another one.
 *
 * Characters are compared with === because each element is a raw byte string;
 * loose comparison would treat numeric-looking elements as equal numbers.
 *
 * @param array $ar_haystack list of characters to search in
 * @param array $ar_needle   list of characters to search for
 * @return int|false character index of the last match, FALSE if none
 *                   (an empty needle never matches)
 */
function _sub_strrpos($ar_haystack, $ar_needle)
{
    $len_n = count($ar_needle);
    if ($len_n == 0) return FALSE;

    for ($start = count($ar_haystack) - $len_n; $start >= 0; --$start) {
        $match = TRUE;
        for ($j = 0; $j < $len_n; ++$j) {
            if ($ar_haystack[$start+$j] !== $ar_needle[$j]) {
                $match = FALSE;
                break;
            }
        }
        if ($match) return $start;
    }
    return FALSE;
}

/**
 * Emulation of mb_strrpos(): character index of the last occurrence of $needle.
 *
 * @param string $haystack
 * @param string $needle
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return int|false
 */
function mb_strrpos ( $haystack, $needle , $encoding = '')
{

    global $mbemu_internals;

    $encoding = mb_detect_encoding($haystack, $encoding);

    switch ($e = $mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
    case 1 : //euc-jp
    case 2 : //shift-jis
    case 4 : //utf-8
    case 5 : //utf-16
    case 6 : //iso-8859-1
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $haystack, $ar_h);
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $needle, $ar_n);
        return _sub_strrpos($ar_h[0], $ar_n[0]);
    case 3 : //jis
        $haystack = mb_convert_encoding($haystack, 'SJIS', 'JIS');
        $needle = mb_convert_encoding($needle, 'SJIS', 'JIS');
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $haystack, $ar_h);
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $needle, $ar_n);
        return _sub_strrpos($ar_h[0], $ar_n[0]);
    }
}

/**
 * Find the first occurrence of a character sequence at or after $offset.
 *
 * @param array $ar_haystack list of characters to search in
 * @param array $ar_needle   list of characters to search for
 * @param int   $offset      character index to start from (negative values
 *                           behave like 0)
 * @return int|false character index of the match, FALSE if none
 *                   (an empty needle never matches)
 */
function _sub_strpos($ar_haystack, $ar_needle, $offset)
{
    $len_n = count($ar_needle);
    if ($len_n == 0) return FALSE;

    $last = count($ar_haystack) - $len_n;
    for ($i = ($offset < 0) ? 0 : $offset; $i <= $last; ++$i) {
        $match = TRUE;
        for ($j = 0; $j < $len_n; ++$j) {
            if ($ar_haystack[$i+$j] !== $ar_needle[$j]) {
                $match = FALSE;
                break;
            }
        }
        if ($match) return $i;
    }
    return FALSE;
}

/**
 * Emulation of mb_strpos(): character index of the first occurrence of $needle.
 *
 * @param string $haystack
 * @param string $needle
 * @param int    $offset   character index to start searching from
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return int|false
 */
function mb_strpos ( $haystack, $needle , $offset = 0, $encoding = '')
{

    global $mbemu_internals;

    $encoding = mb_detect_encoding($haystack, $encoding);

    switch ($e = $mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
    case 1 : //euc-jp
    case 2 : //shift-jis
    case 4 : //utf-8
    case 5 : //utf-16
    case 6 : //iso-8859-1
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $haystack, $ar_h);
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $needle, $ar_n);
        return _sub_strpos($ar_h[0], $ar_n[0], $offset);
    case 3 : //jis
        $haystack = mb_convert_encoding($haystack, 'SJIS', 'JIS');
        $needle = mb_convert_encoding($needle, 'SJIS', 'JIS');
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $haystack, $ar_h);
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $needle, $ar_n);
        return _sub_strpos($ar_h[0], $ar_n[0], $offset);
    }
}

/**
 * Count non-overlapping occurrences of a character sequence.
 *
 * After a match the scan resumes behind the matched characters, so
 * "aaa" contains "aa" once (the previous implementation counted 2).
 *
 * @param array $ar_haystack list of characters to search in
 * @param array $ar_needle   list of characters to search for
 * @return int number of matches; 0 for an empty needle
 */
function _sub_substr_count($ar_haystack, $ar_needle)
{
    $len_n = count($ar_needle);
    if ($len_n == 0) return 0;

    $matches = 0;
    $last = count($ar_haystack) - $len_n;
    $i = 0;
    while ($i <= $last) {
        $match = TRUE;
        for ($j = 0; $j < $len_n; ++$j) {
            if ($ar_haystack[$i+$j] !== $ar_needle[$j]) {
                $match = FALSE;
                break;
            }
        }
        if ($match) {
            ++$matches;
            $i += $len_n;
        } else {
            ++$i;
        }
    }
    return $matches;
}

/**
 * Emulation of mb_substr_count(): number of occurrences of $needle.
 *
 * @param string $haystack
 * @param string $needle
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return int
 */
function mb_substr_count($haystack, $needle , $encoding = '')
{

    global $mbemu_internals;

    $encoding = mb_detect_encoding($haystack, $encoding);

    switch ($e = $mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
    case 1 : //euc-jp
    case 2 : //shift-jis
    case 4 : //utf-8
    case 5 : //utf-16
    case 6 : //iso-8859-1
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $haystack, $ar_h);
        preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $needle, $ar_n);
        return _sub_substr_count($ar_h[0], $ar_n[0]);
    case 3 : //jis
        $haystack = mb_convert_encoding($haystack, 'SJIS', 'JIS');
        $needle = mb_convert_encoding($needle, 'SJIS', 'JIS');
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $haystack, $ar_h);
        preg_match_all('/'.$mbemu_internals['regex'][2].'/', $needle, $ar_n);
        return _sub_substr_count($ar_h[0], $ar_n[0]);
    }
}


/******************
 mb_convert_variables

 Two variants are defined depending on the ini setting
 'convert_variables_arrayonly'. Both detect the source encoding from the
 concatenation of all values and return the detected encoding name, or FALSE
 when detection fails.
*******************/
if (!$mbemu_internals['ini_file']['convert_variables_arrayonly']) {
    // NOTE(review): the parameters are declared by value, so converted values
    // only reach the caller when it passes them by reference at call time
    // (old PHP syntax) - confirm how callers use this variant.
    function mb_convert_variables($to_encoding, $from_encoding, $s1, $s2='',$s3='',$s4='',$s5='',$s6='',$s7='', $s8='',$s9='', $s10='')
    {
        if (is_array($s1)) {
            $st = '';
            foreach($s1 as $s) $st .= $s;
            if (!($encode = mb_detect_encoding($st, $from_encoding)))
                return FALSE;
            foreach ($s1 as $key => $val) {
                $s1[$key] = mb_convert_encoding($val, $to_encoding, $encode);
            }
            return $encode;
        }
        $st = $s1.$s2.$s3.$s4.$s5.$s6.$s7.$s8.$s9.$s10;
        if (!($encode = mb_detect_encoding($st, $from_encoding)))
            return FALSE;
        $s1 = mb_convert_encoding($s1, $to_encoding, $encode);
        $s2 = mb_convert_encoding($s2, $to_encoding, $encode);
        $s3 = mb_convert_encoding($s3, $to_encoding, $encode);
        $s4 = mb_convert_encoding($s4, $to_encoding, $encode);
        $s5 = mb_convert_encoding($s5, $to_encoding, $encode);
        $s6 = mb_convert_encoding($s6, $to_encoding, $encode);
        $s7 = mb_convert_encoding($s7, $to_encoding, $encode);
        $s8 = mb_convert_encoding($s8, $to_encoding, $encode);
        $s9 = mb_convert_encoding($s9, $to_encoding, $encode);
        $s10 = mb_convert_encoding($s10, $to_encoding, $encode);
        return $encode;
    }
} else {
    // Array-only variant: converts every element of $arr in place.
    function mb_convert_variables($to_encoding, $from_encoding, &$arr)
    {
        $st = '';
        foreach($arr as $s) $st .= $s;
        if (!($encode = mb_detect_encoding($st, $from_encoding)))
            return FALSE;
        foreach ($arr as $key => $val) {
            $arr[$key] = mb_convert_encoding($val, $to_encoding, $encode);
        }
        return $encode;
    }
}

/**
 * Map an encoding name to its MIME charset name.
 *
 * The name is upper-cased and looked up in $mbemu_internals['encoding'].
 * Because switch compares loosely, a name missing from that table (NULL)
 * falls into case 0 and yields 'US-ASCII'; an id without a case (e.g. AUTO)
 * yields no return value.
 *
 * @param string $encoding encoding name or alias
 * @return string|null
 */
function mb_preferred_mime_name ($encoding)
{
    global $mbemu_internals;

    $encoding = strtoupper($encoding);

    switch ($mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
        return 'US-ASCII';
    case 1 : //euc-jp
        return 'EUC-JP';
    case 2 : //shift-jis
        return 'Shift_JIS';
    case 3 : //jis
        return 'ISO-2022-JP';
    case 4 : //utf-8
        return 'UTF-8';
    case 5 :
        return 'UTF-16';
    case 6 :
        return 'ISO-8859-1';
    }
}

/**
 * Decode MIME encoded-words ("=?charset?B|Q?text?=") in a header value.
 *
 * Folded lines are joined, "<user@host>" address parts are removed, each
 * encoded-word is decoded and converted to the internal encoding, and
 * whitespace-only text between encoded-words is dropped.
 *
 * @param string $str raw header value
 * @return string decoded text in mb_internal_encoding()
 */
function mb_decode_mimeheader($str)
{
    $lines = preg_split("/(\r\n|\r|\n)( *)/", $str);
    $s = '';
    foreach ($lines as $line) {
        if ($line != "") {
            $line = preg_replace("/<[\w\-+\.]+\@[\w\-+\.]+>/","", $line); // strip the mail-address part
            // Pieces come in groups of four: plain text, charset, B/Q marker, payload.
            // The marker is matched case-insensitively (RFC 2047 allows "b"/"q").
            $matches = preg_split("/=\?([^?]+)\?([BbQq])\?([^?]+)\?=/", $line, -1, PREG_SPLIT_DELIM_CAPTURE);
            $last = count($matches) - 1;
            for ($i = 0; $i < $last; $i+=4) {
                if (!preg_match("/^[ \t\r\n]*$/", $matches[$i]))
                    $s .= $matches[$i];
                if (strtoupper($matches[$i+2]) == 'B') {
                    $decoded = base64_decode($matches[$i+3]);
                } else {
                    // In Q encoding "_" stands for a space; a literal
                    // underscore is written as "=5F" and decoded afterwards.
                    $decoded = quoted_printable_decode(str_replace('_', ' ', $matches[$i+3]));
                }
                $s .= mb_convert_encoding($decoded, mb_internal_encoding(), $matches[$i+1]);
            }
            // Trailing plain text after the last encoded-word.
            if (!preg_match("/^[ \t\r\n]*$/", $matches[$i]))
                $s .= $matches[$i];
        }
    }
    return $s;
}

/**
 * Quoted-printable encode the bytes of one character.
 *
 * ASCII letters are copied as they are; every other byte becomes "=XX" with
 * two upper-case hex digits (zero padded - "%2X" used to emit "= A" for 0x0A).
 *
 * @param string $str  bytes of a single character
 * @param int    &$len receives the length of the encoded result
 * @return string encoded text
 */
function _sub_qponechar($str, &$len)
{
    $s = '';
    $len = 0;
    $max = strlen($str);
    for ($i = 0; $i < $max; ++$i) {
        $char = ord($str[$i]);
        if (((ord('A') <= $char) && ($char <= ord('Z'))) ||
            ((ord('a') <= $char) && ($char <= ord('z')))) {
            $s .= chr($char);
            ++$len;
        } else {
            $s .= '='.sprintf("%02X",$char);
            $len += 3;
        }
    }
    return $s;
}

/**
 * Quoted-printable encode $str, breaking lines only between characters.
 *
 * For JIS the text is processed chunk by chunk (ASCII, kanji, half-width
 * kana); every output line that is in kanji/kana mode is closed with the
 * encoded "ESC ( B" sequence and the mode is re-opened on the next line.
 *
 * @param string $str      text already converted to $encoding
 * @param string $encoding upper-case encoding name
 * @param int    $maxline  maximum encoded bytes per line
 * @param string $linefeed line separator appended after every line
 * @return string encoded lines, each followed by $linefeed
 */
function _sub_quoted_printable_encode($str, $encoding, $maxline, $linefeed)
{
    global $mbemu_internals;
    switch ($e = $mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
    case 1 : //euc-jp
    case 2 : //shift-jis
    case 4 : //utf-8
    case 5 : //utf-16
    case 6 : //iso-8859-1 (5 and 6 used to fall through with nothing to encode)
        $max = preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $str, $allchars);
        break;
    case 3 : //jis
        $max = preg_match_all('/'.$mbemu_internals['regex'][3].'/', $str, $allchunks, PREG_SET_ORDER); // split into chunks by character set
        $st = '';                 // text after quoted-printable conversion
        $len = $maxline;          // bytes that can still be added to the current line
        $needterminate = FALSE;   // whether a closing escape sequence is still owed
        for ($i = 0; $i < $max; ++$i) {
            $chunk = $allchunks[$i];
            if (isset($chunk[1]) && $chunk[1] !== '') { // matched ASCII
                if ($needterminate) {
                    $st .= '=1B=28B';
                    $len -= 7;
                }
                $tmparr = unpack("C*", $chunk[1]);
                foreach ($tmparr as $char) {
                    $tmp = _sub_qponechar(chr($char), $l);
                    if ($len < $l) {
                        $st .= $linefeed;
                        $len = $maxline;
                    }
                    $st .= $tmp;
                    $len -= $l;
                }
                $needterminate = FALSE;
            } elseif (isset($chunk[2]) && $chunk[2] !== '') { // matched kanji
                // Skip the 3-byte escape sequence, then take 2-byte characters.
                $maxchars = preg_match_all("/../",substr($chunk[0], 3),$allchars);
                $tmp = _sub_qponechar($allchars[0][0], $l);
                if ($len < 14 + $l) {   // room for shift-in, one character, shift-out
                    if ($needterminate)
                        $st .= '=1B=28B';
                    $st .= $linefeed;
                    $len = $maxline;
                }
                $st .= '=1B=24B';
                $len -= 7;
                for ($j = 0; $j < $maxchars; ++$j) {
                    $tmp = _sub_qponechar($allchars[0][$j], $l);
                    if ($len < $l + 7) {
                        $st .= '=1B=28B'.$linefeed.'=1B=24B';
                        $len = $maxline-7;
                    }
                    $st .= $tmp;
                    $len -= $l;
                }
                $needterminate = TRUE;

            } elseif (isset($chunk[3]) && $chunk[3] !== '') { // matched half-width kana
                // Uses its own counter: the outer loop bound $max must not be
                // overwritten, and $j has to be assigned (was "$j == 0").
                $maxchars = preg_match_all("/./",$chunk[3],$allchars);
                $tmp = _sub_qponechar($allchars[0][0], $l);
                if ($len < 14 + $l) {
                    if ($needterminate)
                        $st .= '=1B=28B';
                    $st .= $linefeed;
                    $len = $maxline;
                }
                $st .= '=1B=28I';
                $len -= 7;
                for ($j = 0; $j < $maxchars; ++$j) {
                    $tmp = _sub_qponechar($allchars[0][$j], $l);
                    if ($len < $l + 7) {
                        $st .= '=1B=28B'.$linefeed.'=1B=28I';
                        $len = $maxline-7;
                    }
                    $st .= $tmp;
                    $len -= $l;
                }
                $needterminate = TRUE;
            }
        }
        if ($needterminate) $st .= '=1B=28B';
        $st .= $linefeed;
        return $st;
    }
    $st = '';          // text after quoted-printable conversion
    $len = $maxline;   // bytes that can still be added to the current line
    for ($i = 0; $i < $max; ++$i) {
        $tmp = _sub_qponechar($allchars[0][$i], $l);
        if ($l > $len) {
            $st .= $linefeed;
            $len = $maxline;
        }
        $st .= $tmp;
        $len -= $l;
    }
    $st .= $linefeed;
    return $st;
}

/**
 * Base64 encode $str in lines that never split a character.
 *
 * ASCII and ISO-8859-1 are simply chunked. For the other encodings characters
 * are collected until the raw-byte budget of a line (3/4 of $maxline) is
 * used up. For JIS every line is made self-contained by closing the
 * kanji/kana mode with "ESC ( B" and re-opening it on the next line.
 *
 * @param string $str      text already converted to $encoding
 * @param string $encoding upper-case encoding name
 * @param int    $maxline  maximum encoded length of one line
 * @param string $linefeed line separator appended after every line
 * @return string encoded lines, each followed by $linefeed
 */
function _sub_encode_base64($str, $encoding, $maxline , $linefeed)
{
    global $mbemu_internals;
    switch ($e = $mbemu_internals['encoding'][$encoding]) {
    case 0 : //ascii
    case 6 : //iso-8859-1
        return chunk_split( base64_encode($str) , $maxline, $linefeed);
    case 1 : //euc-jp
    case 2 : //shift-jis
    case 4 : //utf-8
    case 5 : //utf-16
        $max = preg_match_all('/'.$mbemu_internals['regex'][$e].'/', $str, $allchars);
        break;
    case 3 : //jis
        $max = preg_match_all('/'.$mbemu_internals['regex'][3].'/', $str, $allchunks); // split into chunks by character set
        $st = '';                              // text after BASE64 conversion
        $maxbytes = floor($maxline * 3 / 4);   // raw bytes that fit into one line
        $len = $maxbytes;                      // raw bytes that can still be added to the current line
        $line = '';                            // raw (unencoded) text of the current line
        $needterminate = FALSE;                // whether a closing escape sequence is still owed
        for ($i = 0; $i < $max; ++$i) {
            if (ord($allchunks[1][$i])) { // matched ASCII
                if ($needterminate) {
                    $line .= chr(0x1B).'(B';
                    $len -= 3;
                }
                $tmpstr = $allchunks[1][$i];   // text to add
                $l = strlen($tmpstr);          // its length
                while ($l > $len) {
                    $line .= substr($tmpstr, 0, $len);
                    $st .= base64_encode($line).$linefeed;
                    $l -= $len;
                    $tmpstr = substr($tmpstr, $len);
                    $len = $maxbytes;
                    $line = '';
                }
                $line .= $tmpstr;
                $len -= $l;
                $needterminate = FALSE;
            } elseif (ord($allchunks[2][$i])) { // matched kanji
                $tmpstr = substr($allchunks[0][$i], 3);
                if ($len < 8) { // adding one character needs at least 8 bytes
                    if ($needterminate)
                        $line .= chr(0x1B).'(B';
                    $st .= base64_encode($line).$linefeed;
                    $len = $maxbytes;
                    $line = '';
                }
                $l = strlen($tmpstr);
                $line .= chr(0x1B).'$B';
                $len -= 3;
                while ($l > $len-3) {
                    // Keep 3 bytes for the closing escape and cut on a 2-byte boundary.
                    $add = floor(($len-3) / 2) * 2;
                    if ($add == 0) break;
                    $line .= substr($tmpstr, 0, $add).chr(0x1B).'(B';
                    $st .= base64_encode($line).$linefeed;
                    $l -= $add;
                    $tmpstr = substr($tmpstr, $add);
                    $len = $maxbytes-3;
                    $line = chr(0x1B).'$B';
                }
                $line .= $tmpstr;
                $len -= $l;
                $needterminate = TRUE;

            } elseif (ord($allchunks[3][$i])) { // matched half-width kana
                $tmpstr = $allchunks[3][$i];
                if ($len < 7) { // adding one character needs at least 7 bytes
                    if ($needterminate)
                        $line .= chr(0x1B).'(B';
                    $st .= base64_encode($line).$linefeed;
                    $len = $maxbytes;
                    $line = '';
                }
                $l = strlen($tmpstr);
                $line .= chr(0x1B).'(I';
                $len -= 3;
                while ($l > $len-3) {
                    // Only $take bytes are consumed per line, so only $take may
                    // be subtracted from the remaining length (was "$l -= $len").
                    $take = $len - 3;
                    $line .= substr($tmpstr, 0, $take).chr(0x1B).'(B';
                    $st .= base64_encode($line).$linefeed;
                    $l -= $take;
                    $tmpstr = substr($tmpstr, $take);
                    $len = $maxbytes-3;
                    $line = chr(0x1B).'(I';
                }
                $line .= $tmpstr;
                $len -= $l;
                $needterminate = TRUE;
            }
        }
        if ($needterminate) $line .= chr(0x1B).'(B';
        $st .= base64_encode($line).$linefeed;
        return $st;
    }
    $st = '';                              // text after BASE64 conversion
    $maxbytes = floor($maxline * 3 / 4);   // raw bytes that fit into one line
    $len = $maxbytes;                      // raw bytes that can still be added to the current line
    $line = '';                            // raw (unencoded) text of the current line
    for ($i = 0; $i < $max; ++$i) {
        $l = strlen($allchars[0][$i]);
        if ($l > $len) {
            $st .= base64_encode($line).$linefeed;
            $len = $maxbytes;
            $line = '';
        }
        $line .= $allchars[0][$i];
        $len -= $l;
    }
    $st .= base64_encode($line).$linefeed;
    return $st;
}

/**
 * Encode a string as MIME encoded-words.
 *
 * The text is converted from the internal encoding to $encoding, encoded with
 * base64 ("B") or quoted-printable (anything else), and every resulting line
 * is wrapped as "=?charset?X?...?=" followed by $linefeed.
 *
 * @param string $str               text in mb_internal_encoding()
 * @param string $encoding          target charset
 * @param string $transfer_encoding "B"/"b" for base64, anything else selects "Q"
 * @param string $linefeed          separator appended after every encoded-word
 * @return string
 */
function mb_encode_mimeheader( $str, $encoding = "ISO-2022-JP", $transfer_encoding = "B", $linefeed = "\r\n")
{
    global $mbemu_internals;
    if ($transfer_encoding == "b") $transfer_encoding = "B";
    if ($transfer_encoding != "B") $transfer_encoding = "Q";
    $encoding = strtoupper($encoding);

    $head = '=?' . mb_preferred_mime_name ($encoding) . '?'.$transfer_encoding.'?';
    $str = mb_convert_encoding($str, $encoding, mb_internal_encoding());
    // 76 columns per line minus the "=?charset?X?" prefix; 4 more are reserved
    // (the "?=" suffix takes two of them).
    $length = 76 - strlen($head) - 4;
    if ($transfer_encoding == "B") {
        $str = _sub_encode_base64( $str , $encoding, $length, $linefeed);
    } else {
        $str = _sub_quoted_printable_encode($str, $encoding, $length, $linefeed);
    }

    $s = '';
    foreach (explode($linefeed, $str) as $element) {
        if ($element != '')
            $s .= $head . $element . '?=' .$linefeed;
    }
    return $s;
}

/**
 * HTTP input encoding detection is not emulated; always returns FALSE.
 */
function mb_http_input($type = '')
{
    return FALSE;
}

/**
 * Get or set the HTTP output encoding kept in the ini settings.
 *
 * @param string $encoding empty to read the current value, 'pass' (any case)
 *                         to disable conversion, otherwise an encoding name
 *                         stored as its preferred MIME name
 * @return mixed current setting when reading, TRUE when setting
 */
function mb_http_output($encoding = '')
{
    global $mbemu_internals;

    if ($encoding == '') return $mbemu_internals['ini_file']['http_output'];
    if (strtolower($encoding) == 'pass') {
        $mbemu_internals['ini_file']['http_output'] = 'pass';
        return TRUE;
    }
    $mbemu_internals['ini_file']['http_output'] = mb_preferred_mime_name($encoding);
    return TRUE;
}


/**
 * Output-buffer callback: convert $buffer from the internal encoding to the
 * configured HTTP output encoding ('pass' leaves the buffer untouched).
 *
 * @param string $buffer buffered output
 * @param mixed  $status unused
 * @return string
 */
function mb_output_handler ( $buffer, $status='')
{
    global $mbemu_internals;
    if ($mbemu_internals['ini_file']['http_output'] == 'pass')
        return $buffer;
    return mb_convert_encoding($buffer, $mbemu_internals['ini_file']['http_output'], mb_internal_encoding());
}


/**
 * Replace characters covered by $convmap with decimal numeric entities.
 *
 * $convmap is a flat list of groups of four ints: range start, range end,
 * offset, mask. A code inside a range is emitted as "&#N;" with
 * N = (code + offset) & mask. Every other character is returned in $encoding
 * (it used to leak out as raw UTF-16 bytes).
 *
 * @param string $str      input text
 * @param array  $convmap  conversion map as described above
 * @param string $encoding encoding of $str; defaults to the internal encoding
 * @return string
 */
function mb_encode_numericentity($str, $convmap, $encoding="")
{
    if (!$encoding) $encoding = mb_internal_encoding();
    $str = mb_convert_encoding($str, "utf-16", $encoding);
    $ar = unpack("n*", $str);
    if (!is_array($ar)) $ar = array();

    $max = count($convmap);
    $s = "";
    $pending = "";   // UTF-16 run of characters that are passed through
    foreach($ar as $char) {
        $entity = FALSE;
        for ($i = 0; $i < $max; $i += 4) {
            if (($convmap[$i] <= $char) && ($char <= $convmap[$i+1])) {
                $entity = sprintf("&#%u;", ($char + $convmap[$i+2]) & $convmap[$i+3]);
                break;
            }
        }
        if ($entity === FALSE) {
            $pending .= pack("n", $char);
            continue;
        }
        // Flush the pass-through run in one conversion before the entity.
        if ($pending !== "") {
            $s .= mb_convert_encoding($pending, $encoding, 'UTF-16');
            $pending = "";
        }
        $s .= $entity;
    }
    if ($pending !== "")
        $s .= mb_convert_encoding($pending, $encoding, 'UTF-16');
    return $s;
}

/**
 * Replace decimal numeric entities ("&#N;") with the characters they denote.
 *
 * An entity is decoded when N minus the group's offset lies inside one of the
 * $convmap ranges (groups of four: start, end, offset, mask); other entities
 * and all remaining text are copied unchanged.
 *
 * @param string $str      input text
 * @param array  $convmap  conversion map
 * @param string $encoding target encoding; defaults to the internal encoding
 * @return string
 */
function mb_decode_numericentity ($str, $convmap, $encoding="")
{
    if (!$encoding) $encoding = mb_internal_encoding();
    $ar = preg_split('/(&#[0-9]+;)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    $s = '';
    $max = count($convmap);
    foreach($ar as $chunk) {
        if (preg_match('/&#([0-9]+);/', $chunk, $match)) {
            for ($i = 0; $i < $max; $i += 4) {
                $num = $match[1] - $convmap[$i+2];
                if (($convmap[$i] <= $num) && ($num <= $convmap[$i+1])) {
                    $ucs2 = pack('n*', $num);
                    $s .= mb_convert_encoding($ucs2, $encoding, 'UTF-16');
                    break;
                }
            }
            if ($i >= $max) $s .= $chunk;   // no range matched: keep the entity text
        } else {
            $s .= $chunk;
        }
    }
    return $s;
}

/**
 * Emulation of mb_strtoupper().
 *
 * Works on a UTF-8 copy of the string: ASCII a-z are shifted directly, other
 * characters are looked up in $mbemu_internals['upperarray'] (loaded from
 * upper.table); the result is converted back to the source encoding.
 *
 * @param string $str
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return string
 */
function mb_strtoupper($str, $encoding='')
{
    global $mbemu_internals;

    include_once(dirname(__FILE__).'/upper.table');
    $encoding = mb_detect_encoding($str, $encoding);
    $str = mb_convert_encoding($str, 'UTF-8', $encoding);

    $max = preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, $allchars);  // make array of chars
    $newst = '';
    for ($i = 0; $i < $max; ++$i) {
        $val = _utf8ucs2($allchars[0][$i]);  //get ucs2 value
        if ((0x61 <= $val) && ($val <= 0x7a)) {
            $newst .= _ucs2utf8($val - 0x20);
        } elseif (!empty($mbemu_internals['upperarray'][$val])) {
            $newst .= _ucs2utf8($mbemu_internals['upperarray'][$val]);
        } else {
            $newst .= $allchars[0][$i];
        }
    }
    return mb_convert_encoding($newst, $encoding, 'UTF-8');
}

/**
 * Emulation of mb_strtolower().
 *
 * Works on a UTF-8 copy of the string: ASCII A-Z are shifted directly, other
 * characters are looked up in $mbemu_internals['lowerarray'] (loaded from
 * lower.table); the result is converted back to the source encoding.
 *
 * @param string $str
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return string
 */
function mb_strtolower($str, $encoding='')
{
    global $mbemu_internals;

    include_once(dirname(__FILE__).'/lower.table');
    $encoding = mb_detect_encoding($str, $encoding);
    $str = mb_convert_encoding($str, 'UTF-8', $encoding);

    $max = preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, $allchars);  // make array of chars
    $newst = '';
    for ($i = 0; $i < $max; ++$i) {
        $val = _utf8ucs2($allchars[0][$i]);  //get ucs2 value
        if ((0x41 <= $val) && ($val <= 0x5a)) {
            $newst .= _ucs2utf8($val + 0x20);
        } elseif (!empty($mbemu_internals['lowerarray'][$val])) {
            $newst .= _ucs2utf8($mbemu_internals['lowerarray'][$val]);
        } else {
            $newst .= $allchars[0][$i];
        }
    }
    return mb_convert_encoding($newst, $encoding, 'UTF-8');
}

/**
 * Emulation of mb_convert_case().
 *
 * MB_CASE_UPPER and MB_CASE_LOWER delegate to mb_strtoupper()/mb_strtolower().
 * MB_CASE_TITLE upper-cases the first letter of every run of letters and
 * lower-cases the rest; any character that is neither ASCII a-z/A-Z nor
 * present in the case tables ends the current run. Any other $case value
 * produces no return value.
 *
 * @param string $str
 * @param int    $case     MB_CASE_UPPER, MB_CASE_LOWER or MB_CASE_TITLE
 * @param string $encoding encoding hint handed to mb_detect_encoding()
 * @return string|null
 */
function mb_convert_case($str, $case, $encoding='')
{
    global $mbemu_internals;

    switch($case) {
    case MB_CASE_UPPER :
        return mb_strtoupper($str, $encoding);
    case MB_CASE_LOWER :
        return mb_strtolower($str, $encoding);
    case MB_CASE_TITLE :
        include_once(dirname(__FILE__).'/upper.table');
        include_once(dirname(__FILE__).'/lower.table');
        $encoding = mb_detect_encoding($str, $encoding);
        $str = mb_convert_encoding($str, 'UTF-8', $encoding);

        $max = preg_match_all('/'.$mbemu_internals['regex'][4].'/', $str, $allchars);  // make array of chars
        $newst = '';
        $isalpha = FALSE;   // TRUE while inside a run of letters
        for ($i = 0; $i < $max; ++$i) {
            $val = _utf8ucs2($allchars[0][$i]);  //get ucs2 value
            if ((0x41 <= $val) && ($val <= 0x5a)) {          // ASCII upper case
                if ($isalpha) {
                    $val += 0x20; // to lower
                } else {
                    $isalpha = TRUE;
                }
                $newst .= _ucs2utf8($val);
            } elseif ((0x61 <= $val) && ($val <= 0x7a)) {    // ASCII lower case
                if (!$isalpha) {
                    $val -= 0x20; // to upper
                    $isalpha = TRUE;
                }
                $newst .= _ucs2utf8($val);
            } elseif (!empty($mbemu_internals['upperarray'][$val])) { // this char is lower
                if ($isalpha) {
                    $newst .= _ucs2utf8($val);
                } else {
                    $isalpha = TRUE;
                    $newst .= _ucs2utf8($mbemu_internals['upperarray'][$val]);
                }
            } elseif (!empty($mbemu_internals['lowerarray'][$val])) { // this char is upper
                if ($isalpha) {
                    $newst .= _ucs2utf8($mbemu_internals['lowerarray'][$val]);
                } else {
                    $isalpha = TRUE;
                    $newst .= _ucs2utf8($val);
                }
            } else {
                $isalpha = FALSE;
                $newst .= $allchars[0][$i];
            }
        }
        return mb_convert_encoding($newst, $encoding, 'UTF-8');
    }
}


/**
 * Debug helper: print every byte of $str as hex (width 2, space padded),
 * each preceded by a space, followed by a newline.
 *
 * @param string $str
 */
function _print_str($str) {
    $all = unpack("C*", $str);
    $s = '';
    foreach($all as $char) {
        $s .= sprintf(" %2X",$char);
    }
    print $s."\n";
}

?>