|
| 1 | +<?php |
| 2 | +/** |
| 3 | + * Nexmo Client Library for PHP |
| 4 | + * |
| 5 | + * @copyright Copyright (c) 2016 Nexmo, Inc. (http://nexmo.com) |
| 6 | + * @license https://github.com/Nexmo/nexmo-php/blob/master/LICENSE.txt MIT License |
| 7 | + */ |
| 8 | + |
| 9 | +namespace Nexmo\Message; |
| 10 | + |
/**
 * Decides whether a message body can be sent with the GSM 7-bit alphabet or
 * whether it has to be sent using unicode encoding.
 */
class EncodingDetector
{
    /**
     * Check whether the given text contains characters outside of the set of
     * code points this class treats as valid GSM characters.
     *
     * @param string $content UTF-8 encoded message text.
     *
     * @return bool True when at least one character is not in the GSM code
     *              point table (or when the input is not valid UTF-8 and so
     *              cannot be interpreted as GSM text); false otherwise,
     *              including for an empty string.
     */
    public function requiresUnicodeEncoding($content)
    {
        // Unicode code points accepted as GSM characters.
        $gsmCodePoints = [
            0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC, 0x00F2, 0x00E7, 0x000A, 0x00D8, 0x00F8,
            0x000D, 0x00C5, 0x00E5, 0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, 0x03A3, 0x0398,
            0x039E, 0x00A0, 0x000C, 0x005E, 0x007B, 0x007D, 0x005C, 0x005B, 0x007E, 0x005D, 0x007C, 0x20AC, 0x00C6,
            0x00E6, 0x00DF, 0x00C9, 0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029,
            0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
            0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x00A1, 0x0041, 0x0042, 0x0043,
            0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
            0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1,
            0x00DC, 0x00A7, 0x00BF, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A,
            0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
            0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0,
        ];

        // Flip the table so membership is a constant-time isset() lookup.
        $gsmLookup = array_flip($gsmCodePoints);

        // Split $content into an array in a way that respects multibyte characters.
        // The limit is passed as -1 ("no limit"); passing null is deprecated as of PHP 8.1.
        $chars = preg_split('//u', (string) $content, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars === false) {
            // preg_split() fails when $content is not valid UTF-8. Such input
            // cannot be GSM text, so report that unicode handling is needed
            // instead of passing false on to the code below.
            return true;
        }

        foreach ($chars as $char) {
            // UTF-32BE always uses exactly four bytes per character, so the
            // unpacked big-endian integer is the full code point, including
            // for characters outside the Basic Multilingual Plane.
            $unpacked = unpack('N', mb_convert_encoding($char, 'UTF-32BE', 'UTF-8'));

            if (!isset($gsmLookup[$unpacked[1]])) {
                // One character outside the table is enough to decide.
                return true;
            }
        }

        return false;
    }
}
| 48 | + |
0 commit comments