Skip to content

Commit 610fba3

Browse files
committed
Ability for messages to auto-detect if they need to be Unicode
This is an initial attempt at solving Unicode auto-detection. It is *disabled* by default and needs to be enabled explicitly by calling `$message->enableEncodingDetection()`.
1 parent f80f3b8 commit 610fba3

File tree

7 files changed

+239
-0
lines changed

7 files changed

+239
-0
lines changed

src/Entity/RequestArrayTrait.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ public function getRequestData($sent = true)
4040
return $query;
4141
}
4242

43+
// Trigger a pre-getRequestData() hook for any last minute
44+
// decision making that needs to be done, but only if
45+
// it hasn't been sent already
46+
if (method_exists($this, 'preGetRequestDataHook')) {
47+
$this->preGetRequestDataHook();
48+
}
49+
4350
return $this->requestData;
4451
}
4552

src/Message/AutoDetect.php

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?php
2+
/**
3+
* Nexmo Client Library for PHP
4+
*
5+
* @copyright Copyright (c) 2016 Nexmo, Inc. (http://nexmo.com)
6+
* @license https://github.com/Nexmo/nexmo-php/blob/master/LICENSE.txt MIT License
7+
*/
8+
9+
namespace Nexmo\Message;
10+
11+
/**
 * SMS message that has encoding detection switched on from construction.
 */
class AutoDetect extends Message
{
    const TYPE = 'text';

    /**
     * Message Body
     *
     * Not assigned by this class's constructor, which stores the body in
     * the request data instead.
     *
     * @var string
     */
    protected $text;

    /**
     * Build a message and enable encoding detection for it.
     *
     * @param string $to
     * @param string $from
     * @param string $text       Message body; cast to string before it is stored
     * @param array  $additional Forwarded unchanged to the parent constructor
     */
    public function __construct($to, $from, $text, $additional = [])
    {
        parent::__construct($to, $from, $additional);

        // Store the body under the 'text' key of the request payload.
        $body = (string) $text;
        $this->requestData['text'] = $body;

        $this->enableEncodingDetection();
    }
}

src/Message/EncodingDetector.php

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
/**
3+
* Nexmo Client Library for PHP
4+
*
5+
* @copyright Copyright (c) 2016 Nexmo, Inc. (http://nexmo.com)
6+
* @license https://github.com/Nexmo/nexmo-php/blob/master/LICENSE.txt MIT License
7+
*/
8+
9+
namespace Nexmo\Message;
10+
11+
class EncodingDetector
{
    /**
     * Unicode code points of the characters treated as valid GSM characters,
     * i.e. the ones that do not force a message to be sent as Unicode.
     *
     * @var int[]
     */
    private static $gsmCodePoints = [
        0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC, 0x00F2, 0x00E7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5, 0x0394,
        0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, 0x03A3, 0x0398, 0x039E, 0x00A0, 0x000C, 0x005E, 0x007B, 0x007D, 0x005C, 0x005B,
        0x007E, 0x005D, 0x007C, 0x20AC, 0x00C6, 0x00E6, 0x00DF, 0x00C9, 0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, 0x0028,
        0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039,
        0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x00A1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A,
        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00C4,
        0x00D6, 0x00D1, 0x00DC, 0x00A7, 0x00BF, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B, 0x006C,
        0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1,
        0x00FC, 0x00E0,
    ];

    /**
     * Decide whether a message body must be sent as Unicode.
     *
     * Every character of $content is compared against the GSM code point
     * table above; a single character outside that table is enough to
     * require Unicode.
     *
     * @param string $content Message body, expected to be UTF-8 encoded
     *
     * @return bool True when at least one character is not in the GSM table,
     *              or when $content is not valid UTF-8 and so cannot be
     *              confirmed as GSM-only; false otherwise (including for an
     *              empty string)
     */
    public function requiresUnicodeEncoding($content)
    {
        $content = (string) $content;

        // Nothing to inspect, so nothing can fall outside the GSM table.
        if ($content === '') {
            return false;
        }

        // Malformed UTF-8 cannot be decoded into code points. Report it as
        // needing Unicode rather than letting the decode step fail or
        // silently substitute characters.
        if (!mb_check_encoding($content, 'UTF-8')) {
            return true;
        }

        // UTF-32BE stores each code point as one big-endian 32-bit integer,
        // so a single unpack() yields the real code point of every character,
        // including those outside the Basic Multilingual Plane.
        $codePoints = unpack('N*', mb_convert_encoding($content, 'UTF-32BE', 'UTF-8'));

        // Flip the table so membership is a constant-time isset() check and
        // the scan can stop at the first character that is not GSM.
        $gsmLookup = array_flip(self::$gsmCodePoints);
        foreach ($codePoints as $codePoint) {
            if (!isset($gsmLookup[$codePoint])) {
                return true;
            }
        }

        return false;
    }
}
48+

src/Message/Message.php

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
namespace Nexmo\Message;
10+
use Nexmo\Message\EncodingDetector;
1011
use Nexmo\Entity\JsonResponseTrait;
1112
use Nexmo\Entity\Psr7Trait;
1213
use Nexmo\Entity\RequestArrayTrait;
@@ -40,6 +41,8 @@ class Message implements MessageInterface, \Countable, \ArrayAccess, \Iterator
4041

4142
protected $id;
4243

44+
protected $autodetectEncoding = false;
45+
4346
/**
4447
* @param string $idOrTo Message ID or E.164 (international) formatted number to send the message
4548
* @param null|string $from Number or name the message is from
@@ -90,6 +93,16 @@ public function setClass($class)
9093
return $this->setRequestData('message-class', $class);
9194
}
9295

96+
/**
 * Turn on automatic encoding detection for this message.
 *
 * Only sets the flag; the flag is read back through
 * isEncodingDetectionEnabled().
 *
 * @return $this The same instance, so calls can be chained
 */
public function enableEncodingDetection()
{
    $this->autodetectEncoding = true;

    return $this;
}
100+
101+
/**
 * Turn off automatic encoding detection for this message.
 *
 * Only clears the flag; the flag is read back through
 * isEncodingDetectionEnabled().
 *
 * @return $this The same instance, so calls can be chained
 */
public function disableEncodingDetection()
{
    $this->autodetectEncoding = false;

    return $this;
}
105+
93106
public function count()
94107
{
95108
$data = $this->getResponseData();
@@ -188,6 +201,11 @@ public function getDeliveryLabel()
188201
return $this['error-code-label'];
189202
}
190203

204+
/**
 * Report the current state of the encoding auto-detection flag.
 *
 * @return bool The value last set by enableEncodingDetection() or
 *              disableEncodingDetection(); false if neither was called
 */
public function isEncodingDetectionEnabled()
{
    $enabled = $this->autodetectEncoding;

    return $enabled;
}
208+
191209
protected function getMessageData($name, $index = null)
192210
{
193211
if(!isset($this->response)){
@@ -206,6 +224,30 @@ protected function getMessageData($name, $index = null)
206224
return $data[$name];
207225
}
208226

227+
/**
 * Hook that fills in the `type` field of the request payload.
 *
 * Does nothing unless encoding detection is enabled; when it is, `type`
 * is overwritten with the result of detectEncoding().
 */
protected function preGetRequestDataHook()
{
    // Detection is opt-in: leave the payload untouched when it is off.
    if (!$this->isEncodingDetectionEnabled()) {
        return;
    }

    $detectedType = $this->detectEncoding();
    $this->requestData['type'] = $detectedType;
}
235+
236+
/**
 * Work out which message type the current `text` payload needs.
 *
 * @return string Unicode::TYPE when a `text` value is set and the detector
 *                reports that it requires Unicode; otherwise the TYPE
 *                constant of the concrete message class
 */
protected function detectEncoding()
{
    $hasText = isset($this->requestData['text']);

    if ($hasText) {
        // Auto detect unicode messages
        $detector = new EncodingDetector();
        $needsUnicode = $detector->requiresUnicodeEncoding($this->requestData['text']);

        if ($needsUnicode) {
            return Unicode::TYPE;
        }
    }

    // No text to inspect, or the text fits without Unicode.
    return static::TYPE;
}
250+
209251
public function offsetExists($offset)
210252
{
211253
$response = $this->getResponseData();

test/Message/AutoDetectTest.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<?php
2+
/**
3+
* Nexmo Client Library for PHP
4+
*
5+
* @copyright Copyright (c) 2016 Nexmo, Inc. (http://nexmo.com)
6+
* @license https://github.com/Nexmo/nexmo-php/blob/master/LICENSE.txt MIT License
7+
*/
8+
9+
namespace NexmoTest\Message;
10+
use Nexmo\Message\AutoDetect;
11+
12+
class AutoDetectTest extends \PHPUnit_Framework_TestCase
{
    /**
     * An AutoDetect message should report encoding detection as enabled
     * straight after construction, without any explicit call to turn it on.
     */
    public function testAutoDetectEnabledByDefault()
    {
        $message = new AutoDetect('to', 'from', 'Example Message');

        $enabled = $message->isEncodingDetectionEnabled();

        $this->assertTrue($enabled);
    }
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
/**
3+
* Nexmo Client Library for PHP
4+
*
5+
* @copyright Copyright (c) 2016 Nexmo, Inc. (http://nexmo.com)
6+
* @license https://github.com/Nexmo/nexmo-php/blob/master/LICENSE.txt MIT License
7+
*/
8+
9+
namespace NexmoTest\Message;
10+
use Nexmo\Message\EncodingDetector;
11+
12+
class EncodingDetectorTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Each sample must be classified exactly as the provider states.
     *
     * @dataProvider unicodeProvider
     */
    public function testDetectsUnicode($content, $expected)
    {
        $d = new EncodingDetector;

        // assertSame so that only a real boolean of the right value passes;
        // assertEquals would also accept loosely-equal values such as 0 or ''.
        $this->assertSame($expected, $d->requiresUnicodeEncoding($content));
    }

    /**
     * Sample sentences keyed by language, each paired with whether the
     * detector is expected to report that Unicode is required.
     *
     * @return array
     */
    public function unicodeProvider()
    {
        $r = [];

        $r['ascii'] = ['Hello World', false];
        $r['emoji'] = ['Testing 💪 👌', true];
        $r['danish'] = ['Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Wolther spillede på xylofon.', false];
        $r['german'] = ['Heizölrückstoßabdämpfung', false];
        $r['greek'] = [' Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο', true];
        $r['spanish'] = ['El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.', true];
        $r['french'] = ['Le cœur déçu mais l\'âme plutôt naïve, Louÿs rêva de crapaüter en canoë au delà des îles, près du mälström où brûlent les novæ.', true];
        $r['icelandic'] = ['Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa ', true];
        $r['japanese-hiragana'] = ['いろはにほへとちりぬるを', true];
        $r['japanese-katakana'] = ['イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム', true];
        $r['hebrew'] = [' ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה', true];
        $r['polish'] = ['Pchnąć w tę łódź jeża lub ośm skrzyń fig', true];
        $r['russian'] = ['В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!', true];
        $r['thai'] = ['กว่าบรรดาฝูงสัตว์เดรัจฉาน', true];
        $r['turkish'] = ['Pijamalı hasta, yağız şoföre çabucak güvendi.', true];

        return $r;
    }
}

test/Message/MessageTest.php

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
namespace NexmoTest\Message;
1010
use Nexmo\Message\Message;
11+
use Nexmo\Message\Text;
1112
use Zend\Diactoros\Response;
1213
use Zend\Diactoros\ServerRequest;
1314

@@ -69,6 +70,41 @@ public function testCanCreateWithId()
6970
$this->assertEquals('00000123', $message->getMessageId());
7071
}
7172

73+
/**
74+
* When creating a message, it should not auto-detect encoding by default
75+
* @dataProvider messageEncodingProvider
76+
*/
77+
public function testDoesNotAutodetectByDefault($msg, $encoding)
{
    $message = new Text('to', 'from', $msg);
    $this->assertFalse($message->isEncodingDetectionEnabled());

    // With detection off the type must stay 'text' whatever the content,
    // so the provider's $encoding value is intentionally not used here.
    $requestData = $message->getRequestData(false);

    // Expected value first, actual second, so PHPUnit labels a failure correctly.
    $this->assertEquals('text', $requestData['type']);
}
84+
85+
/**
 * Once encoding detection has been enabled, the request type should
 * match the encoding the message content requires.
 * @dataProvider messageEncodingProvider
 */
public function testDoesAutodetectWhenEnabled($msg, $encoding)
{
    $message = new Text('to', 'from', $msg);
    $message->enableEncodingDetection();
    $this->assertTrue($message->isEncodingDetectionEnabled());

    $requestData = $message->getRequestData(false);

    // Expected value first, actual second, so PHPUnit labels a failure correctly.
    $this->assertEquals($encoding, $requestData['type']);
}
98+
99+
/**
 * Message bodies paired with the request type expected for each when
 * encoding detection is enabled.
 *
 * @return array
 */
public function messageEncodingProvider()
{
    return [
        'text' => ['Hello World', 'text'],
        'emoji' => ['Testing 💪', 'unicode'],
        'kanji' => ['漢字', 'unicode'],
    ];
}
107+
72108
/**
73109
* Get the API response we'd expect for a call to the API. Message API currently returns 200 all the time, so only
74110
* change between success / fail is body of the message.

0 commit comments

Comments
 (0)