11#include "lua-urlencode.h"
22
/* True when byte c can start a UTF-8 sequence: ASCII (below 0x80) or a lead byte in 0xC2..0xF4. */
#define UTF8_LEAD(c) ((uint8_t)(c) <= 0x7F || ((uint8_t)(c) >= 0xC2 && (uint8_t)(c) <= 0xF4))
/* True when byte c has the 10xxxxxx bit pattern of a UTF-8 continuation byte. */
#define UTF8_TRAIL(c) ((((uint8_t)(c)) >> 6) == 0x02)

/* Defined further down in this file; declared here so the encoder and decoder can call it. */
uint8_t utf8_len(const uint8_t* str);

/* Upper-case hex digits indexed by nibble value (exactly 16 bytes, no terminating NUL). */
static const uint8_t xdigit[16] = "0123456789ABCDEF";
49static const int url_unreserved [256 ] = {
510 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , /* 0x00-0x0F */
611 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , /* 0x10-0x1F */
@@ -20,42 +25,27 @@ static const int url_unreserved[256] = {
2025 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , /* 0xF0-0xFF */
2126};
2227
23- typedef struct {
24- char * got ;
25- char * err ;
26- } urlencode_result_t ;
27-
28- static urlencode_result_t _encode_url (const unsigned char * input , const char * encoding ) {
29- if (setlocale (LC_CTYPE , encoding ) == NULL ) {
30- const char * errmsg = "failed to configure the locale" ;
31- printf ("%s\n" , errmsg );
32- const urlencode_result_t result = {"" , errmsg };
33- return result ;
28+ static uint8_t * _encode_url (const uint8_t * input ) {
29+ if (input [0 ] == '\0' ) {
30+ return "" ;
3431 }
3532
3633 const long len = strlen (input );
37- const char * endmarker = input + len ;
38- char * encoded ;
39- encoded = (char * )malloc (sizeof (char ) * len * 3 + 1 );
34+ const uint8_t * endmarker = input + len ;
35+ uint8_t * encoded ;
36+ encoded = (uint8_t * )malloc (sizeof (uint8_t ) * len * 3 + 1 );
4037
4138 int in_cursor = 0 ;
4239 int out_cursor = 0 ;
43- int char_count = 0 ;
4440 while (input [in_cursor ] != '\0' ) {
45- int charlen = mblen (& input [in_cursor ], MB_CUR_MAX );
46- if (charlen < 0 ) {
47- const char * errmsg = "invalid character has come" ;
48- printf ("%s\n" , errmsg );
49- const urlencode_result_t result = {"" , errmsg };
50- return result ;
51- }
41+ const uint8_t charlen = utf8_len (& input [in_cursor ]);
5242
5343 if (charlen == 0 ) {
5444 continue ;
5545 }
5646
5747 if (charlen <= 1 ) {
58- const unsigned char c = input [in_cursor ];
48+ const uint8_t c = input [in_cursor ];
5949 in_cursor += charlen ;
6050 if (url_unreserved [c ]) {
6151 encoded [out_cursor ++ ] = c ;
@@ -70,33 +60,22 @@ static urlencode_result_t _encode_url(const unsigned char* input, const char* en
7060 }
7161
7262 for (int i = 0 ; i < charlen ; i ++ , in_cursor ++ ) {
73- const unsigned char c = input [in_cursor ];
63+ const uint8_t c = input [in_cursor ];
7464 encoded [out_cursor ++ ] = '%' ;
7565 encoded [out_cursor ++ ] = xdigit [c >> 4 ];
7666 encoded [out_cursor ++ ] = xdigit [c & 15 ];
7767 }
7868 }
7969 encoded [out_cursor ] = '\0' ;
8070
81- const urlencode_result_t result = {encoded , "" };
82- return result ;
71+ return encoded ;
8372}
8473
8574static int encode_url (lua_State * L ) {
86- const unsigned char * input = luaL_checkstring (L , 1 );
87- const unsigned char * encoding = luaL_checkstring (L , 2 );
88- const urlencode_result_t result = _encode_url (input , encoding );
89-
90- lua_pushstring (L , result .got );
91-
92- const char * err = result .err ;
93- if (strlen (err ) <= 0 ) {
94- lua_pushnil (L );
95- } else {
96- lua_pushstring (L , err );
97- }
98-
99- return 2 ;
75+ const uint8_t * input = luaL_checkstring (L , 1 );
76+ const uint8_t * encoded = _encode_url (input );
77+ lua_pushstring (L , encoded );
78+ return 1 ;
10079}
10180
10281#define __ 256
@@ -120,37 +99,23 @@ static const int hexval[256] = {
12099};
121100#undef __
122101
123- static urlencode_result_t _decode_url (const unsigned char * input , const char * encoding ) {
124- if (setlocale (LC_CTYPE , encoding ) == NULL ) {
125- const char * errmsg = "failed to configure the locale" ;
126- printf ("%s\n" , errmsg );
127- const urlencode_result_t result = {"" , errmsg };
128- return result ;
129- }
130-
102+ static uint8_t * _decode_url (const uint8_t * input ) {
131103 const long len = strlen (input );
132- const char * endmarker = input + len ;
133- char * decoded ;
134- decoded = (char * )malloc (sizeof (char ) * len + 1 );
104+ const uint8_t * endmarker = input + len ;
105+ uint8_t * decoded ;
106+ decoded = (uint8_t * )malloc (sizeof (uint8_t ) * len + 1 );
135107
136108 int in_cursor = 0 ;
137109 int out_cursor = 0 ;
138- int char_count = 0 ;
139110 while (input [in_cursor ] != '\0' ) {
140- int charlen = mblen (& input [in_cursor ], MB_CUR_MAX );
141- if (charlen < 0 ) {
142- const char * errmsg = "invalid character has come" ;
143- printf ("%s\n" , errmsg );
144- const urlencode_result_t result = {"" , errmsg };
145- return result ;
146- }
111+ const uint8_t charlen = utf8_len (& input [in_cursor ]);
147112
148113 if (charlen == 0 ) {
149114 continue ;
150115 }
151116
152117 if (charlen <= 1 ) {
153- const unsigned char c = input [in_cursor ++ ];
118+ const uint8_t c = input [in_cursor ++ ];
154119
155120 if (c == '+' ) {
156121 decoded [out_cursor ++ ] = ' ' ;
@@ -162,8 +127,15 @@ static urlencode_result_t _decode_url(const unsigned char* input, const char* en
162127 continue ;
163128 }
164129
165- const unsigned int v1 = hexval [(unsigned int )input [in_cursor ++ ]];
166- const unsigned int v2 = hexval [(unsigned int )input [in_cursor ++ ]];
130+ const unsigned int v1raw = input [in_cursor ++ ];
131+ const unsigned int v2raw = input [in_cursor ++ ];
132+ if (v1raw == 0x30 && v2raw == 0x30 ) {
133+ // null char termination (%00)
134+ return decoded ;
135+ }
136+
137+ const unsigned int v1 = hexval [v1raw ];
138+ const unsigned int v2 = hexval [v2raw ];
167139 if ((v1 | v2 ) != 0xFF ) {
168140 decoded [out_cursor ++ ] = (v1 << 4 ) | v2 ;
169141 continue ;
@@ -175,31 +147,103 @@ static urlencode_result_t _decode_url(const unsigned char* input, const char* en
175147 }
176148
177149 for (int i = 0 ; i < charlen ; i ++ , in_cursor ++ ) {
178- const unsigned char c = input [in_cursor ];
150+ const uint8_t c = input [in_cursor ];
179151 decoded [out_cursor ++ ] = c ;
180152 }
181153 }
182154 decoded [out_cursor ] = '\0' ;
183155
184- const urlencode_result_t result = {decoded , "" };
185- return result ;
156+ return decoded ;
186157}
187158
188159static int decode_url (lua_State * L ) {
189- const unsigned char * input = luaL_checkstring (L , 1 );
190- const unsigned char * encoding = luaL_checkstring (L , 2 );
191- const urlencode_result_t result = _decode_url (input , encoding );
160+ const uint8_t * input = luaL_checkstring (L , 1 );
161+ const uint8_t * decoded = _decode_url (input );
162+
163+ lua_pushstring (L , decoded );
164+
165+ return 1 ;
166+ }
167+
/*
 * Sequence length that can be decided from the first byte alone.
 *
 *   0x00        -> 0     (string terminator)
 *   0x01..0xC1  -> 1     (ASCII, stray continuation bytes, overlong leads C0/C1)
 *   0xC2..0xF4  -> 0xFF  (multi-byte lead: length must be looked up and validated)
 *   0xF5..0xFF  -> 1     (bytes that can never start a sequence)
 */
#define XX 0xFF
static const uint8_t utf8_immediate_len[256] = {
    /* 0x00 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0 */ 1, 1, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX,
    /* 0xD0 */ XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX,
    /* 0xE0 */ XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX,
    /* 0xF0 */ XX, XX, XX, XX, XX, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
#undef XX
193+
/*
 * Declared sequence length for each multi-byte lead byte.
 *
 *   0xC2..0xDF -> 2
 *   0xE0..0xEF -> 3
 *   0xF0..0xF4 -> 4
 *
 * Every other entry is 0.
 */
static const uint8_t utf8_count_len[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xF0 */ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
192217
193- lua_pushstring (L , result .got );
218+ uint8_t utf8_len (const uint8_t * str ) {
219+ const uint8_t lead = * str ;
194220
195- const char * err = result .err ;
196- if (strlen (err ) <= 0 ) {
197- lua_pushnil (L );
198- } else {
199- lua_pushstring (L , err );
221+ const uint8_t immediate_len = utf8_immediate_len [lead ];
222+ if (immediate_len != 0xFF ) {
223+ return immediate_len ;
200224 }
201225
202- return 2 ;
226+ const uint8_t count = utf8_count_len [lead ];
227+ uint8_t trail = * (++ str );
228+
229+ if (count == 3 ) {
230+ if ((lead == 0xE0 && 0xA0 > trail ) || (lead == 0xED && trail > 0x9F )) {
231+ return 1 ;
232+ }
233+ } else if (count == 4 ) {
234+ if ((lead == 0xF0 && 0x90 > trail ) || (lead == 0xF4 && trail > 0x8F )) {
235+ return 1 ;
236+ }
237+ }
238+
239+ uint8_t size = 1 ;
240+ for (; size < count ; ++ size ) {
241+ if (!UTF8_TRAIL (trail )) {
242+ return size ;
243+ }
244+ trail = * (++ str );
245+ }
246+ return size ;
203247}
204248
205249static const struct luaL_Reg R [] = {
@@ -241,3 +285,4 @@ LUALIB_API int luaopen_urlencode(lua_State * L) {
241285#ifdef __cplusplus
242286}
243287#endif
288+
0 commit comments