4242
#define DEFAULT_SIZE 8
#define CAPACITY_INCREASE 8
// Largest atom length, in bytes, accepted when decoding long-format atom lists
// (4095). Anything above this is rejected as an invalid length.
#define MAX_ATOM_LEN ((1 << 12) - 1)

// capacity plus one quarter of capacity.
// NOTE(review): presumably the entry count that triggers a rehash; confirm
// against maybe_rehash.
// Arguments are parenthesized so that expression arguments such as `1 << 3`
// are not torn apart by operator precedence.
#define ATOM_TABLE_THRESHOLD(capacity) ((capacity) + ((capacity) >> 2))
// new_count plus the fixed CAPACITY_INCREASE headroom.
#define ATOM_TABLE_NEW_CAPACITY(new_count) ((new_count) + CAPACITY_INCREASE)
4849
// One entry of the atom hash table.
struct HNode
{
    // Next node in the same bucket chain.
    struct HNode *next;
    // Raw atom bytes; the length is kept separately in bytes_len (no length
    // prefix, no terminator).
    const uint8_t *key;
    // Atom index; 20 bits, so at most 2^20 distinct atoms.
    uint32_t index : 20;
    // Length of key in bytes. 12 bits are required to represent every length
    // up to MAX_ATOM_LEN ((1 << 12) - 1); a narrower field silently truncates
    // long atoms. Together with index this fills exactly one uint32_t.
    uint32_t bytes_len : 12;
};
5557
5658struct HNodeGroup
@@ -148,7 +150,7 @@ static struct HNodeGroup *new_node_group(struct AtomTable *table, int len)
148150
149151static unsigned long sdbm_hash (const unsigned char * str , int len )
150152{
151- unsigned long hash = 0 ;
153+ unsigned long hash = len ;
152154 int c ;
153155
154156 for (int i = 0 ; i < len ; i ++ ) {
@@ -160,11 +162,11 @@ static unsigned long sdbm_hash(const unsigned char *str, int len)
160162}
161163
162164static inline struct HNode * get_node_from_bucket (
163- const struct AtomTable * hash_table , unsigned long bucket_index , AtomString string )
165+ const struct AtomTable * hash_table , unsigned long bucket_index , const uint8_t * string , size_t string_len )
164166{
165167 struct HNode * node = hash_table -> buckets [bucket_index ];
166168 while (node ) {
167- if (atom_are_equals ( string , node -> key ) ) {
169+ if (node -> bytes_len == string_len && memcmp ( node -> key , string , string_len ) == 0 ) {
168170 return node ;
169171 }
170172
@@ -175,17 +177,17 @@ static inline struct HNode *get_node_from_bucket(
175177}
176178
177179static inline struct HNode * get_node_with_hash (
178- const struct AtomTable * hash_table , AtomString string , unsigned long hash )
180+ const struct AtomTable * hash_table , const uint8_t * string , size_t string_len , unsigned long hash )
179181{
180182 unsigned long bucket_index = hash % hash_table -> capacity ;
181- return get_node_from_bucket (hash_table , bucket_index , string );
183+ return get_node_from_bucket (hash_table , bucket_index , string , string_len );
182184}
183185
184- static inline struct HNode * get_node (const struct AtomTable * hash_table , AtomString string )
// Looks up an atom by its bytes: hashes the bytes with sdbm_hash and
// delegates the bucket search to get_node_with_hash.
//
// hash_table: table to search.
// string:     atom bytes (not length-prefixed).
// string_len: number of bytes in string.
//
// Returns the result of get_node_with_hash for the computed hash.
static inline struct HNode *get_node(const struct AtomTable *hash_table, const uint8_t *string, size_t string_len)
{
    return get_node_with_hash(hash_table, string, string_len, sdbm_hash(string, string_len));
}
190192
// TODO: this function needs to use an efficient structure such as a skip list
@@ -208,33 +210,47 @@ static struct HNode *get_node_using_index(struct AtomTable *table, atom_index_t
208210 return NULL ;
209211}
210212
211- AtomString atom_table_get_atom_string (struct AtomTable * table , atom_index_t index )
213+ const uint8_t * atom_table_get_atom_string (struct AtomTable * table , atom_index_t index , size_t * out_size )
212214{
215+ const uint8_t * result ;
213216 SMP_RDLOCK (table );
214217
215218 struct HNode * node = get_node_using_index (table , index );
216219 if (IS_NULL_PTR (node )) {
217220 SMP_UNLOCK (table );
218221 return NULL ;
219222 }
220-
221- AtomString found_key = node -> key ;
223+ result = node -> key ;
224+ * out_size = node -> bytes_len ;
222225
223226 SMP_UNLOCK (table );
224- return found_key ;
227+ return result ;
225228}
226229
227- int atom_table_cmp_using_atom_index (struct AtomTable * table , int t_atom_index , int other_atom_index )
230+ bool atom_table_is_equal_to_atom_string (struct AtomTable * table , atom_index_t t_atom_index , AtomString string )
228231{
229- AtomString t_atom_string = atom_table_get_atom_string (table , t_atom_index );
232+ size_t t_atom_len ;
233+ const uint8_t * t_atom_data = atom_table_get_atom_string (table , t_atom_index , & t_atom_len );
234+ if (IS_NULL_PTR (t_atom_data )) {
235+ return false;
236+ }
230237
231- int t_atom_len = atom_string_len (t_atom_string );
232- const char * t_atom_data = ( const char * ) atom_string_data ( t_atom_string );
238+ return ( t_atom_len == atom_string_len (string )) && ( memcmp ( t_atom_data , atom_string_data ( string ), t_atom_len ) == 0 );
239+ }
233240
234- AtomString other_atom_string = atom_table_get_atom_string (table , other_atom_index );
241+ int atom_table_cmp_using_atom_index (struct AtomTable * table , atom_index_t t_atom_index , atom_index_t other_atom_index )
242+ {
243+ size_t t_atom_len ;
244+ const uint8_t * t_atom_data = atom_table_get_atom_string (table , t_atom_index , & t_atom_len );
245+ if (IS_NULL_PTR (t_atom_data )) {
246+ return -1 ;
247+ }
235248
236- int other_atom_len = atom_string_len (other_atom_string );
237- const char * other_atom_data = (const char * ) atom_string_data (other_atom_string );
249+ size_t other_atom_len ;
250+ const uint8_t * other_atom_data = atom_table_get_atom_string (table , other_atom_index , & other_atom_len );
251+ if (IS_NULL_PTR (other_atom_data )) {
252+ return 1 ;
253+ }
238254
239255 int cmp_size = (t_atom_len > other_atom_len ) ? other_atom_len : t_atom_len ;
240256
@@ -267,73 +283,10 @@ atom_ref_t atom_table_get_atom_ptr_and_len(struct AtomTable *table, atom_index_t
267283 return node ;
268284}
269285
270- char * atom_table_atom_to_new_cstring (struct AtomTable * table , atom_index_t atom_index , const char * suffix )
271- {
272- AtomString atom_string = atom_table_get_atom_string (table , atom_index );
273- size_t atom_len = atom_string_len (atom_string );
274- const uint8_t * atom_data = atom_string_data (atom_string );
275- size_t suffix_len = 0 ;
276- if (suffix ) {
277- suffix_len = strlen (suffix );
278- }
279-
280- char * result = malloc (atom_len + suffix_len + 1 );
281- if (IS_NULL_PTR (result )) {
282- return NULL ;
283- }
284-
285- memcpy (result , atom_data , atom_len );
286- if (suffix ) {
287- memcpy (result + atom_len , suffix , suffix_len );
288- }
289- result [atom_len + suffix_len ] = 0 ;
290-
291- return result ;
292- }
293-
294- bool atom_table_is_atom_ref_ascii (struct AtomTable * table , atom_ref_t atom )
286+ static inline void init_node (struct HNode * node , const uint8_t * atom_data , size_t atom_len , long index )
295287{
296- SMP_RDLOCK (table );
297-
298- struct HNode * node = (struct HNode * ) atom ;
299- const uint8_t * data = atom_string_data (node -> key );
300- size_t len = atom_string_len (node -> key );
301-
302- bool result = unicode_buf_is_ascii (data , len );
303-
304- SMP_UNLOCK (table );
305- return result ;
306- }
307-
308- void atom_table_write_bytes (struct AtomTable * table , atom_ref_t atom , size_t buf_len , void * outbuf )
309- {
310- SMP_RDLOCK (table );
311-
312- struct HNode * node = (struct HNode * ) atom ;
313- size_t len = atom_string_len (node -> key );
314- if (len > buf_len ) {
315- len = buf_len ;
316- }
317-
318- memcpy (outbuf , atom_string_data (node -> key ), len );
319-
320- SMP_UNLOCK (table );
321- }
322-
323- void atom_table_write_cstring (
324- struct AtomTable * table , atom_ref_t atom , size_t buf_len , char * outbuf )
325- {
326- SMP_RDLOCK (table );
327-
328- struct HNode * node = (struct HNode * ) atom ;
329- atom_string_to_c (node -> key , outbuf , buf_len );
330-
331- SMP_UNLOCK (table );
332- }
333-
334- static inline void init_node (struct HNode * node , AtomString atom , long index )
335- {
336- node -> key = atom ;
288+ node -> key = atom_data ;
289+ node -> bytes_len = atom_len ;
337290 node -> index = index ;
338291}
339292
@@ -346,14 +299,14 @@ static inline void insert_node_into_bucket(
346299}
347300
348301static inline atom_index_t insert_node (struct AtomTable * table , struct HNodeGroup * node_group ,
349- unsigned long bucket_index , AtomString string )
302+ unsigned long bucket_index , const uint8_t * atom_data , size_t atom_len )
350303{
351304 atom_index_t new_index = table -> count ;
352305 table -> count ++ ;
353306
354307 struct HNode * node = & node_group -> nodes [new_index - node_group -> first_index ];
355308 table -> last_node_group_avail -- ;
356- init_node (node , string , new_index );
309+ init_node (node , atom_data , atom_len , new_index );
357310 insert_node_into_bucket (table , bucket_index , node );
358311
359312 return new_index ;
@@ -383,9 +336,7 @@ static bool do_rehash(struct AtomTable *table, int new_capacity)
383336
384337 for (int i = 0 ; i < group_count ; i ++ ) {
385338 struct HNode * node = & group -> nodes [i ];
386- AtomString key = node -> key ;
387-
388- unsigned long hash = sdbm_hash (key , atom_string_len (key ));
339+ unsigned long hash = sdbm_hash (node -> key , node -> bytes_len );
389340 unsigned long bucket_index = hash % table -> capacity ;
390341
391342 insert_node_into_bucket (table , bucket_index , node );
@@ -409,13 +360,13 @@ static inline bool maybe_rehash(struct AtomTable *table, int new_entries)
409360 return do_rehash (table , new_capacity );
410361}
411362
412- enum AtomTableEnsureAtomResult atom_table_ensure_atom (struct AtomTable * table , AtomString string , enum AtomTableCopyOpt opts , atom_index_t * result )
363+ enum AtomTableEnsureAtomResult atom_table_ensure_atom (struct AtomTable * table , const uint8_t * atom_data , size_t atom_len , enum AtomTableCopyOpt opts , atom_index_t * result )
413364{
414- unsigned long hash = sdbm_hash (string , atom_string_len ( string ) );
365+ unsigned long hash = sdbm_hash (atom_data , atom_len );
415366 SMP_WRLOCK (table );
416367 unsigned long bucket_index = hash % table -> capacity ;
417368
418- struct HNode * node = get_node_from_bucket (table , bucket_index , string );
369+ struct HNode * node = get_node_from_bucket (table , bucket_index , atom_data , atom_len );
419370 if (node ) {
420371 SMP_UNLOCK (table );
421372 * result = node -> index ;
@@ -435,23 +386,21 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, A
435386 }
436387 }
437388
438- AtomString maybe_copied = string ;
439389 if (opts & AtomTableCopyAtom ) {
440- uint8_t len = * ((uint8_t * ) string );
441- uint8_t * buf = malloc (1 + len );
390+ uint8_t * buf = malloc (atom_len );
442391 if (IS_NULL_PTR (buf )) {
443392 SMP_UNLOCK (table );
444393 return AtomTableEnsureAtomAllocFail ;
445394 }
446- memcpy (buf , string , 1 + len );
447- maybe_copied = buf ;
395+ memcpy (buf , atom_data , atom_len );
396+ atom_data = buf ;
448397 }
449398
450399 if (maybe_rehash (table , 1 )) {
451400 bucket_index = hash % table -> capacity ;
452401 }
453402
454- * result = insert_node (table , node_group , bucket_index , maybe_copied );
403+ * result = insert_node (table , node_group , bucket_index , atom_data , atom_len );
455404
456405 SMP_UNLOCK (table );
457406 return AtomTableEnsureAtomOk ;
@@ -478,7 +427,7 @@ static inline int read_encoded_len(const uint8_t **len_bytes)
478427// -1 is not a valid atom index as we're limited to 2^20
479428#define ATOM_TABLE_NOT_FOUND_MARKER ((atom_index_t) -1)
480429
481- enum AtomTableEnsureAtomResult atom_table_ensure_atoms (struct AtomTable * table , const void * atoms , int count ,
430+ enum AtomTableEnsureAtomResult atom_table_ensure_atoms (struct AtomTable * table , const void * atoms , size_t count ,
482431 atom_index_t * translate_table , enum EnsureAtomsOpt opt )
483432{
484433 bool is_long_format = (opt & EnsureLongEncoding ) != 0 ;
@@ -489,35 +438,22 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
489438
490439 const uint8_t * current_atom = atoms ;
491440
492- for (int i = 0 ; i < count ; i ++ ) {
441+ for (size_t i = 0 ; i < count ; i ++ ) {
493442 struct HNode * node ;
443+ int atom_len ;
494444 if (is_long_format ) {
495- int atom_len = read_encoded_len (& current_atom );
496- if (UNLIKELY (atom_len < 0 )) {
445+ atom_len = read_encoded_len (& current_atom );
446+ if (UNLIKELY (atom_len < 0 || atom_len > MAX_ATOM_LEN )) {
497447 fprintf (stderr , "Found invalid atom len." );
498448 SMP_UNLOCK (table );
499449 return AtomTableEnsureAtomInvalidLen ;
500- } else if (UNLIKELY (atom_len > 255 )) {
501- fprintf (stderr ,
502- "Unsupported atom length %i bytes.\n"
503- "Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
504- "regardeless the number of codepoints.\n"
505- "If you are seeing this error please open an issue on GitHub:\n"
506- "https://github.com/atomvm/AtomVM/issues\n" ,
507- atom_len );
508- SMP_UNLOCK (table );
509- return AtomTableEnsureAtomInvalidLen ;
510450 }
511- char tmp_old_fmt [256 ];
512- tmp_old_fmt [0 ] = atom_len ;
513- memcpy (tmp_old_fmt + 1 , current_atom , atom_len );
514- node = get_node (table , tmp_old_fmt );
515- current_atom += atom_len ;
516451 } else {
517- node = get_node (table , current_atom );
518- uint8_t atom_len = current_atom [0 ];
519- current_atom += 1 + atom_len ;
452+ atom_len = current_atom [0 ];
453+ current_atom ++ ;
520454 }
455+ node = get_node (table , current_atom , atom_len );
456+ current_atom += atom_len ;
521457
522458 if (node ) {
523459 translate_table [i ] = node -> index ;
@@ -532,18 +468,14 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
532468 current_atom = atoms ;
533469 int remaining_atoms = new_atoms_count ;
534470 struct HNodeGroup * node_group = table -> last_node_group ;
535- for (int i = 0 ; i < count ; i ++ ) {
536-
537- const uint8_t * to_be_copied = NULL ;
538- const uint8_t * next_atom = current_atom ;
539- uint8_t atom_len ;
471+ for (size_t i = 0 ; i < count ; i ++ ) {
472+ size_t atom_len ;
540473 if (is_long_format ) {
541- atom_len = read_encoded_len (& next_atom );
542- to_be_copied = next_atom ;
543- next_atom += atom_len ;
474+ // Size was checked above
475+ atom_len = (size_t ) read_encoded_len (& current_atom );
544476 } else {
545477 atom_len = current_atom [0 ];
546- next_atom += 1 + atom_len ;
478+ current_atom ++ ;
547479 }
548480
549481 if (translate_table [i ] == ATOM_TABLE_NOT_FOUND_MARKER ) {
@@ -555,28 +487,16 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
555487 }
556488 }
557489
558- if (is_long_format ) {
559- uint8_t * atom_copy = malloc (atom_len + 1 );
560- if (IS_NULL_PTR (atom_copy )) {
561- // we are not going to remove atoms that have already been added up to this one
562- SMP_UNLOCK (table );
563- return AtomTableEnsureAtomAllocFail ;
564- }
565- atom_copy [0 ] = atom_len ;
566- memcpy (atom_copy + 1 , to_be_copied , atom_len );
567- current_atom = atom_copy ;
568- }
569-
570490 unsigned long hash = sdbm_hash (current_atom , atom_len );
571491 unsigned long bucket_index = hash % table -> capacity ;
572492
573- translate_table [i ] = insert_node (table , node_group , bucket_index , current_atom );
493+ translate_table [i ] = insert_node (table , node_group , bucket_index , current_atom , atom_len );
574494 remaining_atoms -- ;
575495 if (remaining_atoms == 0 ) {
576496 break ;
577497 }
578498 }
579- current_atom = next_atom ;
499+ current_atom += atom_len ;
580500 }
581501
582502 SMP_UNLOCK (table );
0 commit comments