Skip to content

Commit edb4224

Browse files
Copilotstreamich
andcommitted
Address code review feedback: rename isValidUtf8 to isUtf8, optimize performance, link dependencies directly
Co-authored-by: streamich <9773803+streamich@users.noreply.github.com>
1 parent 5129ec3 commit edb4224

File tree

4 files changed

+42
-36
lines changed

4 files changed

+42
-36
lines changed

src/type/classes/StringType.ts

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import type {json_string} from '@jsonjoy.com/util/lib/json-brand';
2121
import type * as ts from '../../typescript/types';
2222
import type {TypeExportContext} from '../../system/TypeExportContext';
2323
import type * as jtd from '../../jtd/types';
24-
import {validateStringFormat} from '../../util/stringFormats';
24+
import {isAscii, isUtf8} from '../../util/stringFormats';
2525

2626
export class StringType extends AbstractType<schema.StringSchema> {
2727
constructor(protected schema: schema.StringSchema) {
@@ -92,16 +92,19 @@ export class StringType extends AbstractType<schema.StringSchema> {
9292
}
9393
}
9494

95-
// Handle format validation
9695
if (format) {
9796
const formatErr = ctx.err(ValidationError.STR, path);
98-
const validateFn = ctx.codegen.linkDependency(validateStringFormat);
99-
ctx.js(/* js */ `if(!${validateFn}(${r}, "${format}")) return ${formatErr};`);
97+
if (format === 'ascii') {
98+
const validateFn = ctx.codegen.linkDependency(isAscii);
99+
ctx.js(/* js */ `if(!${validateFn}(${r})) return ${formatErr};`);
100+
} else if (format === 'utf8') {
101+
const validateFn = ctx.codegen.linkDependency(isUtf8);
102+
ctx.js(/* js */ `if(!${validateFn}(${r})) return ${formatErr};`);
103+
}
100104
} else if (ascii) {
101-
// Backward compatibility: use ASCII validation if ascii=true and no format specified
102105
const asciiErr = ctx.err(ValidationError.STR, path);
103-
const validateFn = ctx.codegen.linkDependency(validateStringFormat);
104-
ctx.js(/* js */ `if(!${validateFn}(${r}, "ascii")) return ${asciiErr};`);
106+
const validateFn = ctx.codegen.linkDependency(isAscii);
107+
ctx.js(/* js */ `if(!${validateFn}(${r})) return ${asciiErr};`);
105108
}
106109

107110
ctx.emitCustomValidators(this, path, r);

src/type/classes/__tests__/StringType.format.spec.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ describe('StringType format validation', () => {
3131
expect(validator('a')).toBe(true); // Too short
3232
expect(validator('abcdef')).toBe(true); // Too long
3333
expect(validator('ñ')).toBe(true); // Non-ASCII (but would also be too short)
34+
expect(validator('ñoño')).toBe(true); // Good length, but not ASCII
3435
});
3536
});
3637

src/util/__tests__/stringFormats.spec.ts

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import {isAscii, isValidUtf8, validateStringFormat} from '../stringFormats';
1+
import {isAscii, isUtf8, validateStringFormat} from '../stringFormats';
22

33
describe('String format validation utilities', () => {
44
describe('isAscii', () => {
@@ -30,57 +30,57 @@ describe('String format validation utilities', () => {
3030
});
3131
});
3232

33-
describe('isValidUtf8', () => {
33+
describe('isUtf8', () => {
3434
test('returns true for valid UTF-8 strings', () => {
35-
expect(isValidUtf8('')).toBe(true);
36-
expect(isValidUtf8('hello')).toBe(true);
37-
expect(isValidUtf8('héllo')).toBe(true);
38-
expect(isValidUtf8('🚀')).toBe(true);
39-
expect(isValidUtf8('中文')).toBe(true);
40-
expect(isValidUtf8('русский')).toBe(true);
41-
expect(isValidUtf8('👍💖🎉')).toBe(true); // Multiple emojis with surrogate pairs
35+
expect(isUtf8('')).toBe(true);
36+
expect(isUtf8('hello')).toBe(true);
37+
expect(isUtf8('héllo')).toBe(true);
38+
expect(isUtf8('🚀')).toBe(true);
39+
expect(isUtf8('中文')).toBe(true);
40+
expect(isUtf8('русский')).toBe(true);
41+
expect(isUtf8('👍💖🎉')).toBe(true); // Multiple emojis with surrogate pairs
4242
});
4343

4444
test('returns false for unpaired high surrogates', () => {
4545
const highSurrogate = String.fromCharCode(0xD800);
46-
expect(isValidUtf8(highSurrogate)).toBe(false);
47-
expect(isValidUtf8('hello' + highSurrogate)).toBe(false);
48-
expect(isValidUtf8(highSurrogate + 'world')).toBe(false);
46+
expect(isUtf8(highSurrogate)).toBe(false);
47+
expect(isUtf8('hello' + highSurrogate)).toBe(false);
48+
expect(isUtf8(highSurrogate + 'world')).toBe(false);
4949
});
5050

5151
test('returns false for orphaned low surrogates', () => {
5252
const lowSurrogate = String.fromCharCode(0xDC00);
53-
expect(isValidUtf8(lowSurrogate)).toBe(false);
54-
expect(isValidUtf8('hello' + lowSurrogate)).toBe(false);
55-
expect(isValidUtf8(lowSurrogate + 'world')).toBe(false);
53+
expect(isUtf8(lowSurrogate)).toBe(false);
54+
expect(isUtf8('hello' + lowSurrogate)).toBe(false);
55+
expect(isUtf8(lowSurrogate + 'world')).toBe(false);
5656
});
5757

5858
test('returns false for high surrogate not followed by low surrogate', () => {
5959
const highSurrogate = String.fromCharCode(0xD800);
6060
const notLowSurrogate = String.fromCharCode(0xE000); // Outside surrogate range
61-
expect(isValidUtf8(highSurrogate + notLowSurrogate)).toBe(false);
62-
expect(isValidUtf8(highSurrogate + 'a')).toBe(false);
61+
expect(isUtf8(highSurrogate + notLowSurrogate)).toBe(false);
62+
expect(isUtf8(highSurrogate + 'a')).toBe(false);
6363
});
6464

6565
test('returns true for valid surrogate pairs', () => {
6666
// Create a valid surrogate pair manually
6767
const highSurrogate = String.fromCharCode(0xD800);
6868
const lowSurrogate = String.fromCharCode(0xDC00);
69-
expect(isValidUtf8(highSurrogate + lowSurrogate)).toBe(true);
69+
expect(isUtf8(highSurrogate + lowSurrogate)).toBe(true);
7070

7171
// Test with real emoji
72-
expect(isValidUtf8('👨‍💻')).toBe(true); // Complex emoji with ZWJ
73-
expect(isValidUtf8('🏳️‍🌈')).toBe(true); // Rainbow flag emoji
72+
expect(isUtf8('👨‍💻')).toBe(true); // Complex emoji with ZWJ
73+
expect(isUtf8('🏳️‍🌈')).toBe(true); // Rainbow flag emoji
7474
});
7575

7676
test('handles sequences correctly', () => {
7777
const highSurrogate = String.fromCharCode(0xD800);
7878
const lowSurrogate = String.fromCharCode(0xDC00);
7979
const validPair = highSurrogate + lowSurrogate;
8080

81-
expect(isValidUtf8(validPair + validPair)).toBe(true); // Two valid pairs
82-
expect(isValidUtf8(validPair + highSurrogate)).toBe(false); // Valid pair + unpaired high
83-
expect(isValidUtf8('hello' + validPair + 'world')).toBe(true); // Valid pair in middle
81+
expect(isUtf8(validPair + validPair)).toBe(true); // Two valid pairs
82+
expect(isUtf8(validPair + highSurrogate)).toBe(false); // Valid pair + unpaired high
83+
expect(isUtf8('hello' + validPair + 'world')).toBe(true); // Valid pair in middle
8484
});
8585
});
8686

@@ -90,7 +90,7 @@ describe('String format validation utilities', () => {
9090
expect(validateStringFormat('héllo', 'ascii')).toBe(false);
9191
});
9292

93-
test('delegates to isValidUtf8 for utf8 format', () => {
93+
test('delegates to isUtf8 for utf8 format', () => {
9494
expect(validateStringFormat('hello', 'utf8')).toBe(true);
9595
expect(validateStringFormat('héllo', 'utf8')).toBe(true);
9696
expect(validateStringFormat(String.fromCharCode(0xD800), 'utf8')).toBe(false);

src/util/stringFormats.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
* This is highly optimized for performance.
99
*/
1010
export const isAscii = (str: string): boolean => {
11-
for (let i = 0; i < str.length; i++) {
11+
const length = str.length;
12+
for (let i = 0; i < length; i++) {
1213
if (str.charCodeAt(i) > 127) {
1314
return false;
1415
}
@@ -25,14 +26,15 @@ export const isAscii = (str: string): boolean => {
2526
* - Unpaired surrogates (invalid UTF-16 sequences)
2627
* - Characters that would produce invalid UTF-8
2728
*/
28-
export const isValidUtf8 = (str: string): boolean => {
29-
for (let i = 0; i < str.length; i++) {
29+
export const isUtf8 = (str: string): boolean => {
30+
const length = str.length;
31+
for (let i = 0; i < length; i++) {
3032
const code = str.charCodeAt(i);
3133

3234
// Check for high surrogate
3335
if (code >= 0xD800 && code <= 0xDBFF) {
3436
// High surrogate must be followed by low surrogate
35-
if (i + 1 >= str.length) {
37+
if (i + 1 >= length) {
3638
return false; // Unpaired high surrogate at end
3739
}
3840
const nextCode = str.charCodeAt(i + 1);
@@ -57,7 +59,7 @@ export const validateStringFormat = (str: string, format: 'ascii' | 'utf8'): boo
5759
case 'ascii':
5860
return isAscii(str);
5961
case 'utf8':
60-
return isValidUtf8(str);
62+
return isUtf8(str);
6163
default:
6264
return true;
6365
}

0 commit comments

Comments
 (0)