diff --git a/docs/plans/006-matcher-readability-cleanup.md b/docs/plans/006-matcher-readability-cleanup.md new file mode 100644 index 0000000..2bd774b --- /dev/null +++ b/docs/plans/006-matcher-readability-cleanup.md @@ -0,0 +1,54 @@ +# Matcher Readability Cleanup + +## Approach + +Refactor matcher regular expression construction so the current field, value, +and removal fragments are named and easier to review. This preserves matcher +exports, function signatures, capture groups, flags, and matching behavior while +making later parser-first exploration less risky. + +## Pre-implementation + +Create branch `refactor/matcher-readability-cleanup` from `main` after merging +the hardening regression coverage branch. + +## Steps + +1. `docs/plans/006-matcher-readability-cleanup.md` - add this plan for the + behavior-preserving matcher cleanup. +2. `src/matchers.ts` - name local regex fragments for form-encoded, JSON-like, + and escaped JSON-like matchers without changing exported APIs or regex + behavior. + +## Relevant Files + +- `docs/plans/006-matcher-readability-cleanup.md` - new plan for this refactor + slice. +- `src/matchers.ts` - updated matcher construction with named local fragments. + +## Verification + +1. Run `yarn test`. +2. Run `yarn test:coverage`. +3. Run `yarn lint`. +4. Run `yarn format:check`. +5. Run `yarn build`. +6. Re-run workspace diagnostics for touched plan and source files. + +## Decisions + +**Behavior-preserving only** - this branch keeps current regex semantics, even +where future work may choose a parser-first approach, so existing consumers see +no compatibility change. + +**Preserve capture groups** - masking relies on `$1` and `$2` replacement groups +in `stringReplacer`, so local fragment extraction must keep group ordering +intact. + +**Keep regex assembly local** - each matcher keeps its format-specific regex +pieces near the final `RegExp` construction so reviewers can inspect behavior +without jumping through a helper layer. + +**Keep parser-first work separate** - JSON parsing can affect whitespace, +serialization, and malformed string handling, so it remains a later exploration +rather than part of this readability refactor. diff --git a/src/matchers.ts b/src/matchers.ts index d2d5b30..df6d1c4 100644 --- a/src/matchers.ts +++ b/src/matchers.ts @@ -1,5 +1,7 @@ import { DataSanitizationMatcher } from './types'; +const MATCHER_FLAGS = 'gi'; + /** * Escapes regular expression metacharacters in a pattern string. * @@ -52,13 +54,20 @@ const formEncodedMatcher: DataSanitizationMatcher = ( remove = false, ) => { const escaped = escapePattern(pattern); + const fieldName = `\\w*${escaped}\\w*`; + const fieldPrefix = `${fieldName}[=:]`; + const fieldValue = '[^&]*'; + if (remove) { - return new RegExp( - `&\\w*${escaped}\\w*[=:][^&]*|\\w*${escaped}\\w*[=:][^&]*&?`, - 'gi', - ); + const removeLeadingField = `&${fieldPrefix}${fieldValue}`; + const removeField = `${fieldPrefix}${fieldValue}&?`; + + return new RegExp(`${removeLeadingField}|${removeField}`, MATCHER_FLAGS); } - return new RegExp(`(\\w*${escaped}\\w*[=:])[^&]*(&|$)`, 'gi'); + + const maskField = `(${fieldPrefix})${fieldValue}(&|$)`; + + return new RegExp(maskField, MATCHER_FLAGS); }; /** @@ -95,13 +104,21 @@ const formEncodedMatcher: DataSanitizationMatcher = ( */ const jsonMatcher: DataSanitizationMatcher = (pattern, remove = false) => { const escaped = escapePattern(pattern); + const fieldName = `\\w*${escaped}\\w*`; + if (remove) { - return new RegExp( - `,\\s*"\\w*${escaped}\\w*"\\s*:\\s*"[^"]*"|"\\w*${escaped}\\w*"\\s*:\\s*"[^"]*"\\s*,?`, - 'gi', - ); + const fieldPrefix = `"${fieldName}"\\s*:\\s*"`; + const fieldValue = '[^"]*"'; + const removeLeadingField = `,\\s*${fieldPrefix}${fieldValue}`; + const removeField = `${fieldPrefix}${fieldValue}\\s*,?`; + + return new RegExp(`${removeLeadingField}|${removeField}`, MATCHER_FLAGS); } - return new RegExp(`("\\w*${escaped}\\w*"?:\\s*").+?(")`, 'gi'); + + const fieldPrefix = `"${fieldName}"?:\\s*"`; + const maskField = `(${fieldPrefix}).+?(")`; + + return new RegExp(maskField, MATCHER_FLAGS); }; /** @@ -126,16 +143,20 @@ const escapedJsonMatcher: DataSanitizationMatcher = ( remove = false, ) => { const escaped = escapePattern(pattern); + const fieldName = `\\w*${escaped}\\w*`; + const fieldPrefix = `\\\\"${fieldName}\\\\"\\s*:\\s*\\\\"`; + if (remove) { - return new RegExp( - `,\\s*\\\\"\\w*${escaped}\\w*\\\\"\\s*:\\s*\\\\"[^\\\\"]*\\\\"|\\\\"\\w*${escaped}\\w*\\\\"\\s*:\\s*\\\\"[^\\\\"]*\\\\"\\s*,?`, - 'gi', - ); + const fieldValue = '[^\\\\"]*\\\\"'; + const removeLeadingField = `,\\s*${fieldPrefix}${fieldValue}`; + const removeField = `${fieldPrefix}${fieldValue}\\s*,?`; + + return new RegExp(`${removeLeadingField}|${removeField}`, MATCHER_FLAGS); } - return new RegExp( - `(\\\\"\\w*${escaped}\\w*\\\\"\\s*:\\s*\\\\").+?(\\\\")`, - 'gi', - ); + + const maskField = `(${fieldPrefix}).+?(\\\\")`; + + return new RegExp(maskField, MATCHER_FLAGS); }; const defaultMatchers = [formEncodedMatcher, jsonMatcher, escapedJsonMatcher];