Skip to content

Commit 0e92d05

Browse files
authored
chore(iast): query string redaction synchronization (#15366)
Implements query string and vulnerability evidence redaction synchronization to address inconsistencies where query strings could be visible in span tags but redacted in IAST evidence (or vice versa). This PR adds pattern harmonization by making the IAST evidence redactor aware of the query string obfuscation pattern (`DD_TRACE_OBFUSCATION_QUERY_STRING_REGEXP`). When sources originate from query strings (`OriginType.QUERY`), they are now checked against the same pattern used at the span level, ensuring consistent redaction across both systems. APPSEC-52879
1 parent 8010efb commit 0e92d05

File tree

7 files changed

+494
-11
lines changed

7 files changed

+494
-11
lines changed

ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
import string
33

44
from ddtrace.internal.logger import get_logger
5+
from ddtrace.internal.settings._config import config
56
from ddtrace.internal.settings.asm import config as asm_config
67

8+
from .._taint_tracking import OriginType
79
from .._utils import _get_source_index
810
from ..constants import VULN_CMDI
911
from ..constants import VULN_CODE_INJECTION
@@ -41,6 +43,8 @@ class SensitiveHandler:
4143
def __init__(self):
4244
self._name_pattern = re.compile(asm_config._iast_redaction_name_pattern, re.IGNORECASE | re.MULTILINE)
4345
self._value_pattern = re.compile(asm_config._iast_redaction_value_pattern, re.IGNORECASE | re.MULTILINE)
46+
# Query string obfuscation pattern for synchronization with span-level redaction
47+
self._query_string_pattern = config._obfuscation_query_string_pattern
4448

4549
self._sensitive_analyzers = {
4650
VULN_CMDI: command_injection_sensitive_analyzer,
@@ -131,6 +135,21 @@ def is_sensible_value(self, value):
131135
"""
132136
return bool(self._value_pattern.search(value))
133137

138+
def is_query_string_source(self, source):
    """
    Tell whether a source was read from the request query string.

    Args:
    - source: The source to inspect. May be None or an object without an
      ``origin`` attribute.

    Returns:
    - The result of comparing ``source.origin`` with ``OriginType.QUERY``;
      False when the source is None, has no ``origin`` attribute, or the
      check raises.
    """
    if source is None:
        return False

    try:
        if not hasattr(source, "origin"):
            return False
        return source.origin == OriginType.QUERY
    except Exception:
        # Best effort: a source that cannot be inspected is treated as not coming from the query string.
        return False
152+
134153
def is_sensible_source(self, source):
    """
    Decide whether a source holds sensitive data that must be redacted.

    Args:
    - source: The source to check. May be None.

    Returns:
    - bool: False when the source or its value is None. True when the source
      comes from the query string and its value matches the query string
      obfuscation pattern. Otherwise the result of the name/value checks.
    """
    if source is None or source.value is None:
        return False

    # Query string sources are also checked against the query string obfuscation
    # pattern, to stay in sync with span-level redaction.
    if self.is_query_string_source(source):
        qs_pattern = self._query_string_pattern
        if qs_pattern is not None:
            try:
                # The pattern is applied to bytes, so a non-bytes value is encoded first.
                raw_value = source.value
                if not isinstance(raw_value, bytes):
                    raw_value = raw_value.encode("utf-8")
                if qs_pattern.search(raw_value):
                    return True
            except Exception:
                log.debug("Error checking query string pattern against source", exc_info=True)

    # Standard IAST redaction patterns
    name_hit = self.is_sensible_name(source.name)
    if name_hit:
        return name_hit
    return self.is_sensible_value(source.value)
149179

150180
def scrub_evidence(self, vulnerability_type, evidence, tainted_ranges, sources):
151181
"""
@@ -166,7 +196,10 @@ def scrub_evidence(self, vulnerability_type, evidence, tainted_ranges, sources):
166196
if not evidence.value:
167197
log.debug("No evidence value found in evidence %s", evidence)
168198
return None
169-
sensitive_ranges = sensitive_analyzer(evidence, self._name_pattern, self._value_pattern)
199+
# Pass query string pattern for synchronization with span-level redaction
200+
sensitive_ranges = sensitive_analyzer(
201+
evidence, self._name_pattern, self._value_pattern, self._query_string_pattern
202+
)
170203
return self.to_redacted_json(evidence.value, sensitive_ranges, tainted_ranges, sources)
171204
return None
172205

ddtrace/appsec/_iast/_evidence_redaction/command_injection_sensitive_analyzer.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,19 @@
1010
pattern = re.compile(COMMAND_PATTERN, re.IGNORECASE | re.MULTILINE)
1111

1212

13-
def command_injection_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None):
13+
def command_injection_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None, query_string_pattern=None):
14+
"""
15+
Command injection sensitive analyzer for evidence redaction.
16+
17+
Args:
18+
- evidence: The evidence to analyze
19+
- name_pattern: Pattern for matching sensitive names (unused in command injection analyzer)
20+
- value_pattern: Pattern for matching sensitive values (unused in command injection analyzer)
21+
- query_string_pattern: Query string obfuscation pattern (unused in command injection analyzer)
22+
23+
Returns:
24+
- list: List of sensitive ranges to redact
25+
"""
1426
regex_result = pattern.search(evidence.value)
1527
if regex_result and len(regex_result.groups()) > 0:
1628
start = regex_result.start(1)

ddtrace/appsec/_iast/_evidence_redaction/default_sensitive_analyzer.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,19 @@
44
log = get_logger(__name__)
55

66

7-
def default_sensitive_analyzer(evidence, name_pattern, value_pattern):
7+
def default_sensitive_analyzer(evidence, name_pattern, value_pattern, query_string_pattern=None):
8+
"""
9+
Default sensitive analyzer for evidence redaction.
10+
11+
Args:
12+
- evidence: The evidence to analyze
13+
- name_pattern: Pattern for matching sensitive names
14+
- value_pattern: Pattern for matching sensitive values
15+
- query_string_pattern: Query string obfuscation pattern (unused in default analyzer)
16+
17+
Returns:
18+
- list: List of sensitive ranges to redact
19+
"""
820
if name_pattern.search(evidence.value) or value_pattern.search(evidence.value):
921
return [{"start": 0, "end": len(evidence.value)}]
1022

ddtrace/appsec/_iast/_evidence_redaction/header_injection_sensitive_analyzer.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,19 @@
55
log = get_logger(__name__)
66

77

8-
def header_injection_sensitive_analyzer(evidence, name_pattern, value_pattern):
8+
def header_injection_sensitive_analyzer(evidence, name_pattern, value_pattern, query_string_pattern=None):
9+
"""
10+
Header injection sensitive analyzer for evidence redaction.
11+
12+
Args:
13+
- evidence: The evidence to analyze
14+
- name_pattern: Pattern for matching sensitive names
15+
- value_pattern: Pattern for matching sensitive values
16+
- query_string_pattern: Query string obfuscation pattern (unused in header injection analyzer)
17+
18+
Returns:
19+
- list: List of sensitive ranges to redact
20+
"""
921
evidence_value = evidence.value
1022
sections = evidence_value.split(HEADER_NAME_VALUE_SEPARATOR)
1123
header_name = sections[0]

ddtrace/appsec/_iast/_evidence_redaction/sql_sensitive_analyzer.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,19 @@
4141
patterns[DBAPI_MYSQLDB] = patterns[DBAPI_MYSQL]
4242

4343

44-
def sql_sensitive_analyzer(evidence, name_pattern, value_pattern):
44+
def sql_sensitive_analyzer(evidence, name_pattern, value_pattern, query_string_pattern=None):
45+
"""
46+
SQL sensitive analyzer for evidence redaction.
47+
48+
Args:
49+
- evidence: The evidence to analyze
50+
- name_pattern: Pattern for matching sensitive names
51+
- value_pattern: Pattern for matching sensitive values
52+
- query_string_pattern: Query string obfuscation pattern (unused in SQL analyzer)
53+
54+
Returns:
55+
- list: List of sensitive ranges to redact
56+
"""
4557
pattern = patterns.get(evidence.dialect, patterns[DBAPI_MYSQL])
4658
tokens = []
4759

ddtrace/appsec/_iast/_evidence_redaction/url_sensitive_analyzer.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,52 @@ def find_query_fragment(ranges, evidence):
2929
regex_result = QUERY_FRAGMENT_PATTERN.search(evidence.value, regex_result.end())
3030

3131

32-
def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None):
32+
def find_query_string_matches(ranges, evidence, query_string_pattern):
    """
    Find sensitive data in the query string of URL evidence using the query
    string obfuscation pattern, to stay in sync with span-level redaction.

    Args:
    - ranges: List that receives one ``{"start": int, "end": int}`` dict per
      match. Offsets are character indexes into ``evidence.value``.
    - evidence: Object whose ``value`` attribute holds the URL as text.
    - query_string_pattern: Compiled bytes pattern applied to the UTF-8
      encoded query string, or None to do nothing.

    Returns:
    - None. Matches are appended to ``ranges``. Any error is logged at debug
      level and swallowed, so redaction stays best effort.
    """
    if query_string_pattern is None:
        return

    try:
        url = evidence.value

        # The query string runs from the first "?" up to the first "#" (or the end of the URL).
        query_start = url.find("?")
        if query_start == -1:
            return
        fragment_start = url.find("#")
        query_end = len(url) if fragment_start == -1 else fragment_start
        query_string = url[query_start:query_end]

        # The pattern works on bytes, so the text is encoded before matching.
        query_bytes = query_string.encode("utf-8")
        is_ascii = len(query_bytes) == len(query_string)

        def char_offset(byte_offset, errors):
            # Match offsets are byte positions; the ranges must be character positions.
            # They only differ when the query string contains multi-byte characters.
            if is_ascii:
                return byte_offset
            return len(query_bytes[:byte_offset].decode("utf-8", errors=errors))

        for match in query_string_pattern.finditer(query_bytes):
            ranges.append(
                {
                    # "ignore" rounds a boundary inside a multi-byte character down,
                    # "replace" rounds it up, so a partially matched character is still covered.
                    "start": query_start + char_offset(match.start(), "ignore"),
                    "end": query_start + char_offset(match.end(), "replace"),
                }
            )
    except Exception:
        log.debug("Error applying query string pattern to URL evidence", exc_info=True)
60+
61+
62+
def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None, query_string_pattern=None):
    """
    Collect the ranges of a URL evidence value that must be redacted.

    Args:
    - evidence: The evidence to analyze
    - name_pattern: Accepted for signature parity with the other analyzers; not read here
    - value_pattern: Accepted for signature parity with the other analyzers; not read here
    - query_string_pattern: Query string obfuscation pattern, forwarded to
      find_query_string_matches for synchronization with span-level redaction

    Returns:
    - list: List of sensitive ranges to redact
    """
    sensitive_ranges = []
    # Each finder appends the ranges it detects to the shared list.
    for finder in (find_authority, find_query_fragment):
        finder(sensitive_ranges, evidence)
    # Apply query string pattern for synchronization with span-level redaction
    find_query_string_matches(sensitive_ranges, evidence, query_string_pattern)
    return sensitive_ranges

0 commit comments

Comments
 (0)