Skip to content

Commit ae20207

Browse files
committed
improving normalization
1 parent 7cf4143 commit ae20207

File tree

2 files changed

+85
-28
lines changed

2 files changed

+85
-28
lines changed

ddtrace/internal/process_tags/__init__.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,42 @@
1616
ENTRYPOINT_TYPE_SCRIPT = "script"
1717
ENTRYPOINT_BASEDIR_TAG = "entrypoint.basedir"
1818

19-
_INVALID_CHARS_PATTERN = re.compile(r"[^a-z0-9/._-]")
2019
_CONSECUTIVE_UNDERSCORES_PATTERN = re.compile(r"_{2,}")
20+
# ASCII characters that are kept as-is in a tag value: lowercase letters,
# digits, and the separators / : . _ -
_ALLOWED_CHARS = frozenset("abcdefghijklmnopqrstuvwxyz0123456789/:._-")
21+
MAX_LENGTH = 200
2122

2223

23-
def normalize_tag(value: str) -> str:
24-
normalized = _INVALID_CHARS_PATTERN.sub("_", value.lower())
25-
normalized = _CONSECUTIVE_UNDERSCORES_PATTERN.sub("_", normalized)
26-
return normalized.strip("_")
24+
def normalize_tag_value(value: str) -> str:
    """Normalize a process tag value.

    The value is cut to its first ``MAX_LENGTH`` characters and lowercased.
    Every character that is neither in ``_ALLOWED_CHARS`` nor a Unicode
    letter or digit (per ``str.isalpha`` / ``str.isdigit``) is replaced by
    ``_``. Runs of consecutive underscores are then collapsed into one and
    leading/trailing underscores are stripped.

    Args:
        value: Raw tag value.

    Returns:
        The normalized value. It is empty when nothing but replaced
        characters remains.
    """
    # We copy the behavior of the agent, which checks the size on the
    # original value and not on an intermediary normalized step. Slicing is
    # already a no-op for values shorter than the limit.
    lowered = value[:MAX_LENGTH].lower()

    # Keep ASCII allowed characters and Unicode letters/digits; everything
    # else becomes an underscore.
    replaced = "".join(
        char if (char in _ALLOWED_CHARS or char.isalpha() or char.isdigit()) else "_"
        for char in lowered
    )

    collapsed = _CONSECUTIVE_UNDERSCORES_PATTERN.sub("_", replaced)
    return collapsed.strip("_")
2745

2846

2947
def generate_process_tags() -> Optional[str]:
48+
print(config._process_tags_enabled)
3049
if not config._process_tags_enabled:
3150
return None
3251

3352
try:
3453
return ",".join(
35-
f"{key}:{normalize_tag(value)}"
54+
f"{key}:{normalize_tag_value(value)}"
3655
for key, value in sorted(
3756
[
3857
(ENTRYPOINT_WORKDIR_TAG, os.path.basename(os.getcwd())),

tests/internal/test_process_tags.py

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,73 @@
33
import pytest
44

55
from ddtrace.internal import process_tags
6-
from ddtrace.internal.process_tags import normalize_tag
6+
from ddtrace.internal.process_tags import normalize_tag_value
77
from ddtrace.internal.settings._config import config
88
from tests.subprocesstest import run_in_subprocess
99
from tests.utils import TracerTestCase
1010
from tests.utils import process_tag_reload
1111

1212

13+
TEST_SCRIPT_PATH = "/path/to/test_script.py"
14+
TEST_WORKDIR_PATH = "/path/to/workdir"
15+
16+
1317
@pytest.mark.parametrize(
1418
"input_tag,expected",
1519
[
16-
("HelloWorld", "helloworld"),
17-
("Hello@World!", "hello_world"),
18-
("HeLLo123", "hello123"),
19-
("hello world", "hello_world"),
20-
("a/b.c_d-e", "a/b.c_d-e"),
21-
("héllø", "h_ll"),
20+
# Additional test cases from the Go implementation
21+
("#test_starting_hash", "test_starting_hash"),
22+
("TestCAPSandSuch", "testcapsandsuch"),
23+
("Test Conversion Of Weird !@#$%^&**() Characters", "test_conversion_of_weird_characters"),
24+
("$#weird_starting", "weird_starting"),
25+
("allowed:c0l0ns", "allowed:c0l0ns"),
26+
("1love", "1love"),
27+
("/love2", "/love2"),
28+
("ünicöde", "ünicöde"),
29+
("ünicöde:metäl", "ünicöde:metäl"),
30+
("Data🐨dog🐶 繋がっ⛰てて", "data_dog_繋がっ_てて"),
31+
(" spaces ", "spaces"),
32+
(" #hashtag!@#spaces #__<># ", "hashtag_spaces"),
33+
(":testing", ":testing"),
34+
("_foo", "foo"),
35+
(":::test", ":::test"),
36+
("contiguous_____underscores", "contiguous_underscores"),
37+
("foo_", "foo"),
38+
("\u017Fodd_\u017Fcase\u017F", "\u017Fodd_\u017Fcase\u017F"),
2239
("", ""),
23-
("💡⚡️", ""),
24-
("!foo@", "foo"),
25-
("123_abc.DEF-ghi/jkl", "123_abc.def-ghi/jkl"),
26-
("Env:Prod-Server#1", "env_prod-server_1"),
27-
("__hello__world__", "hello_world"),
28-
("___test___", "test"),
29-
("_leading", "leading"),
30-
("trailing_", "trailing"),
31-
("double__underscore", "double_underscore"),
40+
(" ", ""),
41+
("ok", "ok"),
42+
("™Ö™Ö™™Ö™", "ö_ö_ö"),
43+
("AlsO:ök", "also:ök"),
44+
(":still_ok", ":still_ok"),
45+
("___trim", "trim"),
46+
("12.:trim@", "12.:trim"),
47+
("12.:trim@@", "12.:trim"),
48+
("fun:ky__tag/1", "fun:ky_tag/1"),
49+
("fun:ky@tag/2", "fun:ky_tag/2"),
50+
("fun:ky@@@tag/3", "fun:ky_tag/3"),
51+
("tag:1/2.3", "tag:1/2.3"),
52+
("---fun:k####y_ta@#g/1_@@#", "---fun:k_y_ta_g/1"),
53+
("AlsO:œ#@ö))œk", "also:œ_ö_œk"),
3254
("test\x99\x8faaa", "test_aaa"),
55+
("test\x99\x8f", "test"),
56+
("a" * 888, "a" * 200),
57+
("a" + "🐶" * 799 + "b", "a"),
58+
("a" + "\ufffd", "a"),
59+
("a" + "\ufffd" + "\ufffd", "a"),
60+
("a" + "\ufffd" + "\ufffd" + "b", "a_b"),
61+
(
62+
"A00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
63+
"000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
64+
" 000000000000",
65+
"a00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
66+
"000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
67+
"_0",
68+
),
3369
],
3470
)
3571
def test_normalize_tag(input_tag, expected):
36-
assert normalize_tag(input_tag) == expected
72+
assert normalize_tag_value(input_tag) == expected
3773

3874

3975
class TestProcessTags(TracerTestCase):
@@ -57,7 +93,7 @@ def test_process_tags_deactivated(self):
5793

5894
@pytest.mark.snapshot
5995
def test_process_tags_activated(self):
60-
with patch("sys.argv", ["/path/to/test_script.py"]), patch("os.getcwd", return_value="/path/to/workdir"):
96+
with patch("sys.argv", [TEST_SCRIPT_PATH]), patch("os.getcwd", return_value=TEST_WORKDIR_PATH):
6197
config._process_tags_enabled = True
6298
process_tag_reload()
6399

@@ -67,7 +103,7 @@ def test_process_tags_activated(self):
67103

68104
@pytest.mark.snapshot
69105
def test_process_tags_edge_case(self):
70-
with patch("sys.argv", ["/test_script"]), patch("os.getcwd", return_value="/path/to/workdir"):
106+
with patch("sys.argv", ["/test_script"]), patch("os.getcwd", return_value=TEST_WORKDIR_PATH):
71107
config._process_tags_enabled = True
72108
process_tag_reload()
73109

@@ -76,7 +112,7 @@ def test_process_tags_edge_case(self):
76112

77113
@pytest.mark.snapshot
78114
def test_process_tags_error(self):
79-
with patch("sys.argv", []), patch("os.getcwd", return_value="/path/to/workdir"):
115+
with patch("sys.argv", []), patch("os.getcwd", return_value=TEST_WORKDIR_PATH):
80116
config._process_tags_enabled = True
81117

82118
with self.override_global_config(dict(_telemetry_enabled=False)):
@@ -89,12 +125,14 @@ def test_process_tags_error(self):
89125
# Check if debug log was called
90126
mock_log.debug.assert_called_once()
91127
call_args = mock_log.debug.call_args[0]
92-
assert "failed to get process_tags" in call_args[0]
128+
assert (
129+
"failed to get process_tags" in call_args[0]
130+
), f"Expected error message not found. Got: {call_args[0]}"
93131

94132
@pytest.mark.snapshot
95133
@run_in_subprocess(env_overrides=dict(DD_TRACE_PARTIAL_FLUSH_ENABLED="true", DD_TRACE_PARTIAL_FLUSH_MIN_SPANS="2"))
96134
def test_process_tags_partial_flush(self):
97-
with patch("sys.argv", ["/path/to/test_script.py"]), patch("os.getcwd", return_value="/path/to/workdir"):
135+
with patch("sys.argv", [TEST_SCRIPT_PATH]), patch("os.getcwd", return_value=TEST_WORKDIR_PATH):
98136
config._process_tags_enabled = True
99137
process_tag_reload()
100138

0 commit comments

Comments
 (0)