Skip to content

Commit 2e358bc

Browse files
committed
[skip actions] [review_short_line] 2025-10-28T06:36:45+02:00
1 parent 6768d08 commit 2e358bc

File tree

2 files changed

+69
-20
lines changed

2 files changed

+69
-20
lines changed

review.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ set -x
88

99
find data -type f -exec chmod -x {} +
1010

11-
.venv/bin/python review_data.py meta data >review.$(date +%Y%m%d_%H%M%S).$(git rev-parse HEAD).$(git status --porcelain | grep -v '??' | wc -l).txt
11+
.venv/bin/python review_data.py meta data --short_line >review.$(date +%Y%m%d_%H%M%S).$(git rev-parse HEAD).$(git status --porcelain | grep -v '??' | wc -l).txt
1212

1313
.venv/bin/python -m benchmark --scanner credsweeper --load .ci/empty_report.json | tee .ci/benchmark.txt

review_data.py

Lines changed: 68 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
EXIT_SUCCESS = 0
2525
EXIT_FAILURE = 1
2626

27+
HUNK_SIZE = 120
28+
2729

2830
@functools.cache
2931
def get_excluding_extensions() -> set[str]:
@@ -39,7 +41,14 @@ def read_cache(path) -> list[str]:
3941
return f.read().replace("\r\n", '\n').replace('\r', '\n').split('\n')
4042

4143

42-
def read_data(path, line_start, line_end, value_start, value_end, ground_truth, creds: List[MetaCred]):
44+
def read_data(path: str,
45+
line_start: int,
46+
line_end: int,
47+
value_start: int,
48+
value_end: int,
49+
ground_truth: str,
50+
short_line: bool,
51+
creds: List[MetaCred]):
4352
lines = read_cache(path)
4453
if line_start == line_end:
4554
data_line = lines[line_start - 1]
@@ -62,6 +71,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
6271
line_found_in_cred = False
6372
correct_value_position = False
6473
if creds:
74+
# only applies when reviewing against a credsweeper report
6575
for cred in creds:
6676
if cred.path == path:
6777
if line_start == cred.line_start and line_end == cred.line_start:
@@ -93,20 +103,48 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
93103
line_found_in_cred = True
94104
correct_value_position = True
95105

96-
if 0 <= value_start and 0 <= value_end:
97-
line = data_line[:value_start] \
106+
if short_line:
107+
text_start = value_start - HUNK_SIZE if 0 < value_start - HUNK_SIZE else 0
108+
if 0 <= value_end and value_start <= multiline_end_offset + value_end:
109+
text_end = multiline_end_offset + value_end + HUNK_SIZE \
110+
if len(data_line) > multiline_end_offset + value_end + HUNK_SIZE \
111+
else len(data_line)
112+
elif value_end < 0 <= value_start:
113+
text_end = multiline_end_offset + value_start + HUNK_SIZE \
114+
if len(data_line) > multiline_end_offset + value_start + HUNK_SIZE \
115+
else len(data_line)
116+
elif 0 > value_start >= value_end:
117+
text_start = 0
118+
text_end = HUNK_SIZE if len(data_line) > HUNK_SIZE else len(data_line)
119+
else:
120+
raise ValueError(f"Cannot show {value_start} {value_end}")
121+
else:
122+
text_start = 0
123+
text_end = len(data_line)
124+
125+
if line_start == line_end and 0 <= value_start <= value_end \
126+
or line_start < line_end and 0 <= value_start and 0 <= value_end:
127+
line = data_line[text_start:value_start] \
98128
+ Back.LIGHTYELLOW_EX \
99129
+ data_line[value_start:value_end + multiline_end_offset] \
100130
+ Style.RESET_ALL \
101131
+ fore_style \
102-
+ data_line[value_end + multiline_end_offset:]
103-
elif value_start >= 0 > value_end:
104-
line = data_line[:value_start] \
132+
+ data_line[value_end + multiline_end_offset:text_end]
133+
elif value_end < 0 <= value_start:
134+
line = data_line[text_start:value_start] \
105135
+ Style.BRIGHT \
106-
+ data_line[value_start:]
136+
+ data_line[value_start:text_end]
137+
else:
138+
line = data_line[text_start:text_end]
139+
back_start_style = Back.LIGHTYELLOW_EX if Back.LIGHTYELLOW_EX in line else Style.RESET_ALL
140+
if line_start < line_end:
141+
line.replace('\n', Style.RESET_ALL + '\n' + fore_style + back_start_style)
142+
if '\n' in line:
143+
for n, i in enumerate(line.split('\n')):
144+
start_style = Style.RESET_ALL if 0 == n else back_start_style
145+
print(f"{n + line_start}:{start_style}{fore_style}{i}{Style.RESET_ALL}", flush=True)
107146
else:
108-
line = data_line
109-
print(f"{line_start}:{Style.RESET_ALL}{fore_style}{line}{Style.RESET_ALL}", flush=True)
147+
print(f"{line_start}:{Style.RESET_ALL}{fore_style}{line}{Style.RESET_ALL}", flush=True)
110148
if not correct_value_position:
111149
print("Possible wrong value markup", flush=True)
112150
if not line_found_in_cred:
@@ -115,7 +153,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
115153
test_line = data_line.lower()
116154
if not any(
117155
x in test_line for x in
118-
["api", "pass", "secret", "pw", "key", "credential", "token", "auth", "nonce", "salt", "cert"]
156+
["api", "pass", "secret", "pw", "key", "credential", "token", "auth", "nonce", "salt"]
119157
):
120158
repo_id = path.split('/')[1]
121159
subprocess.check_call(
@@ -128,10 +166,12 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
128166

129167
def review(meta_dir: str,
130168
data_dir: str,
169+
short_line: bool,
131170
check_only: bool,
132171
data_filter: dict,
172+
category: Optional[str] = None,
133173
load_json: Optional[str] = None,
134-
category: Optional[str] = None) -> int:
174+
) -> int:
135175
errors = 0
136176
duplicates = 0
137177
if not os.path.exists(meta_dir):
@@ -163,13 +203,15 @@ def review(meta_dir: str,
163203
if not check_only:
164204
print(str(row), flush=True)
165205
try:
166-
read_data(row.FilePath,
167-
row.LineStart,
168-
row.LineEnd,
169-
row.ValueStart,
170-
row.ValueEnd,
171-
row.GroundTruth,
172-
creds)
206+
read_data(path=row.FilePath,
207+
line_start=row.LineStart,
208+
line_end=row.LineEnd,
209+
value_start=row.ValueStart,
210+
value_end=row.ValueEnd,
211+
ground_truth=row.GroundTruth,
212+
short_line=short_line,
213+
creds=creds,
214+
)
173215
except Exception as exc:
174216
print(f"Failure {row}", exc, flush=True)
175217
errors += 1
@@ -240,6 +282,7 @@ def main(argv) -> int:
240282

241283
parser.add_argument("meta_dir", help="Markup location", nargs='?', default="meta")
242284
parser.add_argument("data_dir", help="Dataset location", nargs='?', default="data")
285+
parser.add_argument("--short_line", help="Reduce huge line in review", action='store_true')
243286
parser.add_argument("--check_only", help="Check meta markup only", action='store_true')
244287
parser.add_argument("-T", help="Show TRUE markup", action="store_true")
245288
parser.add_argument("-F", help="Show FALSE markup", action="store_true")
@@ -257,7 +300,13 @@ def main(argv) -> int:
257300
_data_filter["T"] = _args.T
258301
_data_filter["F"] = _args.F
259302
_data_filter["X"] = _args.X
260-
return review(_args.meta_dir, _args.data_dir, bool(_args.check_only), _data_filter, _args.load, _args.category)
303+
return review(meta_dir=_args.meta_dir,
304+
data_dir=_args.data_dir,
305+
short_line=bool(_args.short_line),
306+
check_only=bool(_args.check_only),
307+
data_filter=_data_filter,
308+
load_json=_args.load,
309+
category=_args.category)
261310

262311

263312
if __name__ == """__main__""":

0 commit comments

Comments
 (0)