2424EXIT_SUCCESS = 0
2525EXIT_FAILURE = 1
2626
27+ HUNK_SIZE = 120
28+
2729
2830@functools .cache
2931def get_excluding_extensions () -> set [str ]:
@@ -39,7 +41,14 @@ def read_cache(path) -> list[str]:
3941 return f .read ().replace ("\r \n " , '\n ' ).replace ('\r ' , '\n ' ).split ('\n ' )
4042
4143
42- def read_data (path , line_start , line_end , value_start , value_end , ground_truth , creds : List [MetaCred ]):
44+ def read_data (path : str ,
45+ line_start : int ,
46+ line_end : int ,
47+ value_start : int ,
48+ value_end : int ,
49+ ground_truth : str ,
50+ short_line : bool ,
51+ creds : List [MetaCred ]):
4352 lines = read_cache (path )
4453 if line_start == line_end :
4554 data_line = lines [line_start - 1 ]
@@ -62,6 +71,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
6271 line_found_in_cred = False
6372 correct_value_position = False
6473 if creds :
74+ # applies only when reviewing with a credsweeper report
6575 for cred in creds :
6676 if cred .path == path :
6777 if line_start == cred .line_start and line_end == cred .line_start :
@@ -93,20 +103,48 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
93103 line_found_in_cred = True
94104 correct_value_position = True
95105
96- if 0 <= value_start and 0 <= value_end :
97- line = data_line [:value_start ] \
106+ if short_line :
107+ text_start = value_start - HUNK_SIZE if 0 < value_start - HUNK_SIZE else 0
108+ if 0 <= value_end and value_start <= multiline_end_offset + value_end :
109+ text_end = multiline_end_offset + value_end + HUNK_SIZE \
110+ if len (data_line ) > multiline_end_offset + value_end + HUNK_SIZE \
111+ else len (data_line )
112+ elif value_end < 0 <= value_start :
113+ text_end = multiline_end_offset + value_start + HUNK_SIZE \
114+ if len (data_line ) > multiline_end_offset + value_start + HUNK_SIZE \
115+ else len (data_line )
116+ elif 0 > value_start >= value_end :
117+ text_start = 0
118+ text_end = HUNK_SIZE if len (data_line ) > HUNK_SIZE else len (data_line )
119+ else :
120+ raise ValueError (f"Cannot show { value_start } { value_end } " )
121+ else :
122+ text_start = 0
123+ text_end = len (data_line )
124+
125+ if line_start == line_end and 0 <= value_start <= value_end \
126+ or line_start < line_end and 0 <= value_start and 0 <= value_end :
127+ line = data_line [text_start :value_start ] \
98128 + Back .LIGHTYELLOW_EX \
99129 + data_line [value_start :value_end + multiline_end_offset ] \
100130 + Style .RESET_ALL \
101131 + fore_style \
102- + data_line [value_end + multiline_end_offset :]
103- elif value_start >= 0 > value_end :
104- line = data_line [:value_start ] \
132+ + data_line [value_end + multiline_end_offset :text_end ]
133+ elif value_end < 0 <= value_start :
134+ line = data_line [text_start :value_start ] \
105135 + Style .BRIGHT \
106- + data_line [value_start :]
136+ + data_line [value_start :text_end ]
137+ else :
138+ line = data_line [text_start :text_end ]
139+ back_start_style = Back .LIGHTYELLOW_EX if Back .LIGHTYELLOW_EX in line else Style .RESET_ALL
140+ if line_start < line_end :
141+ line = line .replace ('\n ' , Style .RESET_ALL + '\n ' + fore_style + back_start_style )
142+ if '\n ' in line :
143+ for n , i in enumerate (line .split ('\n ' )):
144+ start_style = Style .RESET_ALL if 0 == n else back_start_style
145+ print (f"{ n + line_start } :{ start_style } { fore_style } { i } { Style .RESET_ALL } " , flush = True )
107146 else :
108- line = data_line
109- print (f"{ line_start } :{ Style .RESET_ALL } { fore_style } { line } { Style .RESET_ALL } " , flush = True )
147+ print (f"{ line_start } :{ Style .RESET_ALL } { fore_style } { line } { Style .RESET_ALL } " , flush = True )
110148 if not correct_value_position :
111149 print ("Possible wrong value markup" , flush = True )
112150 if not line_found_in_cred :
@@ -115,7 +153,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
115153 test_line = data_line .lower ()
116154 if not any (
117155 x in test_line for x in
118- ["api" , "pass" , "secret" , "pw" , "key" , "credential" , "token" , "auth" , "nonce" , "salt" , "cert" ]
156+ ["api" , "pass" , "secret" , "pw" , "key" , "credential" , "token" , "auth" , "nonce" , "salt" ]
119157 ):
120158 repo_id = path .split ('/' )[1 ]
121159 subprocess .check_call (
@@ -128,10 +166,12 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
128166
129167def review (meta_dir : str ,
130168 data_dir : str ,
169+ short_line : bool ,
131170 check_only : bool ,
132171 data_filter : dict ,
172+ category : Optional [str ] = None ,
133173 load_json : Optional [str ] = None ,
134- category : Optional [ str ] = None ) -> int :
174+ ) -> int :
135175 errors = 0
136176 duplicates = 0
137177 if not os .path .exists (meta_dir ):
@@ -163,13 +203,15 @@ def review(meta_dir: str,
163203 if not check_only :
164204 print (str (row ), flush = True )
165205 try :
166- read_data (row .FilePath ,
167- row .LineStart ,
168- row .LineEnd ,
169- row .ValueStart ,
170- row .ValueEnd ,
171- row .GroundTruth ,
172- creds )
206+ read_data (path = row .FilePath ,
207+ line_start = row .LineStart ,
208+ line_end = row .LineEnd ,
209+ value_start = row .ValueStart ,
210+ value_end = row .ValueEnd ,
211+ ground_truth = row .GroundTruth ,
212+ short_line = short_line ,
213+ creds = creds ,
214+ )
173215 except Exception as exc :
174216 print (f"Failure { row } " , exc , flush = True )
175217 errors += 1
@@ -240,6 +282,7 @@ def main(argv) -> int:
240282
241283 parser .add_argument ("meta_dir" , help = "Markup location" , nargs = '?' , default = "meta" )
242284 parser .add_argument ("data_dir" , help = "Dataset location" , nargs = '?' , default = "data" )
285+ parser .add_argument ("--short_line" , help = "Reduce huge line in review" , action = 'store_true' )
243286 parser .add_argument ("--check_only" , help = "Check meta markup only" , action = 'store_true' )
244287 parser .add_argument ("-T" , help = "Show TRUE markup" , action = "store_true" )
245288 parser .add_argument ("-F" , help = "Show FALSE markup" , action = "store_true" )
@@ -257,7 +300,13 @@ def main(argv) -> int:
257300 _data_filter ["T" ] = _args .T
258301 _data_filter ["F" ] = _args .F
259302 _data_filter ["X" ] = _args .X
260- return review (_args .meta_dir , _args .data_dir , bool (_args .check_only ), _data_filter , _args .load , _args .category )
303+ return review (meta_dir = _args .meta_dir ,
304+ data_dir = _args .data_dir ,
305+ short_line = bool (_args .short_line ),
306+ check_only = bool (_args .check_only ),
307+ data_filter = _data_filter ,
308+ load_json = _args .load ,
309+ category = _args .category )
261310
262311
263312if __name__ == """__main__""" :
0 commit comments