11import atexit
22import json
33import os
4- import re
54import traceback
65from datetime import datetime
7- from multiprocessing import Process
86from urllib .parse import urlparse
97
10- from biz .utils .token_util import count_tokens , truncate_text_by_tokens
118from apscheduler .schedulers .background import BackgroundScheduler
129from apscheduler .triggers .cron import CronTrigger
1310from dotenv import load_dotenv
1411from flask import Flask , request , jsonify
1512
16- from biz .entity .review_entity import MergeRequestReviewEntity , PushReviewEntity
17- from biz .event .event_manager import event_manager
18- from biz .gitlab .webhook_handler import MergeRequestHandler , PushHandler
13+ from biz .gitlab .webhook_handler import slugify_url
14+ from biz .queue .worker import handle_merge_request_event , handle_push_event
1915from biz .service .review_service import ReviewService
20- from biz .utils .code_reviewer import CodeReviewer
2116from biz .utils .im import notifier
2217from biz .utils .log import logger
18+ from biz .utils .queue import handle_queue
2319from biz .utils .reporter import Reporter
2420
2521load_dotenv ("conf/.env" )
@@ -135,22 +131,23 @@ def handle_webhook():
135131 if not gitlab_token :
136132 return jsonify ({'message' : 'Missing GitLab access token' }), 400
137133
134+ gitlab_url_slug = slugify_url (gitlab_url )
135+
138136 # 打印整个payload数据,或根据需求进行处理
139137 logger .info (f'Received event: { object_kind } ' )
140138 logger .info (f'Payload: { json .dumps (data )} ' )
141139
142140 # 处理Merge Request Hook
143141 if object_kind == "merge_request" :
144142 # 创建一个新进程进行异步处理
145- process = Process (target = __handle_merge_request_event , args = (data , gitlab_token , gitlab_url ))
146- process .start ()
143+ handle_queue (handle_merge_request_event , data , gitlab_token , gitlab_url , gitlab_url_slug )
147144 # 立马返回响应
148145 return jsonify (
149146 {'message' : f'Request received(object_kind={ object_kind } ), will process asynchronously.' }), 200
150147 elif object_kind == "push" :
151148 # 创建一个新进程进行异步处理
152- process = Process ( target = __handle_push_event , args = ( data , gitlab_token , gitlab_url ))
153- process . start ( )
149+ # TODO check if PUSH_REVIEW_ENABLED is needed here
150+ handle_queue ( handle_push_event , data , gitlab_token , gitlab_url , gitlab_url_slug )
154151 # 立马返回响应
155152 return jsonify (
156153 {'message' : f'Request received(object_kind={ object_kind } ), will process asynchronously.' }), 200
@@ -162,171 +159,6 @@ def handle_webhook():
162159 return jsonify ({'message' : 'Invalid data format' }), 400
163160
164161
def slugify_url(original_url: str) -> str:
    """
    Turn a URL into a string that is safe to use as a file name.

    A leading ``http://`` or ``https://`` scheme is dropped, every character
    that is not an ASCII letter or digit becomes an underscore, and trailing
    underscores are stripped. Examples:
        slugify_url("http://example.com/path/to/repo/") => example_com_path_to_repo
        slugify_url("https://gitlab.com/user/repo.git") => gitlab_com_user_repo_git

    :param original_url: URL to convert
    :return: the slug derived from the URL
    """
    # Only http/https prefixes are removed; any other scheme is slugified as-is.
    without_scheme = re.sub(r'^https?://', '', original_url)

    # Underscores in the input map to underscores as well, so the result
    # consists solely of [a-zA-Z0-9_].
    return re.sub(r'[^a-zA-Z0-9]', '_', without_scheme).rstrip('_')
181-
182-
def __handle_push_event(webhook_data: dict, gitlab_token: str, gitlab_url: str):
    """
    Handle a GitLab Push Hook event.

    Fetches the pushed commits and, when PUSH_REVIEW_ENABLED is truthy, reviews
    the filtered changes and posts the result as a note. Afterwards the
    ``push_reviewed`` event is dispatched. Any exception raised along the way
    is sent through the notifier and logged rather than propagated.

    :param webhook_data: payload of the push webhook
    :param gitlab_token: GitLab access token passed on to PushHandler
    :param gitlab_url: GitLab URL passed on to PushHandler and used for the slug
    :return: None
    """
    try:
        push_handler = PushHandler(webhook_data, gitlab_token, gitlab_url)
        logger.info('Push Hook event received')

        commits = push_handler.get_push_commits()
        if not commits:
            logger.error('Failed to get commits')
            return

        # Defaults used when push review is disabled.
        review_result, score = None, 0

        if PUSH_REVIEW_ENABLED:
            # Fetch the changes carried by the push and keep the reviewable ones.
            raw_changes = push_handler.get_push_changes()
            logger.info('changes: %s', raw_changes)
            changes = filter_changes(raw_changes)
            if not changes:
                logger.info('未检测到PUSH代码的修改,修改文件可能不满足SUPPORTED_EXTENSIONS。')
                return

            # Placeholder text; replaced below whenever there are changes to review.
            review_result = "关注的文件没有修改"
            if len(changes) > 0:
                commit_messages = [commit.get('message', '').strip() for commit in commits]
                review_result = review_code(str(changes), ';'.join(commit_messages))
                score = CodeReviewer.parse_review_score(review_text=review_result)

            # Publish the review result as a note on GitLab.
            push_handler.add_push_notes(f'Auto Review Result: \n {review_result} ')

        push_reviewed = event_manager['push_reviewed']
        push_reviewed.send(PushReviewEntity(
            project_name=webhook_data['project']['name'],
            author=webhook_data['user_username'],
            branch=webhook_data['project']['default_branch'],
            updated_at=int(datetime.now().timestamp()),  # current time
            commits=commits,
            score=score,
            review_result=review_result,
            gitlab_url_slug=slugify_url(gitlab_url),
        ))

    except Exception as e:
        # Top-level boundary of the worker: report the failure instead of raising.
        error_message = f'服务出现未知错误: {str(e)} \n {traceback.format_exc()} '
        notifier.send_notification(content=error_message)
        logger.error('出现未知错误: %s', error_message)
226-
227-
def __handle_merge_request_event(webhook_data: dict, gitlab_token: str, gitlab_url: str):
    """
    Handle a GitLab Merge Request Hook event.

    A review is only performed when the handler reports the action ``open`` or
    ``update``; other actions are logged and ignored. The review result is
    posted as a merge request note and the ``merge_request_reviewed`` event is
    dispatched. Any exception is sent through the notifier and logged rather
    than propagated.

    :param webhook_data: payload of the merge request webhook
    :param gitlab_token: GitLab access token passed on to MergeRequestHandler
    :param gitlab_url: GitLab URL passed on to MergeRequestHandler and used for the slug
    :return: None
    """
    try:
        # Parse the webhook data.
        mr_handler = MergeRequestHandler(webhook_data, gitlab_token, gitlab_url)
        logger.info('Merge Request Hook event received')

        # Code review only runs when the MR is created or updated.
        if mr_handler.action not in ['open', 'update']:
            logger.info(f"Merge Request Hook event, action={mr_handler.action} , ignored.")
            return

        # Fetch the MR changes and keep the reviewable ones.
        raw_changes = mr_handler.get_merge_request_changes()
        logger.info('changes: %s', raw_changes)
        changes = filter_changes(raw_changes)
        if not changes:
            logger.info('未检测到有关代码的修改,修改文件可能不满足SUPPORTED_EXTENSIONS。')
            return

        # Fetch the MR commits.
        commits = mr_handler.get_merge_request_commits()
        if not commits:
            logger.error('Failed to get commits')
            return

        # Review the code, passing the joined commit titles alongside the changes.
        commit_titles = [commit['title'] for commit in commits]
        review_result = review_code(str(changes), ';'.join(commit_titles))

        if "COT ABORT!" in review_result:
            logger.error('COT ABORT!')
            return

        # Publish the review result as a note on GitLab.
        mr_handler.add_merge_request_notes(f'Auto Review Result: \n {review_result} ')

        # Dispatch the merge_request_reviewed event.
        merge_request_reviewed = event_manager['merge_request_reviewed']
        mr_attributes = webhook_data['object_attributes']
        merge_request_reviewed.send(
            MergeRequestReviewEntity(
                project_name=webhook_data['project']['name'],
                author=webhook_data['user']['username'],
                source_branch=mr_attributes['source_branch'],
                target_branch=mr_attributes['target_branch'],
                updated_at=int(datetime.now().timestamp()),
                commits=commits,
                score=CodeReviewer.parse_review_score(review_text=review_result),
                url=mr_attributes['url'],
                review_result=review_result,
                gitlab_url_slug=slugify_url(gitlab_url),
            )
        )

    except Exception as e:
        # Top-level boundary of the worker: report the failure instead of raising.
        error_message = f'AI Code Review 服务出现未知错误: {str(e)} \n {traceback.format_exc()} '
        notifier.send_notification(content=error_message)
        logger.error('出现未知错误: %s', error_message)
290-
291-
def filter_changes(changes: list):
    """
    Filter change records down to reviewable files and the fields needed for review.

    A record is kept when its ``deleted_file`` flag equals ``False`` and its
    ``new_path`` ends with one of the supported extensions. The extensions are
    read from the ``SUPPORTED_EXTENSIONS`` environment variable (comma
    separated, default ``.java,.py,.php``). Whitespace around each extension is
    ignored and empty entries are discarded, so a trailing comma or an empty
    variable no longer matches every file.

    :param changes: list of change dicts; ``None`` or an empty list yields ``[]``
    :return: list of dicts holding only the ``diff`` and ``new_path`` fields
    """
    if not changes:
        return []

    # Read the supported file extensions from the environment.
    raw_extensions = os.getenv('SUPPORTED_EXTENSIONS', '.java,.py,.php')
    # str.endswith accepts a tuple; an empty tuple matches nothing.
    supported_extensions = tuple(
        ext.strip() for ext in raw_extensions.split(',') if ext.strip()
    )

    return [
        {
            'diff': change.get('diff', ''),
            'new_path': change['new_path'],
        }
        for change in changes
        # Equality (not truthiness) on purpose: records without the flag are
        # dropped, exactly as before.
        if change.get('deleted_file') == False  # noqa: E712
        and change.get('new_path', '').endswith(supported_extensions)
    ]
309-
310-
def review_code(changes_text: str, commits_text: str = '') -> str:
    """
    Review the given changes and return the review text.

    The changes text is truncated to ``REVIEW_MAX_TOKENS`` tokens (environment
    variable, default 10000) before being handed to ``CodeReviewer``. When the
    reviewer's answer is wrapped in a Markdown code fence that opens with
    "```markdown" and closes with "```", the fence is removed.

    :param changes_text: textual form of the changes to review
    :param commits_text: commit messages passed to the reviewer alongside the changes
    :return: the stripped review text, or '代码为空' when ``changes_text`` is empty
    """
    # Upper bound on the number of tokens sent to the reviewer.
    review_max_tokens = int(os.getenv('REVIEW_MAX_TOKENS', 10000))

    # Nothing to review: log it and return the fixed "empty" marker.
    if not changes_text:
        # '%s' placeholder: the former bare '%' was an invalid format string,
        # so logging reported a formatting error instead of this message.
        logger.info('代码为空, diffs_text = %s', str(changes_text))
        return '代码为空'

    # Truncate the changes when they exceed the token budget.
    if count_tokens(changes_text) > review_max_tokens:
        changes_text = truncate_text_by_tokens(changes_text, review_max_tokens)

    review_result = CodeReviewer().review_code(changes_text, commits_text).strip()

    # Unwrap an answer that is enclosed in a Markdown code fence.
    fence_open, fence_close = "```markdown", "```"
    if review_result.startswith(fence_open) and review_result.endswith(fence_close):
        return review_result[len(fence_open):-len(fence_close)].strip()
    return review_result
328-
329-
330162if __name__ == '__main__' :
331163 # 启动定时任务调度器
332164 setup_scheduler ()
0 commit comments