From 4605b0d5b8708429c2a91a355c9c25d9ddc63519 Mon Sep 17 00:00:00 2001
From: Jose Rego
Date: Wed, 12 Nov 2025 11:12:14 +0000
Subject: [PATCH] feat: pr files

---
 properties.sample.json           | 56 ++++++++++++++++++++++++++++++++
 tap_github/schemas/pr_files.json | 21 ++++++++++++
 tap_github/streams.py            | 27 ++++++++++++++-
 3 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 properties.sample.json
 create mode 100644 tap_github/schemas/pr_files.json

diff --git a/properties.sample.json b/properties.sample.json
new file mode 100644
index 00000000..dc7bae04
--- /dev/null
+++ b/properties.sample.json
@@ -0,0 +1,56 @@
+{
+  "streams": [
+    {
+      "stream": "pull_requests",
+      "tap_stream_id": "pull_requests",
+      "key_properties": ["id"],
+      "schema": {
+        "properties": {
+          "id": {"type": ["null", "integer"]},
+          "number": {"type": ["null", "integer"]},
+          "updated_at": {"type": ["null", "string"], "format": "date-time"}
+        }
+      },
+      "metadata": [
+        {
+          "breadcrumb": [],
+          "metadata": {
+            "selected": true,
+            "tap-stream-id": "pull_requests",
+            "table-key-properties": ["id"],
+            "valid-replication-keys": ["updated_at"],
+            "replication-method": "INCREMENTAL"
+          }
+        },
+        {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic"}},
+        {"breadcrumb": ["properties", "number"], "metadata": {"inclusion": "automatic"}},
+        {"breadcrumb": ["properties", "updated_at"], "metadata": {"inclusion": "automatic"}}
+      ]
+    },
+    {
+      "stream": "pr_files",
+      "tap_stream_id": "pr_files",
+      "key_properties": ["id"],
+      "schema": {
+        "properties": {
+          "id": {"type": ["null", "string"]},
+          "updated_at": {"type": ["null", "string"], "format": "date-time"},
+          "filename": {"type": ["null", "string"]}
+        }
+      },
+      "metadata": [
+        {
+          "breadcrumb": [],
+          "metadata": {
+            "selected": true,
+            "tap-stream-id": "pr_files",
+            "table-key-properties": ["id"],
+            "valid-replication-keys": ["updated_at"],
+            "replication-method": "INCREMENTAL",
+            "parent-tap-stream-id": "pull_requests"
+          }
+        }
+      ]
+    }
+  ]
+}
diff --git a/tap_github/schemas/pr_files.json b/tap_github/schemas/pr_files.json
new file mode 100644
index 00000000..629a4e27
--- /dev/null
+++ b/tap_github/schemas/pr_files.json
@@ -0,0 +1,21 @@
+{
+  "type": ["null", "object"],
+  "properties": {
+    "_sdc_repository": {"type": ["string"]},
+    "sha": {"type": ["null", "string"]},
+    "filename": {"type": ["null", "string"]},
+    "status": {"type": ["null", "string"]},
+    "additions": {"type": ["null", "integer"]},
+    "deletions": {"type": ["null", "integer"]},
+    "changes": {"type": ["null", "integer"]},
+    "blob_url": {"type": ["null", "string"]},
+    "raw_url": {"type": ["null", "string"]},
+    "contents_url": {"type": ["null", "string"]},
+    "patch": {"type": ["null", "string"]},
+    "previous_filename": {"type": ["null", "string"]},
+    "id": {"type": ["null", "string"]},
+    "updated_at": {"type": ["null", "string"], "format": "date-time"},
+    "pr_number": {"type": ["null", "integer"]},
+    "pr_id": {"type": ["null", "integer"]}
+  }
+}
diff --git a/tap_github/streams.py b/tap_github/streams.py
index 707898ef..15f539a6 100644
--- a/tap_github/streams.py
+++ b/tap_github/streams.py
@@ -504,6 +504,30 @@ def add_fields_at_1st_level(self, record, parent_record = None):
         record['pr_id'] = parent_record.get('id')
         record['id'] = '{}-{}'.format(parent_record.get('id'), record.get('sha'))
 
+class PRFiles(IncrementalStream):
+    '''
+    https://docs.github.com/en/rest/pulls/pulls#list-pull-requests-files
+    '''
+    tap_stream_id = "pr_files"
+    replication_method = "INCREMENTAL"
+    replication_keys = "updated_at"
+    key_properties = ["id"]
+    path = "pulls/{}/files"
+    use_repository = True
+    id_keys = ['number']
+    parent = 'pull_requests'
+
+    def add_fields_at_1st_level(self, record, parent_record = None):
+        """
+        Add the parent PR's updated_at, number and id, plus a synthetic id, at the 1st level of the record.
+        """
+        # Use the PR's updated_at for incremental replication because file objects lack timestamps
+        record['updated_at'] = parent_record.get('updated_at')
+        record['pr_number'] = parent_record.get('number')
+        record['pr_id'] = parent_record.get('id')
+        # Build a stable id based on PR id and filename
+        record['id'] = '{}-{}'.format(parent_record.get('id'), record.get('filename'))
+
 class PullRequests(IncrementalOrderedStream):
     '''
     https://developer.github.com/v3/pulls/#list-pull-requests
@@ -513,7 +537,7 @@ class PullRequests(IncrementalOrderedStream):
     replication_keys = "updated_at"
     key_properties = ["id"]
     path = "pulls?state=all&sort=updated&direction=desc"
-    children = ['reviews', 'review_comments', 'pr_commits']
+    children = ['reviews', 'review_comments', 'pr_commits', 'pr_files']
     pk_child_fields = ["number"]
 
 class TeamMemberships(FullTableStream):
@@ -716,6 +740,7 @@ def add_fields_at_1st_level(self, record, parent_record = None):
     "reviews": Reviews,
     "review_comments": ReviewComments,
     "pr_commits": PRCommits,
+    "pr_files": PRFiles,
     "teams": Teams,
     "team_members": TeamMembers,
     "team_memberships": TeamMemberships,