@@ -3,17 +3,16 @@ name: Sync vector DB
33
44on :
55 workflow_dispatch :
6- inputs :
7- enable_artifact_download :
8- description : ' Enable artifact download step'
9- type : boolean
10- required : false
11- default : true
6+
127jobs :
13- # This workflow contains a single job called "greet"
148 sync_db :
159 # The type of runner that the job will run on
1610 runs-on : ubuntu-latest
11+ permissions :
12+ contents : read
13+ id-token : write  # allows the job to request a GitHub OIDC token; presumably what the role-to-assume credentials step relies on — confirm
14+ env :
15+ AWS_REGION : us-east-1  # read by the credentials step (env.AWS_REGION) and by the S3 download script ($AWS_REGION)
1716
1817 # Steps represent a sequence of tasks that will be executed as part of the job
1918 steps :
@@ -31,32 +30,34 @@ jobs:
3130 git lfs install
3231 git lfs pull
3332
34- - name : Download json data
35- id : download-json-data
36- uses : dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8
33+ - name : Configure AWS Credentials for S3
34+ uses : aws-actions/configure-aws-credentials@49f33fe638c0cba4fb16037a27915a7ab7740259  # pinned to a commit SHA; NOTE(review): record the matching release tag here
3735 with :
38- repo : stacklok/codegate-data
39- workflow : " .github/workflows/generate-artifact.yml"
40- workflow_conclusion : success
41- name : jsonl-files
42- path : /tmp/
43- name_is_regexp : true
44- skip_unpack : false
45- if_no_artifact_found : ignore
36+ role-to-assume : ${{ secrets.AWS_ROLE_INSIGHT_DATA_IMPORT }}  # role identifier comes from the secrets context, not hard-coded
37+ aws-region : ${{ env.AWS_REGION }}
4638
47- - name : Download artifact
48- if : ${{ github.event.inputs.enable_artifact_download == 'true' }}
49- id : download-artifact
50- uses : dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8
51- with :
52- github_token : ${{ github.token }}
53- workflow : " .github/workflows/import_packages.yml"
54- workflow_conclusion : success
55- name : sqlite_data
56- path : /tmp/
57- name_is_regexp : true
58- skip_unpack : false
59- if_no_artifact_found : ignore
39+ - name : Download JSONL files from S3  # reads object keys from manifest.json, then fetches each file under a fixed local name
40+ run : |
41+ echo "Downloading manifest.json from S3..."
42+ aws s3 cp s3://codegate-data-prod/manifest.json ./manifest.json --region "$AWS_REGION"
43+ echo "Manifest content:"
44+ cat manifest.json
45+ # jq -e exits non-zero on a missing/null key, so under the runner's default `bash -e` the step stops here rather than requesting s3://codegate-data-prod/null
46+ echo "Parsing manifest..."
47+ MALICIOUS_KEY=$(jq -er '.latest.malicious_packages' manifest.json)
48+ DEPRECATED_KEY=$(jq -er '.latest.deprecated_packages' manifest.json)
49+ ARCHIVED_KEY=$(jq -er '.latest.archived_packages' manifest.json)
50+
51+ echo "Malicious key: $MALICIOUS_KEY"
52+ echo "Deprecated key: $DEPRECATED_KEY"
53+ echo "Archived key: $ARCHIVED_KEY"
54+
55+ mkdir -p /tmp/jsonl-files
56+
57+ # Download and map the S3 files to fixed names in /tmp/jsonl-files (URIs are quoted so a key with spaces or glob characters stays one argument)
58+ aws s3 cp "s3://codegate-data-prod/$MALICIOUS_KEY" /tmp/jsonl-files/malicious.jsonl --region "$AWS_REGION"
59+ aws s3 cp "s3://codegate-data-prod/$DEPRECATED_KEY" /tmp/jsonl-files/deprecated.jsonl --region "$AWS_REGION"
60+ aws s3 cp "s3://codegate-data-prod/$ARCHIVED_KEY" /tmp/jsonl-files/archived.jsonl --region "$AWS_REGION"
6061
6162 - name : Install Poetry
6263 run : |
0 commit comments