-
Notifications
You must be signed in to change notification settings - Fork 56
Extend url2purl/purl2url coverage for Git-based source hosts #223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4c168a8
db9d6bc
7e87e09
24fdf1d
7e24835
0020818
5909b64
c47acb5
d02949b
92da2f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,8 @@ | |
| # Visit https://github.com/package-url/packageurl-python for support and | ||
| # download. | ||
|
|
||
| import re | ||
|
|
||
| from packageurl import PackageURL | ||
| from packageurl.contrib.route import NoRouteAvailable | ||
| from packageurl.contrib.route import Router | ||
|
|
@@ -172,6 +174,88 @@ def build_gitlab_repo_url(purl): | |
| return f"https://gitlab.com/{namespace}/{name}" | ||
|
|
||
|
|
||
# Maps a tuple of namespace regex patterns (one tuple per family of Git web
# front ends) to the URL templates used to rebuild a repository URL
# ("repo_url") or a commit URL ("commit_url") from a generic purl.
# All patterns are raw strings so that "\." reaches the regex engine verbatim
# instead of being an invalid string escape sequence.
GIT_REPO_GENERIC = {
    # cgit
    (
        r"git\.kernel\.org",
        r"gitweb\.gentoo\.org",
        r"cgit\.git\.savannah\.gnu\.org",
        r"web\.git\.kernel\.org",
    ): {
        "commit_url": "https://{namespace}/{name}.git/commit/?id={version}",
        "repo_url": "https://{namespace}/{name}.git",
    },
    # gitiles
    (
        r"android\.googlesource\.com",
        r"aomedia\.googlesource\.com",
        r"chromium\.googlesource\.com",
        r"gerrit\.googlesource\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/+/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # allura
    (r"sourceforge\.net", r"forge-allura\.apache\.org"): {
        "commit_url": "https://{namespace}/{name}/ci/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # gitweb
    (
        r"gcc\.gnu\.org/git",
        r"git\.postgresql\.org",
        r"sourceware\.org",
        r"git\.openssl\.org",
        r"gitbox\.apache\.org",
    ): {
        "commit_url": "https://{namespace}/?p={name}.git;a=commit;h={version}",
        "repo_url": "https://{namespace}/?p={name}.git",
    },
    # gitea / forgejo
    (
        r"codeberg\.org",
        r"gitea\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # sub gitlab ( excludes gitlab.com )
    (
        r"git\.codelinaro\.org.*",
        # NOTE(review): Debian's salsa instance was requested in review.
        # gitlab.eclipse.org, requested alongside it, is already covered by
        # the generic "gitlab.<anything but com>" pattern below.
        r"salsa\.debian\.org",
        r"gitlab\.(?!com\b)[^/]+",
    ): {
        "commit_url": "https://{namespace}/{name}/-/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
}
|
|
||
|
|
||
@repo_router.route("pkg:generic/.*")
def build_generic_repo_url(purl):
    """
    Return a URL for a `pkg:generic` `purl` string whose namespace starts with
    one of the hosts listed in GIT_REPO_GENERIC.

    Return the commit URL when the purl has a version and the repository URL
    otherwise. Return None when the purl has no namespace or no name, or when
    no known host pattern matches the namespace.
    """
    purl_data = PackageURL.from_string(purl)
    name = purl_data.name
    namespace = purl_data.namespace
    version = purl_data.version

    if not (namespace and name):
        return None

    for patterns, template_url in GIT_REPO_GENERIC.items():
        # A host pattern must be followed by a path separator or the end of
        # the namespace; a bare prefix match would let "gitea\.com" accept an
        # unrelated host such as "gitea.community".
        known_host = any(
            re.match(rf"(?:{pattern})(?:/|$)", namespace) for pattern in patterns
        )
        if not known_host:
            continue

        template = template_url["commit_url" if version else "repo_url"]
        # str.format ignores the unused `version` keyword for "repo_url".
        return template.format(namespace=namespace, name=name, version=version)

    return None
|
|
||
|
|
||
| @repo_router.route("pkg:(gem|rubygems)/.*") | ||
| def build_rubygems_repo_url(purl): | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -667,6 +667,265 @@ def build_bitbucket_purl(url): | |
| ) | ||
|
|
||
|
|
||
def build_route_regex(domain_patterns, path_suffix="/.*"):
    """
    Return a route regex string matching an http or https URL whose host part
    is any one of the `domain_patterns` regex fragments, followed by
    `path_suffix`.
    """
    # The fragments become the alternatives of a single capturing group.
    alternatives = "|".join(domain_patterns)
    return r"https?://({}){}".format(alternatives, path_suffix)
|
|
||
|
|
||
# Host patterns routed to build_gitlab_sub_purl: git.codelinaro.org plus any
# "gitlab.<host>" that is not gitlab.com itself.
SUB_GITLAB_DOMAINS = [
    r"git\.codelinaro\.org",
    r"gitlab\.(?!com\b)[^/]+",
]
SUB_GITLAB_ROUTE_REGEX = build_route_regex(domain_patterns=SUB_GITLAB_DOMAINS)
|
|
||
|
|
||
@purl_router.route(SUB_GITLAB_ROUTE_REGEX)
def build_gitlab_sub_purl(url):
    """
    Return a PackageURL object from a repository or commit URL hosted on a
    GitLab sub domain, or None when the URL does not have the expected shape.
    For example:
    https://gitlab.gnome.org/GNOME/gimp
    https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm
    https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365
    https://git.codelinaro.org/linaro/qcom/project/-/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    """
    # namespace = host plus leading path segments, name = last path segment,
    # version = the optional commit hash after "/-/commit/".
    gitlab_sub_commit_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        r"(?:/-/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(gitlab_sub_commit_pattern, url)
    if not found:
        return None

    # NOTE(review): a reviewer suggested that GitLab-based hosts should map to
    # a gitlab PURL rather than the "generic" type used here.
    # The named groups are exactly namespace, name and version.
    return PackageURL(type="generic", **found.groupdict())
|
|
||
|
|
||
# Host patterns of the gitea/forgejo instances routed to build_gitea_purl.
# Raw strings keep "\." a regex escape rather than an invalid string escape.
GITEA_DOMAINS = [r"codeberg\.org", r"gitea\.com"]
GITEA_ROUTE_REGEX = build_route_regex(GITEA_DOMAINS)
|
|
||
|
|
||
@purl_router.route(GITEA_ROUTE_REGEX)
def build_gitea_purl(url):
    """
    Return a PackageURL object from a gitea/forgejo repository or commit URL,
    or None when the URL does not have the expected shape.
    For example:
    https://gitea.com/htc47/entur
    https://codeberg.org/alpinelinux/aports
    https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c
    """
    # namespace = host plus leading path segments, name = last path segment,
    # version = the optional commit hash after "/commit/".
    gitea_commit_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        r"(?:/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(gitea_commit_pattern, url)
    if not found:
        return None

    # NOTE(review): a reviewer noted that a dedicated Forgejo PURL type is
    # needed; the "generic" type is what this code emits.
    # The named groups are exactly namespace, name and version.
    return PackageURL(type="generic", **found.groupdict())
|
|
||
|
|
||
# Host patterns of the cgit instances routed to build_cgit_purl.
# Raw strings keep "\." a regex escape rather than an invalid string escape.
CGIT_DOMAINS = [
    r"git\.kernel\.org",
    r"gitweb\.gentoo\.org",
    r"cgit\.git\.savannah\.gnu\.org",
    r"web\.git\.kernel\.org",
]
CGIT_ROUTE_REGEX = build_route_regex(CGIT_DOMAINS)
|
|
||
|
|
||
@purl_router.route(CGIT_ROUTE_REGEX)
def build_cgit_purl(url):
    """
    Return a PackageURL object from a cgit repository or commit URL, or None
    when the URL matches neither supported shape.
    For example:
    https://git.kernel.org/pub/scm/utils/b4/b4.git
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
    https://cgit.git.savannah.gnu.org/cgit/uddf.git
    https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git
    https://gitweb.gentoo.org/dev/darkside.git
    https://gitweb.gentoo.org/repo/gentoo.git
    https://git.kernel.org/pub/scm/bluetooth/bluez.git/commit/?id=74770b1fd2be612f9c2cf807db81fcdcc35e6560
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git/commit/?h=for-next&id=bd771cf5c4254511cc4abb88f3dab3bd58bdf8e8
    https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/fs/smb?id=db363b0a1d9e6b9dc556296f1b1007aeb496a8cf
    https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d
    https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825
    https://git.kernel.org/stable/c/9a9a8fe26751334b7739193a94eba741073b8a55
    """

    # https://git.kernel.org/stable/c/<hash>
    kernel_shorthand = r"^https?://git\.kernel\.org/stable/c/" r"(?P<version>[0-9a-fA-F]{7,64})/?$"

    # <namespace>/<name>[.git][/commit/[path]?...id=<hash>...]
    cgit_project_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        r"(?:\.git)?"
        r"(?:/commit/(?:[^?]+)?\?.*?\bid=(?P<version>[0-9a-fA-F]{7,64})(?:&.*)?)?"
        r"/?$"
    )

    # The shorthand form is tried first; it carries only the commit hash, so
    # the namespace and name are fixed values.
    shorthand_hit = re.search(kernel_shorthand, url)
    if shorthand_hit:
        # NOTE(review): a reviewer flagged this hard-coded mapping as needing
        # more thought, mentioning an emerging PURL registry for kernel needs.
        namespace = "git.kernel.org/pub/scm/linux/kernel/git/stable/"
        name = "linux"
        version = shorthand_hit.group("version")
    else:
        project_hit = re.search(cgit_project_pattern, url)
        if not project_hit:
            return None
        namespace = project_hit.group("namespace")
        name = project_hit.group("name")
        version = project_hit.group("version")

    # NOTE(review): a reviewer mentioned an emerging git PURL type as a
    # possible replacement for "generic" here.
    return PackageURL(
        type="generic",
        namespace=namespace,
        name=name,
        version=version,
        qualifiers={},
        subpath="",
    )
|
|
||
|
|
||
# Host patterns of the Gitiles instances routed to build_gitiles_purl.
GITILES_DOMAINS = [
    r"android\.googlesource\.com",
    r"aomedia\.googlesource\.com",
    r"chromium\.googlesource\.com",
    r"gerrit\.googlesource\.com",
]
GITILES_ROUTE_REGEX = build_route_regex(domain_patterns=GITILES_DOMAINS)
|
|
||
|
|
||
@purl_router.route(GITILES_ROUTE_REGEX)
def build_gitiles_purl(url):
    """
    Return a PackageURL object from a Gitiles repository or commit URL, or
    None when the URL does not have the expected shape.
    For example:
    https://android.googlesource.com/platform/frameworks/base
    https://android.googlesource.com/device/generic/vulkan-cereal
    https://android.googlesource.com/platform/packages/apps/Settings/+/2968ccc911956fa5813a9a6a5e5c8970e383a60f
    https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3
    """
    # The namespace group refuses to run across a "/+/" marker, so the commit
    # hash after "/+/" can only end up in the version group.
    gitiles_project_pattern = (
        r"^https?://"
        r"(?P<namespace>(?:(?!/\+/).)+)/"
        r"(?P<name>[^/]+)"
        r"(?:/\+/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(gitiles_project_pattern, url)
    if not found:
        return None

    # NOTE(review): a reviewer considers this a likely candidate for the new
    # git PURL type instead of "generic".
    # The named groups are exactly namespace, name and version.
    return PackageURL(
        type="generic",
        qualifiers={},
        subpath="",
        **found.groupdict(),
    )
|
|
||
|
|
||
# Host patterns of the Apache Allura instances routed to build_allura_purl;
# only URLs under the "/p/" path prefix are routed.
ALLURA_DOMAINS = [
    r"sourceforge\.net",
    r"forge-allura\.apache\.org",
]
ALLURA_ROUTE_REGEX = build_route_regex(ALLURA_DOMAINS, path_suffix="/p/.*")
|
|
||
|
|
||
@purl_router.route(ALLURA_ROUTE_REGEX)
def build_allura_purl(url):
    """
    Return a PackageURL object from an Apache Allura repository or commit URL
    (e.g., SourceForge), or None when the URL does not have the expected shape.
    For example:
    https://sourceforge.net/p/djvu/djvulibre-git
    https://sourceforge.net/p/expat/code_git
    https://forge-allura.apache.org/p/allura/git
    https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d
    https://sourceforge.net/p/infrarecorder/code/ci/9361b6f267e7b1c1576c48f6dac6dec18d8a93e0/
    https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946/
    """
    # namespace = host plus leading path segments, name = last path segment,
    # version = the optional commit hash after "/ci/".
    allura_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        r"(?:/ci/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(allura_pattern, url)
    if not found:
        return None

    # NOTE(review): a reviewer asked whether a sourceforge type, or a git
    # type, should be used here instead of "generic".
    # The named groups are exactly namespace, name and version.
    return PackageURL(
        type="generic",
        qualifiers={},
        subpath="",
        **found.groupdict(),
    )
|
|
||
|
|
||
# Host (and, where present, leading path) patterns of the Gitweb instances
# routed to build_gitweb_purl.
# Raw strings keep "\." a regex escape rather than an invalid string escape.
GITWEB_DOMAINS = [
    r"gcc\.gnu\.org/git",
    r"git\.postgresql\.org/gitweb",
    r"sourceware\.org/git",
    r"git\.openssl\.org/gitweb",
    r"gitbox\.apache\.org",
]
GITWEB_ROUTE_REGEX = build_route_regex(GITWEB_DOMAINS)
|
|
||
|
|
||
@purl_router.route(GITWEB_ROUTE_REGEX)
def build_gitweb_purl(url):
    """
    Return a PackageURL object from a Gitweb repository or commit URL, or None
    when the URL does not have the expected shape.
    For example:
    https://gcc.gnu.org/git/?p=gcc.git
    https://git.postgresql.org/gitweb/?p=hamn.git
    https://sourceware.org/git/?p=glibc.git
    https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339
    https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2
    https://sourceware.org/git/?p=glibc.git;a=commit;h=dedebed24f77762eea7d3c5ed2739a90a4d60461
    https://gitbox.apache.org/repos/asf?p=xalan-java.git;a=commit;h=da3e0d06b467247643ce04e88d3346739d119f21
    """
    # namespace = everything before the "?" (minus an optional trailing "/").
    # The name ("p=" parameter, ".git" suffix dropped) and the optional
    # version ("h=" parameter) are captured by lookaheads over the query.
    gitweb_pattern = (
        r"^https?://"
        r"(?P<namespace>[^?]+?)"
        r"/?(?=\?)"
        r"(?=.*[?;&]p=(?P<name>[^;&]+?)(?:\.git)?(?:[;&]|$))"
        r"(?:(?=.*[?;&]h=(?P<version>[0-9a-fA-F]{7,64}))|)"
    )

    found = re.search(gitweb_pattern, url)
    if not found:
        return None

    return PackageURL(
        type="generic",
        namespace=found.group("namespace"),
        name=found.group("name"),
        version=found.group("version"),
        qualifiers={},
        subpath="",
    )
|
|
||
|
|
||
| @purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*") | ||
| def build_gitlab_purl(url): | ||
| """ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://forge.fedoraproject.org/ is also hosted on Forgejo.