Skip to content
84 changes: 84 additions & 0 deletions src/packageurl/contrib/purl2url.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# Visit https://github.com/package-url/packageurl-python for support and
# download.

import re

from packageurl import PackageURL
from packageurl.contrib.route import NoRouteAvailable
from packageurl.contrib.route import Router
Expand Down Expand Up @@ -172,6 +174,88 @@ def build_gitlab_repo_url(purl):
return f"https://gitlab.com/{namespace}/{name}"


# Map each group of git-hosting domain regexes to the URL templates used to
# rebuild a commit URL ("commit_url") or a repository URL ("repo_url") from
# the namespace, name and version of a generic purl.
# All patterns are raw strings so that "\." is a regex escape and not an
# (invalid) Python string escape.
GIT_REPO_GENERIC = {
    # cgit
    (
        r"git\.kernel\.org",
        r"gitweb\.gentoo\.org",
        r"cgit\.git\.savannah\.gnu\.org",
        r"web\.git\.kernel\.org",
    ): {
        "commit_url": "https://{namespace}/{name}.git/commit/?id={version}",
        "repo_url": "https://{namespace}/{name}.git",
    },
    # gitiles
    (
        r"android\.googlesource\.com",
        r"aomedia\.googlesource\.com",
        r"chromium\.googlesource\.com",
        r"gerrit\.googlesource\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/+/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # allura
    (r"sourceforge\.net", r"forge-allura\.apache\.org"): {
        "commit_url": "https://{namespace}/{name}/ci/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # gitweb
    (
        r"gcc\.gnu\.org/git",
        r"git\.postgresql\.org",
        r"sourceware\.org",
        r"git\.openssl\.org",
        r"gitbox\.apache\.org",
    ): {
        "commit_url": "https://{namespace}/?p={name}.git;a=commit;h={version}",
        "repo_url": "https://{namespace}/?p={name}.git",
    },
    # gitea / forgejo
    (
        r"codeberg\.org",
        r"gitea\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # sub gitlab ( excludes gitlab.com )
    # NOTE(review): add salsa for Debian projects (https://salsa.debian.org/public)
    # and https://gitlab.eclipse.org for Eclipse.
    (
        r"git\.codelinaro\.org.*",
        r"gitlab\.(?!com\b)[^/]+",
    ): {
        "commit_url": "https://{namespace}/{name}/-/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
}


@repo_router.route("pkg:generic/.*")
def build_generic_repo_url(purl):
    """
    Return a URL built from a generic `purl` string whose namespace starts
    with one of the git hosting domains listed in GIT_REPO_GENERIC.

    Return the commit URL when the purl has a version, the repository URL
    when it has none, and None when the purl has no namespace or name or
    when no known domain matches.
    """
    purl_data = PackageURL.from_string(purl)
    name = purl_data.name
    namespace = purl_data.namespace
    version = purl_data.version

    if not (namespace and name):
        return None

    for patterns, template_url in GIT_REPO_GENERIC.items():
        for pattern in patterns:
            # re.match() only anchors at the start, so require the domain
            # pattern to stop at a path separator or at the end of the
            # namespace. Without this, "gitea\.com" would also accept a
            # namespace such as "gitea.company.example/...".
            if not re.match(rf"(?:{pattern})(?=/|$)", namespace):
                continue

            if version:
                return template_url["commit_url"].format(
                    namespace=namespace, name=name, version=version
                )
            return template_url["repo_url"].format(namespace=namespace, name=name)
    return None


@repo_router.route("pkg:(gem|rubygems)/.*")
def build_rubygems_repo_url(purl):
"""
Expand Down
259 changes: 259 additions & 0 deletions src/packageurl/contrib/url2purl.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,265 @@ def build_bitbucket_purl(url):
)


def build_route_regex(domain_patterns, path_suffix="/.*"):
    """
    Return a route regex string matching an http or https URL on any of the
    `domain_patterns` regexes, followed by the `path_suffix` regex.

    The domain patterns are joined as alternatives inside a single group.
    """
    alternatives = "|".join(domain_patterns)
    return r"https?://(" + alternatives + ")" + path_suffix


# Domain regexes of GitLab instances other than gitlab.com itself
# (the negative lookahead rejects "gitlab.com").
SUB_GITLAB_DOMAINS = [
    r"git\.codelinaro\.org",
    r"gitlab\.(?!com\b)[^/]+",
]
# Route regex matching any URL hosted on one of the domains above.
SUB_GITLAB_ROUTE_REGEX = build_route_regex(domain_patterns=SUB_GITLAB_DOMAINS)


@purl_router.route(SUB_GITLAB_ROUTE_REGEX)
def build_gitlab_sub_purl(url):
    """
    Return a generic PackageURL built from a repository or commit URL hosted
    on a GitLab instance other than gitlab.com, or None if `url` does not
    have the expected shape.
    For example:
    https://gitlab.gnome.org/GNOME/gimp
    https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm
    https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365
    https://git.codelinaro.org/linaro/qcom/project/-/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    """
    # namespace: host plus leading path segments; name: last path segment;
    # version: optional 7 to 64 hex digits commit id after "/-/commit/".
    pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        r"(?:/-/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(pattern, url)
    if found is None:
        return None

    parts = found.groupdict()
    # NOTE(review): if this is GitLab-based it should be a gitlab PURL with a
    # repository_url qualifier.
    return PackageURL(
        type="generic",
        namespace=parts["namespace"],
        name=parts["name"],
        version=parts["version"],
    )


# Domain regexes of Gitea / Forgejo hosts. Raw strings keep "\." a regex
# escape instead of an invalid Python string escape.
GITEA_DOMAINS = [r"codeberg\.org", r"gitea\.com"]
# Route regex matching any URL hosted on one of the domains above.
GITEA_ROUTE_REGEX = build_route_regex(GITEA_DOMAINS)


@purl_router.route(GITEA_ROUTE_REGEX)
def build_gitea_purl(url):
    """
    Return a generic PackageURL built from a gitea/forgejo repository or
    commit URL, or None if `url` does not have the expected shape.
    For example:
    https://gitea.com/htc47/entur
    https://codeberg.org/alpinelinux/aports
    https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c
    """
    # namespace: host plus leading path segments; name: last path segment;
    # version: optional 7 to 64 hex digits commit id after "/commit/".
    pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        r"(?:/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(pattern, url)
    if found is None:
        return None

    parts = found.groupdict()
    # NOTE(review): we need a Forgejo PURL! (cc @johnmhoran)
    return PackageURL(
        type="generic",
        namespace=parts["namespace"],
        name=parts["name"],
        version=parts["version"],
    )


# Domain regexes of cgit hosts. Raw strings keep "\." a regex escape
# instead of an invalid Python string escape.
CGIT_DOMAINS = [
    r"git\.kernel\.org",
    r"gitweb\.gentoo\.org",
    r"cgit\.git\.savannah\.gnu\.org",
    r"web\.git\.kernel\.org",
]
# Route regex matching any URL hosted on one of the domains above.
CGIT_ROUTE_REGEX = build_route_regex(CGIT_DOMAINS)


@purl_router.route(CGIT_ROUTE_REGEX)
def build_cgit_purl(url):
    """
    Return a generic PackageURL built from a cgit repository or commit URL,
    or None if `url` matches neither supported shape.
    For example:
    https://git.kernel.org/pub/scm/utils/b4/b4.git
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
    https://cgit.git.savannah.gnu.org/cgit/uddf.git
    https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git
    https://gitweb.gentoo.org/dev/darkside.git
    https://gitweb.gentoo.org/repo/gentoo.git
    https://git.kernel.org/pub/scm/bluetooth/bluez.git/commit/?id=74770b1fd2be612f9c2cf807db81fcdcc35e6560
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git/commit/?h=for-next&id=bd771cf5c4254511cc4abb88f3dab3bd58bdf8e8
    https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/fs/smb?id=db363b0a1d9e6b9dc556296f1b1007aeb496a8cf
    https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d
    https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825
    https://git.kernel.org/stable/c/9a9a8fe26751334b7739193a94eba741073b8a55
    """

    # Short form: https://git.kernel.org/stable/c/<hash>
    kernel_shorthand = (
        r"^https?://git\.kernel\.org/stable/c/"
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    # Regular form: namespace is the host plus leading path segments, name is
    # the last segment without its optional ".git" suffix, and version is the
    # optional "id" query parameter of a "/commit/" URL.
    cgit_project_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        r"(?:\.git)?"
        r"(?:/commit/(?:[^?]+)?\?.*?\bid=(?P<version>[0-9a-fA-F]{7,64})(?:&.*)?)?"
        r"/?$"
    )

    shorthand_match = re.search(kernel_shorthand, url)
    if shorthand_match:
        # The short form carries only a hash: namespace and name are fixed.
        # NOTE(review): this needs thinking and there is an emerging PURL
        # registry that will cater to the kernel needs.
        namespace = "git.kernel.org/pub/scm/linux/kernel/git/stable/"
        name = "linux"
        version = shorthand_match.group("version")
    else:
        project_match = re.search(cgit_project_pattern, url)
        if not project_match:
            return None
        fields = project_match.groupdict()
        namespace = fields["namespace"]
        name = fields["name"]
        version = fields["version"]

    # NOTE(review): there is an emerging git PURL for that from @darakian.
    return PackageURL(
        type="generic",
        namespace=namespace,
        name=name,
        version=version,
        qualifiers={},
        subpath="",
    )


# Domain regexes of Gitiles hosts, all under googlesource.com.
GITILES_DOMAINS = [
    rf"{subdomain}\.googlesource\.com"
    for subdomain in ("android", "aomedia", "chromium", "gerrit")
]
# Route regex matching any URL hosted on one of the domains above.
GITILES_ROUTE_REGEX = build_route_regex(domain_patterns=GITILES_DOMAINS)


@purl_router.route(GITILES_ROUTE_REGEX)
def build_gitiles_purl(url):
    """
    Return a generic PackageURL built from a Gitiles repository or commit
    URL, or None if `url` does not have the expected shape.
    For example:
    https://android.googlesource.com/platform/frameworks/base
    https://android.googlesource.com/device/generic/vulkan-cereal
    https://android.googlesource.com/platform/packages/apps/Settings/+/2968ccc911956fa5813a9a6a5e5c8970e383a60f
    https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3
    """
    # The namespace may not contain the "/+/" marker; name is the path
    # segment that follows it; version is the optional 7 to 64 hex digits
    # commit id after "/+/".
    pattern = (
        r"^https?://"
        r"(?P<namespace>(?:(?!/\+/).)+)/"
        r"(?P<name>[^/]+)"
        r"(?:/\+/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(pattern, url)
    if found is None:
        return None

    parts = found.groupdict()
    # NOTE(review): likely also a candidate for the new git PURL type.
    return PackageURL(
        type="generic",
        namespace=parts["namespace"],
        name=parts["name"],
        version=parts["version"],
        qualifiers={},
        subpath="",
    )


# Domain regexes of Apache Allura hosts.
ALLURA_DOMAINS = [
    r"sourceforge\.net",
    r"forge-allura\.apache\.org",
]
# Route regex matching only URLs under the "/p/" path of the domains above.
ALLURA_ROUTE_REGEX = build_route_regex(ALLURA_DOMAINS, path_suffix="/p/.*")


@purl_router.route(ALLURA_ROUTE_REGEX)
def build_allura_purl(url):
    """
    Return a generic PackageURL built from an Apache Allura repository or
    commit URL (e.g., SourceForge), or None if `url` does not have the
    expected shape.
    For example:
    https://sourceforge.net/p/djvu/djvulibre-git
    https://sourceforge.net/p/expat/code_git
    https://forge-allura.apache.org/p/allura/git
    https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d
    https://sourceforge.net/p/infrarecorder/code/ci/9361b6f267e7b1c1576c48f6dac6dec18d8a93e0/
    https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946/
    """
    # namespace: host plus leading path segments; name: last path segment;
    # version: optional 7 to 64 hex digits commit id after "/ci/".
    pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        r"(?:/ci/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = re.search(pattern, url)
    if found is None:
        return None

    parts = found.groupdict()
    # NOTE(review): we may have a sourceforge type? Or this is for a git type.
    return PackageURL(
        type="generic",
        namespace=parts["namespace"],
        name=parts["name"],
        version=parts["version"],
        qualifiers={},
        subpath="",
    )


# Domain (and leading path) regexes of Gitweb hosts. Raw strings keep "\."
# a regex escape instead of an invalid Python string escape.
GITWEB_DOMAINS = [
    r"gcc\.gnu\.org/git",
    r"git\.postgresql\.org/gitweb",
    r"sourceware\.org/git",
    r"git\.openssl\.org/gitweb",
    r"gitbox\.apache\.org",
]
# Route regex matching any URL hosted on one of the entries above.
GITWEB_ROUTE_REGEX = build_route_regex(GITWEB_DOMAINS)


@purl_router.route(GITWEB_ROUTE_REGEX)
def build_gitweb_purl(url):
    """
    Return a generic PackageURL built from a Gitweb repository or commit
    URL, or None if `url` does not have the expected shape.
    For example:
    https://gcc.gnu.org/git/?p=gcc.git
    https://git.postgresql.org/gitweb/?p=hamn.git
    https://sourceware.org/git/?p=glibc.git
    https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339
    https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2
    https://sourceware.org/git/?p=glibc.git;a=commit;h=dedebed24f77762eea7d3c5ed2739a90a4d60461
    https://gitbox.apache.org/repos/asf?p=xalan-java.git;a=commit;h=da3e0d06b467247643ce04e88d3346739d119f21
    """
    # namespace: everything before the "?" (without a trailing slash).
    # The two lookaheads then read the query parameters regardless of their
    # order: "p" gives the name (optional ".git" suffix excluded) and the
    # optional "h" gives the version as 7 to 64 hex digits.
    pattern = (
        r"^https?://"
        r"(?P<namespace>[^?]+?)"
        r"/?(?=\?)"
        r"(?=.*[?;&]p=(?P<name>[^;&]+?)(?:\.git)?(?:[;&]|$))"
        r"(?:(?=.*[?;&]h=(?P<version>[0-9a-fA-F]{7,64}))|)"
    )

    found = re.search(pattern, url)
    if found is None:
        return None

    parts = found.groupdict()
    return PackageURL(
        type="generic",
        namespace=parts["namespace"],
        name=parts["name"],
        version=parts["version"],
        qualifiers={},
        subpath="",
    )


@purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*")
def build_gitlab_purl(url):
"""
Expand Down
Loading
Loading