CenterForOpenScience · mkovalua · Jan 30, 2026 · Feb 2, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py
@@ -6,6 +6,7 @@
 
 from django.contrib.contenttypes.models import ContentType
 from django import db
+from mimetypes import MimeTypes
 import rdflib
 
 from api.caching.tasks import get_storage_usage_total
@@ -44,6 +45,8 @@
 
 logger = logging.getLogger(__name__)
 
+# module-level MimeTypes instance; gather_file_mediatype uses it to guess a media type from a file name
+mime = MimeTypes()
+
 
 ##### BEGIN "public" api #####
 
@@ -373,7 +376,7 @@ def osf_iri(guid_or_model):
     return OSFIO[guid._id]
 
 
-def osfguid_from_iri(iri):
+def osfguid_from_iri(iri: str) -> str:
+    """Return `without_namespace(iri, OSFIO)` for an iri in the OSFIO namespace.
+
+    Presumably the result is the osf guid portion of the iri -- confirm against
+    `without_namespace`.
+
+    Raises:
+        ValueError: if `iri` does not start with OSFIO.
+    """
     if iri.startswith(OSFIO):
         return without_namespace(iri, OSFIO)
     raise ValueError(f'expected iri starting with "{OSFIO}" (got "{iri}")')
@@ -702,6 +705,18 @@ def gather_files(focus):
             yield (DCTERMS.requires, file_focus)
 
 
+@gather.er(DCAT.mediaType)
+def gather_file_mediatype(focus):
+    """Yield a single `DCAT.mediaType` guessed from `focus.dbmodel.name`.
+
+    Falls back to 'application/octet-stream' when the name gives no guess.
+    """
+    # guess_type returns a (type, encoding) pair; only the type is wanted
+    mime_type = mime.guess_type(focus.dbmodel.name)[0]
+    yield (
+        DCAT.mediaType,
+        'application/octet-stream' if mime_type is None else mime_type,
+    )
+
+
 @gather.er(DCTERMS.hasPart, DCTERMS.isPartOf)
 def gather_parts(focus):
     if isinstance(focus.dbmodel, osfdb.AbstractNode):

diff --git a/osf/metadata/serializers/__init__.py b/osf/metadata/serializers/__init__.py
@@ -9,13 +9,16 @@
 from .datacite import DataciteJsonMetadataSerializer, DataciteXmlMetadataSerializer
 from .google_dataset_json_ld import GoogleDatasetJsonLdSerializer
 from .turtle import TurtleMetadataSerializer
+from .linkset import SignpostLinkset, SignpostLinksetJSON
 
 
+# format key => serializer class
+# (the linkset serializers iterate this dict to build their `describedby` links,
+# using each key as the `format` query param and each class's `mediatype`)
 METADATA_SERIALIZER_REGISTRY = {
     'turtle': TurtleMetadataSerializer,
     'datacite-json': DataciteJsonMetadataSerializer,
     'datacite-xml': DataciteXmlMetadataSerializer,
     'google-dataset-json-ld': GoogleDatasetJsonLdSerializer,
+    # FAIR signposting linksets (see .linkset)
+    'linkset': SignpostLinkset,
+    'linkset-json': SignpostLinksetJSON
 }
 
 

diff --git a/osf/metadata/serializers/linkset.py b/osf/metadata/serializers/linkset.py
@@ -0,0 +1,148 @@
+"""osf.metadata.serializers.linkset: FAIR signposting with osf metadata
+FAIR signposting: https://signposting.org/FAIR/
+definition of linkset mediatypes: https://www.rfc-editor.org/rfc/rfc9264.html
+"""
+from __future__ import annotations
+import abc
+from collections.abc import (
+    Iterable,
+    Iterator
+)
+from collections import defaultdict
+import dataclasses
+import json
+from urllib.parse import urljoin, urlsplit, urlencode, urlunsplit
+
+import rdflib
+
+from ._base import MetadataSerializer
+from osf.metadata.osf_gathering import osfguid_from_iri
+from osf.metadata.rdfutils import DOI, DCTERMS, OWL, RDF, OSF, DCAT
+from website.settings import DOMAIN
+from website.util import web_url_for
+
+
+@dataclasses.dataclass
+class SignpostLink:
+    """One link in a linkset: `anchor_uri` --relation--> `target_uri`, with optional target attributes."""
+    anchor_uri: str  # link context; serialized as `anchor`
+    relation: str  # link relation type; serialized as `rel` (text) or as a key (json)
+    target_uri: str  # link target; serialized as `<...>` (text) or `href` (json)
+    target_attrs: Iterable[tuple[str, str]] = ()  # extra (key, value) pairs on the target, e.g. ('type', mediatype)
+
+
+class BaseSignpostLinkset(MetadataSerializer, abc.ABC):
+    """Shared link gathering for the signposting linkset serializers.
+
+    Subclasses decide how the links yielded by `_each_link` are rendered.
+    """
+
+    def _each_link(self) -> Iterator[SignpostLink]:
+        """Yield every signposting link for the focus of `self.basket`.
+
+        Relations, in the order yielded: collection (files only), author,
+        type, cite-as, describes, describedby, license, item.
+        """
+        focus_iri = self.basket.focus.iri
+        _focus_is_file = (self.basket.focus.rdftype == OSF.File)
+
+        # collection (file's containing obj)
+        if _focus_is_file:
+            for _collection_uri in self.basket[OSF.isContainedBy]:
+                yield SignpostLink(focus_iri, 'collection', str(_collection_uri))
+
+        # author
+        for _creator_iri in self.basket[DCTERMS.creator]:
+            yield SignpostLink(focus_iri, 'author', str(_creator_iri))
+
+        # type
+        # (for a file, skip any type shared with its parent project / registry / preprint)
+        _parent_types = (
+            set(self.basket[OSF.isContainedBy / (DCTERMS.type | RDF.type)])
+            if _focus_is_file
+            else frozenset()
+        )
+        for _type_iri in self.basket[DCTERMS.type | RDF.type]:
+            if _type_iri not in _parent_types:
+                yield SignpostLink(focus_iri, 'type', str(_type_iri))
+
+        # cite-as (the first DOI among owl:sameAs, else the focus iri itself)
+        yield SignpostLink(focus_iri, 'cite-as', next((
+            _sameas_iri
+            for _sameas_iri in self.basket[OWL.sameAs]
+            if _sameas_iri.startswith(DOI)
+        ), focus_iri))
+
+        base_metadata_url = urljoin(DOMAIN, web_url_for(
+            'metadata_download',  # name of a view function mapped in website/routes.py
+            guid=osfguid_from_iri(self.basket.focus.iri),
+        ))
+        split_base_metadata_url = urlsplit(base_metadata_url)
+
+        # describes
+        yield SignpostLink(
+            base_metadata_url,
+            'describes',
+            focus_iri,
+        )
+
+        # imported here rather than at module top: the package `__init__`
+        # imports this module, so a top-level import would be circular
+        from osf.metadata.serializers import METADATA_SERIALIZER_REGISTRY
+        # describedby (one link per registered metadata format)
+        for _format_key, _serializer in METADATA_SERIALIZER_REGISTRY.items():
+            _metadata_url = urlunsplit(split_base_metadata_url._replace(
+                query=urlencode({'format': _format_key}),
+            ))
+            yield SignpostLink(
+                focus_iri,
+                'describedby',
+                _metadata_url,
+                [('type', _serializer.mediatype)]
+            )
+
+        # license
+        for _license_uri in self.basket[DCTERMS.rights]:
+            if not isinstance(_license_uri, rdflib.BNode):
+                yield SignpostLink(focus_iri, 'license', str(_license_uri))
+
+        # item
+        for _file_iri in self.basket[OSF.contains]:
+            # `next` needs a default here: a bare StopIteration raised inside a
+            # generator is converted to RuntimeError (PEP 479), which would
+            # abort the whole serialization for one file without a media type
+            _mediatype = next(self.basket[_file_iri:DCAT.mediaType], None)
+            _item_attrs = () if _mediatype is None else [('type', _mediatype)]
+            yield SignpostLink(focus_iri, 'item', str(_file_iri), _item_attrs)
+
+
+class SignpostLinkset(BaseSignpostLinkset):
+    """Serializer rendering the signposting links as `application/linkset` text."""
+    mediatype = 'application/linkset'
+
+    def filename_for_itemid(self, itemid: str):
+        """Return the download filename for the given item id."""
+        return f'{itemid}-metadata.linkset'
+
+    def serialize(self) -> str | bytes:
+        """Render all links, one per line, comma-separated, newline-terminated.
+
+        example: https://www.rfc-editor.org/rfc/rfc9264.html#section-7.1
+        FAIR signposting: https://signposting.org/FAIR/
+        """
+        _rendered_links = [
+            self._serialize_link(_link)
+            for _link in self._each_link()
+        ]
+        return ',\n'.join(_rendered_links) + '\n'
+
+    def _serialize_link(self, link: SignpostLink) -> str:
+        """Render one link as `<target> ; rel="..." ; anchor="..."` plus any target attrs."""
+        _params = [
+            ('rel', link.relation),
+            ('anchor', link.anchor_uri),
+            *link.target_attrs,
+        ]
+        return ' ; '.join([
+            f'<{link.target_uri}>',
+            *(f'{_key}="{_value}"' for _key, _value in _params),
+        ])
+
+class SignpostLinksetJSON(BaseSignpostLinkset):
+    """Serializer rendering the signposting links as `application/linkset+json`."""
+    mediatype = 'application/linkset+json'
+
+    def filename_for_itemid(self, itemid: str):
+        """Return the download filename for the given item id."""
+        return f'{itemid}-metadata.linkset.json'
+
+    def serialize(self) -> str | bytes:
+        """Render all links as linkset json, grouped by anchor and then by relation.
+
+        definition: https://www.rfc-editor.org/rfc/rfc9264.html#section-4.2
+        example: https://www.rfc-editor.org/rfc/rfc9264.html#section-7.2
+        """
+        # anchor => relation => list of target objects (dicts keep first-seen order)
+        _by_anchor: dict = {}
+        for _link in self._each_link():
+            _target = {'href': _link.target_uri, **dict(_link.target_attrs)}
+            _by_relation = _by_anchor.setdefault(_link.anchor_uri, {})
+            _by_relation.setdefault(_link.relation, []).append(_target)
+
+        _linkset = [
+            {'anchor': _anchor, **_relations}
+            for _anchor, _relations in _by_anchor.items()
+        ]
+        return json.dumps({'linkset': _linkset}, indent=2)
diff --git a/osf_tests/metadata/expected_metadata_files/file_basic.linkset b/osf_tests/metadata/expected_metadata_files/file_basic.linkset
@@ -0,0 +1,10 @@
+<http://localhost:5000/w2ibb> ; rel="collection" ; anchor="http://localhost:5000/w3ibb",
+<https://osf.io/vocab/2022/File> ; rel="type" ; anchor="http://localhost:5000/w3ibb",
+<http://localhost:5000/w3ibb> ; rel="cite-as" ; anchor="http://localhost:5000/w3ibb",
+<http://localhost:5000/w3ibb> ; rel="describes" ; anchor="http://localhost:5000/metadata/w3ibb/",
+<http://localhost:5000/metadata/w3ibb/?format=turtle> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="text/turtle; charset=utf-8",
+<http://localhost:5000/metadata/w3ibb/?format=datacite-json> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/json",
+<http://localhost:5000/metadata/w3ibb/?format=datacite-xml> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/xml",
+<http://localhost:5000/metadata/w3ibb/?format=google-dataset-json-ld> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/ld+json",
+<http://localhost:5000/metadata/w3ibb/?format=linkset> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/linkset",
+<http://localhost:5000/metadata/w3ibb/?format=linkset-json> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/linkset+json"
diff --git a/osf_tests/metadata/expected_metadata_files/file_basic.linkset.json b/osf_tests/metadata/expected_metadata_files/file_basic.linkset.json
@@ -0,0 +1,56 @@
+{
+  "linkset": [
+    {
+      "anchor": "http://localhost:5000/w3ibb",
+      "collection": [
+        {
+          "href": "http://localhost:5000/w2ibb"
+        }
+      ],
+      "type": [
+        {
+          "href": "https://osf.io/vocab/2022/File"
+        }
+      ],
+      "cite-as": [
+        {
+          "href": "http://localhost:5000/w3ibb"
+        }
+      ],
+      "describedby": [
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=turtle",
+          "type": "text/turtle; charset=utf-8"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=datacite-json",
+          "type": "application/json"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=datacite-xml",
+          "type": "application/xml"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=google-dataset-json-ld",
+          "type": "application/ld+json"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=linkset",
+          "type": "application/linkset"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=linkset-json",
+          "type": "application/linkset+json"
+        }
+      ]
+    },
+    {
+      "anchor": "http://localhost:5000/metadata/w3ibb/",
+      "describes": [
+        {
+          "href": "http://localhost:5000/w3ibb"
+        }
+      ]
+    }
+  ]
+}
diff --git a/osf_tests/metadata/expected_metadata_files/file_full.linkset b/osf_tests/metadata/expected_metadata_files/file_full.linkset
@@ -0,0 +1,10 @@
+<http://localhost:5000/w2ibb> ; rel="collection" ; anchor="http://localhost:5000/w3ibb",
+<https://osf.io/vocab/2022/File> ; rel="type" ; anchor="http://localhost:5000/w3ibb",
+<http://localhost:5000/w3ibb> ; rel="cite-as" ; anchor="http://localhost:5000/w3ibb",
+<http://localhost:5000/w3ibb> ; rel="describes" ; anchor="http://localhost:5000/metadata/w3ibb/",
+<http://localhost:5000/metadata/w3ibb/?format=turtle> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="text/turtle; charset=utf-8",
+<http://localhost:5000/metadata/w3ibb/?format=datacite-json> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/json",
+<http://localhost:5000/metadata/w3ibb/?format=datacite-xml> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/xml",
+<http://localhost:5000/metadata/w3ibb/?format=google-dataset-json-ld> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/ld+json",
+<http://localhost:5000/metadata/w3ibb/?format=linkset> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/linkset",
+<http://localhost:5000/metadata/w3ibb/?format=linkset-json> ; rel="describedby" ; anchor="http://localhost:5000/w3ibb" ; type="application/linkset+json"
diff --git a/osf_tests/metadata/expected_metadata_files/file_full.linkset.json b/osf_tests/metadata/expected_metadata_files/file_full.linkset.json
@@ -0,0 +1,56 @@
+{
+  "linkset": [
+    {
+      "anchor": "http://localhost:5000/w3ibb",
+      "collection": [
+        {
+          "href": "http://localhost:5000/w2ibb"
+        }
+      ],
+      "type": [
+        {
+          "href": "https://osf.io/vocab/2022/File"
+        }
+      ],
+      "cite-as": [
+        {
+          "href": "http://localhost:5000/w3ibb"
+        }
+      ],
+      "describedby": [
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=turtle",
+          "type": "text/turtle; charset=utf-8"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=datacite-json",
+          "type": "application/json"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=datacite-xml",
+          "type": "application/xml"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=google-dataset-json-ld",
+          "type": "application/ld+json"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=linkset",
+          "type": "application/linkset"
+        },
+        {
+          "href": "http://localhost:5000/metadata/w3ibb/?format=linkset-json",
+          "type": "application/linkset+json"
+        }
+      ]
+    },
+    {
+      "anchor": "http://localhost:5000/metadata/w3ibb/",
+      "describes": [
+        {
+          "href": "http://localhost:5000/w3ibb"
+        }
+      ]
+    }
+  ]
+}
diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.linkset b/osf_tests/metadata/expected_metadata_files/preprint_basic.linkset
@@ -0,0 +1,11 @@
+<http://localhost:5000/w1ibb> ; rel="author" ; anchor="http://localhost:5000/w4ibb",
+<https://schema.datacite.org/meta/kernel-4/#Preprint> ; rel="type" ; anchor="http://localhost:5000/w4ibb",
+<https://osf.io/vocab/2022/Preprint> ; rel="type" ; anchor="http://localhost:5000/w4ibb",
+<https://doi.org/11.pp/FK2osf.io/w4ibb_v1> ; rel="cite-as" ; anchor="http://localhost:5000/w4ibb",
+<http://localhost:5000/w4ibb> ; rel="describes" ; anchor="http://localhost:5000/metadata/w4ibb/",
+<http://localhost:5000/metadata/w4ibb/?format=turtle> ; rel="describedby" ; anchor="http://localhost:5000/w4ibb" ; type="text/turtle; charset=utf-8",
+<http://localhost:5000/metadata/w4ibb/?format=datacite-json> ; rel="describedby" ; anchor="http://localhost:5000/w4ibb" ; type="application/json",
+<http://localhost:5000/metadata/w4ibb/?format=datacite-xml> ; rel="describedby" ; anchor="http://localhost:5000/w4ibb" ; type="application/xml",
+<http://localhost:5000/metadata/w4ibb/?format=google-dataset-json-ld> ; rel="describedby" ; anchor="http://localhost:5000/w4ibb" ; type="application/ld+json",
+<http://localhost:5000/metadata/w4ibb/?format=linkset> ; rel="describedby" ; anchor="http://localhost:5000/w4ibb" ; type="application/linkset",
+<http://localhost:5000/metadata/w4ibb/?format=linkset-json> ; rel="describedby" ; anchor="http://localhost:5000/w4ibb" ; type="application/linkset+json"