Skip to content

pymupdf.Document.scrub raises AttributeError for a document with annotations #4928

@mahlzahn

Description

@mahlzahn

Description of the bug

Traceback output:
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-2-df56e242e8d3> in ?()
      1 import pymupdf
----> 2 pymupdf.Document('annotated_pdf.pdf').scrub()

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(doc, attached_files, clean_pages, embedded_files, hidden_text, javascript, metadata, redactions, redact_images, remove_links, reset_fields, reset_responses, thumbnails, xml_metadata)
   6677                 for link in links:  # remove all links
   6678                     page.delete_link(link)
   6679 
   6680             found_redacts = False
-> 6681             for annot in page.annots():
   6682                 if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
   6683                     annot.update_file(buffer_=b" ")  # set file content to empty
   6684                 if reset_responses:

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(self, types)
  10687             annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] not in skip_types]
  10688         else:
  10689             annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] in types and a[1] not in skip_types]
  10690         for xref in annot_xrefs:
> 10691             annot = self.load_annot(xref)
  10692             annot._yielded=True
  10693             yield annot

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(self, ident)
  12760             xref = ident
  12761             name = None
  12762         else:
  12763             raise ValueError("identifier must be a string or integer")
> 12764         val = self._load_annot(name, xref)
  12765         if not val:
  12766             return val
  12767         val.thisown = True

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(self, name, xref)
  10219         if xref == 0:
  10220             annot = JM_get_annot_by_name(page, name)
  10221         else:
  10222             annot = JM_get_annot_by_xref(page, xref)
> 10223         if annot.m_internal:
  10224             return Annot(annot)

AttributeError: 'NoneType' object has no attribute 'm_internal'

How to reproduce the bug

  1. Download the sample file:
    wget https://github.com/py-pdf/sample-files/raw/main/024-annotations/annotated_pdf.pdf
  2. Run the following commands in a Python console or script:
    import pymupdf
    pymupdf.Document('annotated_pdf.pdf').scrub()

PyMuPDF version

1.27.1

Operating system

Linux

Python version

3.14

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions