22import os
33from base64 import decodebytes
44from mimetypes import guess_type
5- import fitz
5+ import pikepdf
66
77
88class Inputs :
@@ -48,7 +48,7 @@ def __init__(
4848 self .file_extension = guess_type (file )[0 ]
4949
5050 if input_type == "dummy" :
51- self .file_object = ""
51+ self .file_object = None
5252 self .input_type = ""
5353 self .filename = ""
5454 self .filepath = ""
@@ -60,6 +60,8 @@ def __init__(
6060 )
6161
6262 if self .file_extension == "application/pdf" :
63+ self .check_pdf_open ()
64+
6365 count_pages = self .count_pdf_pages ()
6466
6567 if cut_pdf is True :
@@ -99,47 +101,44 @@ def count_pdf_pages(self):
99101 :return: Number of pages in the Input file for pdfs
100102 """
101103 self .file_object .seek (0 )
102- src = fitz .open (
103- stream = self .file_object .read (),
104- filetype = self .file_extension ,
105- filename = self .filename ,
106- )
107- return len (src )
104+ with pikepdf .open (self .file_object ) as pdf :
105+ return len (pdf .pages )
108106
def merge_pdf_pages(self, pages_number):
    """
    Rebuild the input PDF so that it only contains the requested pages.

    :param pages_number: List of pages number to use for merging in the original pdf
        (used as indexes into ``pdf.pages``, in the given order)
    :return: (void) Set the Input.file with the reconstructed pdf stream
    """
    self.file_object.seek(0)
    merged_stream = io.BytesIO()
    # Both documents stay open until the save is finished: the pages appended
    # to the new document come from the source document, so the source (and
    # the stream backing it) must not be closed before the output is written.
    with pikepdf.open(self.file_object) as source_pdf, pikepdf.Pdf.new() as merged_pdf:
        for page_index in pages_number:
            merged_pdf.pages.append(source_pdf.pages[page_index])
        merged_pdf.save(merged_stream)
    # Only swap the streams once the new document was written successfully,
    # so a failure above leaves the original file object usable.
    self.file_object.close()
    # save() leaves the cursor at the end of the buffer; rewind so the new
    # stream can be read from the start like a freshly created one.
    merged_stream.seek(0)
    self.file_object = merged_stream
129120
def check_if_document_is_empty(self):
    """
    Check if the document contain only empty pages.

    A page is considered non-empty as soon as its resources declare a
    ``/Font`` or an ``/XObject`` entry, or its content stream(s) are longer
    than 1000 bytes in total.

    :return: (void) Returns silently at the first non-empty page
    :raises Exception: "PDF pages are empty" when no page qualifies
    """
    self.file_object.seek(0)
    with pikepdf.open(self.file_object) as pdf:
        for page in pdf.pages:
            # /Resources may be absent from the page dictionary itself;
            # treat that as "no resources" instead of failing with KeyError.
            try:
                resource_names = page["/Resources"].keys()
            except KeyError:
                resource_names = ()
            if "/Font" in resource_names or "/XObject" in resource_names:
                return

            # /Contents is optional: a page without it has nothing drawn.
            try:
                contents = page["/Contents"]
            except KeyError:
                continue
            # /Contents is either a single stream or an array of streams.
            if isinstance(contents, pikepdf.Array):
                streams = list(contents)
            else:
                streams = [contents]
            if sum(stream["/Length"] for stream in streams) > 1000:
                return
    raise Exception("PDF pages are empty")
134135
def check_pdf_open(self):
    """
    Check if the document can be opened using pikepdf.

    :return: (void)
    :raises Exception: "Couldn't open PDF file. <reason>" when pikepdf fails
        to open the stream; the original error is kept as the cause
    """
    self.file_object.seek(0)
    try:
        # Open inside a context manager so the Pdf handle is released right
        # away; only the ability to open the file matters here.
        with pikepdf.open(self.file_object):
            pass
    except Exception as err:
        raise Exception("Couldn't open PDF file. %s" % err) from err
0 commit comments