Python PyPDF2.PdfFileMerger() Examples
The following are 12 code examples of PyPDF2.PdfFileMerger().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module PyPDF2, or try the search function.
Example #1
Source File: pdf2pdfocr.py From pdf2pdfocr with Apache License 2.0 | 6 votes |
def join_ocred_pdf(self):
    """Join the OCR text PDFs into a single ``-ocr.pdf`` file.

    Every file matching ``<tmp_dir><prefix>*.pdf`` is appended, in sorted
    name order, to one document written to ``<tmp_dir><prefix>-ocr.pdf``.
    When nothing matches, an error is printed, ``self.cleanup()`` runs and
    the process exits with status 1.
    """
    pattern = self.tmp_dir + "{0}*.{1}".format(self.prefix, "pdf")
    ocr_pages = sorted(glob.glob(pattern))
    self.debug("We have {0} ocr'ed files".format(len(ocr_pages)))
    if not ocr_pages:
        # OCR produced no pages at all: nothing can be joined.
        eprint("No PDF files generated after OCR. This is not expected. Aborting.")
        self.cleanup()
        exit(1)
    else:
        joined = PyPDF2.PdfFileMerger()
        for page_path in ocr_pages:
            # Non-strict parsing, as for every reader created here.
            page_reader = PyPDF2.PdfFileReader(page_path, strict=False)
            joined.append(page_reader)
        joined.write(self.tmp_dir + self.prefix + "-ocr.pdf")
        joined.close()
    self.debug("Joined ocr'ed PDF files")
Example #2
Source File: strokes.py From strokes with GNU General Public License v3.0 | 6 votes |
def gen_pdfs(pages):
    """Render every page to PDF and return the merged document as bytes.

    Each element of ``pages`` must expose ``page.f.getvalue()``; that value
    is handed to ``gen_pdf`` and the result is wrapped in an in-memory
    buffer which is appended to the merger.  All per-page buffers are
    closed before returning, whether or not merging succeeded.
    """
    page_buffers = []
    combined = PdfFileMerger()
    try:
        for page in pages:
            rendered = io.BytesIO(gen_pdf(page.f.getvalue()))
            page_buffers.append(rendered)
            combined.append(rendered)

        sink = io.BytesIO()
        try:
            combined.write(sink)
            return sink.getvalue()
        finally:
            sink.close()
    finally:
        for rendered in page_buffers:
            rendered.close()
Example #3
Source File: merge_pdfs.py From TOBIAS with MIT License | 6 votes |
def run_mergepdf(args):
    """Merge the PDFs listed in ``args.input`` into ``args.output``.

    ``args`` must carry ``input`` (a sequence of PDF paths) and ``output``
    (the destination path).  Both are validated with ``check_required`` and
    ``check_files`` before merging.  Zero-byte input files are skipped.
    Progress is reported on stdout.
    """
    check_required(args, ["input", "output"])
    input_paths = args.input
    print("Number of input files: {0}".format(len(input_paths)))

    # Preliminary checks: inputs must be readable, the output writable.
    print("Checking read/write status")
    check_files(input_paths, action="r")
    check_files([args.output], action="w")

    # Join pdfs
    print("Starting to merge PDFs")
    merger = PdfFileMerger(strict=False)
    for pdf_path in args.input:
        if os.path.getsize(pdf_path) == 0:
            # Empty files contain no plots; only join files that do.
            continue
        merger.append(PdfFileReader(pdf_path))

    print("Writing merged file: {0}".format(args.output))
    merger.write(args.output)

    print("PDFs merged successfully!")
Example #4
Source File: merge_pdfs.py From python-tools with MIT License | 5 votes |
def merge_pdfs(input_pdfs, output_pdf):
    """Combine multiple pdfs to single pdf.

    Args:
        input_pdfs (list): List of path files.
        output_pdf (str): Output file.
    """
    pdf_merger = PdfFileMerger()
    try:
        for path in input_pdfs:
            pdf_merger.append(path)

        with open(output_pdf, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # The merger opens every input path itself; closing it releases
        # those file handles even when appending or writing fails.
        pdf_merger.close()
Example #5
Source File: plot.py From kicad-automation-scripts with Apache License 2.0 | 5 votes |
def _append_pdf_file(merger, pdf_path, bookmark):
    """Append the PDF stored at ``pdf_path`` to ``merger`` under ``bookmark``."""
    # ``open`` replaces the Python 2-only ``file`` builtin.  The merger copies
    # the reader's stream content while appending, so the handle can be
    # closed as soon as ``append`` returns.
    with open(pdf_path, 'rb') as pdf_file:
        merger.append(PdfFileReader(pdf_file), bookmark=bookmark)


def plot_to_directory(pcb, file_format, layers, plot_directory, temp_dir):
    """Plot ``layers`` of ``pcb`` and bundle the result in ``plot_directory``.

    Individual plots are generated into ``temp_dir``.  Depending on
    ``file_format``:

    * ``'zip_gerbers'`` -- every layer is plotted to Gerber and, together
      with the drill file (if one was produced), zipped into
      ``<plot_directory>/<pcb.name>_gerbers.zip``.
    * ``'pdf'`` -- every layer is plotted to PDF and merged, one bookmark
      per layer name plus a ``'Drill map'`` bookmark when a drill map
      exists, into ``<plot_directory>/<pcb.name>.pdf``.

    Any other ``file_format`` produces no output.  Returns ``None``.
    """
    output_files = []
    pcb.set_plot_directory(temp_dir)
    logger.debug(file_format)

    if file_format == 'zip_gerbers':
        # In theory not needed since gerber does not support drill marks,
        # but added just to be sure
        pcb.plot_options.SetDrillMarksType(pcbnew.PCB_PLOT_PARAMS.NO_DRILL_SHAPE)
        for layer in layers:
            logger.debug('plotting layer {} ({}) to Gerber'.format(layer.get_name(), layer.layer_id))
            output_filename = layer.plot(pcbnew.PLOT_FORMAT_GERBER)
            output_files.append(output_filename)

        drill_file = pcb.plot_drill()
        if os.path.isfile(drill_file):  # No drill file is generated if no holes exist
            output_files.append(drill_file)

        zip_file_name = os.path.join(plot_directory, '{}_gerbers.zip'.format(pcb.name))
        with zipfile.ZipFile(zip_file_name, 'w') as z:
            for f in output_files:
                z.write(f, os.path.relpath(f, plot_directory))

    elif file_format == 'pdf':
        pcb.plot_options.SetDrillMarksType(pcbnew.PCB_PLOT_PARAMS.FULL_DRILL_SHAPE)
        merger = PdfFileMerger()
        for layer in layers:
            logger.debug('plotting layer {} ({}) to PDF'.format(layer.get_name(), layer.layer_id))
            output_filename = layer.plot(pcbnew.PLOT_FORMAT_PDF)
            output_files.append(output_filename)
            logger.debug(output_filename)
            _append_pdf_file(merger, output_filename, layer.get_name())

        drill_map_file = pcb.plot_drill_map()
        if os.path.isfile(drill_map_file):  # No drill map file is generated if no holes exist
            _append_pdf_file(merger, drill_map_file, 'Drill map')

        merger.write(plot_directory+'/{}.pdf'.format(pcb.name))
        merger.close()
Example #6
Source File: converter.py From python-automation-scripts with GNU General Public License v3.0 | 5 votes |
def mergeIntoOnePDF(path):
    """Merge every ``.pdf`` file in directory ``path`` into ``merged_full.pdf``.

    The merged document is written into the same directory.  An existing
    ``merged_full.pdf`` from an earlier run is excluded from the inputs so
    that re-running does not fold the previous result into the new one.
    The list of input file names is printed before merging.
    """
    output_name = "merged_full.pdf"
    pdf_files = [fileName for fileName in os.listdir(path)
                 if fileName.endswith('.pdf') and fileName != output_name]
    print(pdf_files)

    merger = PdfFileMerger()
    for filename in pdf_files:
        # The second positional parameter of PdfFileReader is ``strict``, not
        # a file mode; the former "rb" argument was simply a truthy value,
        # which is spelled out explicitly here.
        merger.append(PdfFileReader(os.path.join(path, filename), strict=True))

    # os.path.join picks the platform's separator instead of a fixed "\\".
    merger.write(os.path.join(path, output_name))
    merger.close()
Example #7
Source File: download.py From sslibrary-pdf-downloader with MIT License | 5 votes |
def mergePDF(path, num, name):
    """Merge ``page1.pdf`` .. ``page<num>.pdf`` from ``path`` into one file.

    The result is written to ``<path>/<name>.pdf``.  A page that cannot be
    opened or appended is skipped and its number is printed, so one bad
    page does not abort the whole merge.
    """
    merger = PdfFileMerger()
    for cpage in range(1, num + 1):
        try:
            # The merger copies the file content while appending, so the
            # handle can be released as soon as ``append`` returns.
            with open(path + '/page%d.pdf' % cpage, 'rb') as page_file:
                merger.append(page_file)
        except Exception:
            # Best effort per page, but no longer swallowing
            # KeyboardInterrupt / SystemExit like a bare ``except:`` did.
            print(cpage)
    merger.write(path + '/' + name + '.pdf')
    merger.close()
Example #8
Source File: merge.py From Python-for-Everyday-Life with MIT License | 5 votes |
def merge(source_pdf_paths, target_pdf_path):
    """Concatenate the PDFs at ``source_pdf_paths`` into ``target_pdf_path``.

    Sources are appended in the order given; each source file is opened
    only for the duration of its own append.
    """
    combined = PyPDF2.PdfFileMerger()

    # Feed each source document to the merger.
    for source_path in source_pdf_paths:
        with open(source_path, 'rb') as source_file:
            combined.append(PyPDF2.PdfFileReader(source_file))

    # Emit the merged document.
    with open(target_pdf_path, 'wb') as target_file:
        combined.write(target_file)
Example #9
Source File: doTaxes.py From taxes-2018 with GNU Lesser General Public License v3.0 | 5 votes |
def fill_forms():
    """Fill in every tax form and merge the results into one PDF.

    Calls ``fill_in_form()`` on each form module, then concatenates the
    filled PDFs from the ``filled`` directory, in the order listed below,
    into ``filled/Tax_Return.pdf``.
    """
    forms.s_1040.fill_in_form()
    forms.s1_1040.fill_in_form()
    forms.s3_1040.fill_in_form()
    forms.s4_1040.fill_in_form()
    forms.s5_1040.fill_in_form()
    forms.a_1040.fill_in_form()
    forms.b_1040.fill_in_form()
    forms.se_1040.fill_in_form()
    forms.cez_1040.fill_in_form()
    forms.sep_ira.fill_in_form()
    forms.f_8606.fill_in_form()
    forms.s_1040v.fill_in_form()
    forms.tax_worksheet.fill_in_form()

    # Order here is the page order of the merged return.
    pdf_names = [
        'f1040.pdf',
        'f1040s1.pdf',
        'f1040s3.pdf',
        'f1040s4.pdf',
        'f1040s5.pdf',
        'tax_worksheet.pdf',
        'f1040sa.pdf',
        'f1040sb.pdf',
        'f1040sce.pdf',
        'f1040sse.pdf',
        'f8606.pdf',
        'f1040v.pdf',
        'SEP_IRA_Worksheet.pdf',
    ]
    pdfs = [os.path.join('filled', pdf_name) for pdf_name in pdf_names]

    merger = PdfFileMerger()
    try:
        for pdf in pdfs:
            # The merger copies the content while appending, so each input
            # handle is closed right away instead of being leaked.
            with open(pdf, 'rb') as pdf_file:
                merger.append(pdf_file)

        with open(os.path.join('filled', 'Tax_Return.pdf'), 'wb') as fd:
            merger.write(fd)
    finally:
        merger.close()
Example #10
Source File: compare_sourcelists.py From drizzlepac with BSD 3-Clause "New" or "Revised" License | 5 votes |
def pdf_merger(output_path, input_paths):
    """Merges multiple pdf files into a single multi-page pdf file

    The input files are deleted once the merged file has been written.

    Parameters
    ----------
    output_path : str
        name of output multipage pdf file

    input_paths : list
        list of pdf files to combine

    Returns
    -------
    nothing.
    """
    merger = PdfFileMerger()
    try:
        for path in input_paths:
            merger.append(path)

        with open(output_path, 'wb') as fileobj:
            merger.write(fileobj)
    finally:
        # The merger opened every input path itself.  Release those handles
        # before deleting the files: removing an open file fails on Windows.
        merger.close()

    for path in input_paths:
        os.remove(path)
Example #11
Source File: pdf2pdfocr.py From pdf2pdfocr with Apache License 2.0 | 4 votes |
def rebuild_and_merge(self):
    """Rebuild the input PDF from its extracted images, then merge the OCR layer.

    Steps:

    1. Resolve the ``convert`` parameters from ``self.user_convert_params``
       (a preset name, a custom parameter string, or ``""`` for "best").
    2. Run ``do_rebuild`` for every extracted image in a process pool,
       logging progress every five seconds until the pool is done.
    3. Merge the resulting ``REBUILD_<prefix>*.pdf`` files into
       ``<prefix>-input_unprotected.pdf``; abort with exit status 1 when
       none were produced.
    4. Hand the rebuilt file and the ``-ocr.pdf`` file to ``self._merge_ocr``.
    """
    eprint("Warning: metadata wiped from final PDF file (original file is not an unprotected PDF / "
           "forcing rebuild from extracted images / using deskew)")

    # Convert presets
    # Please read http://www.imagemagick.org/Usage/quantize/#colors_two
    preset_best = "-colors 2 -colorspace gray -normalize -threshold 60% -compress Group4"
    presets = {
        "fast": "-threshold 60% -compress Group4",
        "best": preset_best,
        "grayscale": "-threshold 85% -morphology Dilate Diamond -compress Group4",
        "jpeg": "-strip -interlace Plane -gaussian-blur 0.05 -quality 50% -compress JPEG",
        "jpeg2000": "-quality 32% -compress JPEG2000",
    }
    requested = self.user_convert_params
    # Unknown names are treated as a custom parameter string.
    convert_params = presets.get(requested, requested)
    # Handle default case
    if convert_params == "":
        convert_params = preset_best

    self.log("Rebuilding PDF from images")
    image_paths = sorted(glob.glob(self.tmp_dir + self.prefix + "*." + self.extension_images))
    path_convert = self.path_convert
    tmp_dir = self.tmp_dir
    shell_mode = self.shell_mode
    # One argument tuple per image; everything but the image path is constant.
    tasks = [(image_path, path_convert, convert_params, tmp_dir, shell_mode)
             for image_path in image_paths]
    rebuild_pool = multiprocessing.Pool(self.cpu_to_use)
    pending = rebuild_pool.starmap_async(do_rebuild, tasks)
    while not pending.ready():
        # Progress is estimated from the rebuilt files already on disk.
        pages_processed = len(glob.glob(self.tmp_dir + "REBUILD_" + self.prefix + "*.pdf"))
        progress = "Waiting for PDF rebuild to complete. {0}/{1} pages completed...".format(
            pages_processed, self.input_file_number_of_pages)
        self.log(progress)
        time.sleep(5)

    rebuilt_pages = sorted(glob.glob(self.tmp_dir + "REBUILD_{0}*.pdf".format(self.prefix)))
    self.debug("We have {0} rebuilt PDF files".format(len(rebuilt_pages)))
    if not rebuilt_pages:
        eprint("No PDF files generated after image rebuilding. This is not expected. Aborting.")
        self.cleanup()
        exit(1)
    else:
        rebuilt = PyPDF2.PdfFileMerger()
        for page_path in rebuilt_pages:
            rebuilt.append(PyPDF2.PdfFileReader(page_path, strict=False))
        rebuilt.write(self.tmp_dir + self.prefix + "-input_unprotected.pdf")
        rebuilt.close()
    self.debug("PDF rebuilding completed")

    base = self.tmp_dir + self.prefix
    self._merge_ocr(base + "-input_unprotected.pdf",
                    base + "-ocr.pdf",
                    base + "-OUTPUT.pdf",
                    "rebuild-merge")
Example #12
Source File: admin.py From silver with Apache License 2.0 | 4 votes |
def download_selected_documents(self, request, queryset):
    """Return one PDF response that aggregates the selected documents.

    Only documents in state ISSUED, CANCELED or PAID are considered.  Each
    document's PDF is downloaded to ``/tmp`` via ``self._download_pdf``,
    appended to the aggregate, and the local copy is removed again.  A
    document whose PDF cannot be read is skipped; the outcome of every
    document is logged at debug level.

    Returns an ``HttpResponse`` with content type ``application/pdf``,
    served as an attachment named ``Billing-Documents-<now>.pdf``.
    """
    # NOTE (important): this works only if the pdf is not stored on local
    # disk as it is fetched via HTTP
    now = timezone.now()

    queryset = queryset.filter(
        state__in=[BillingDocumentBase.STATES.ISSUED,
                   BillingDocumentBase.STATES.CANCELED,
                   BillingDocumentBase.STATES.PAID]
    )

    base_path = '/tmp'

    merger = PdfFileMerger()
    for document in queryset:
        if document.pdf:
            local_file_path = self._download_pdf(document.pdf.url, base_path)
            try:
                # Close the handle before the file is removed below; the
                # merger copies the content while appending.
                with open(local_file_path, 'rb') as pdf_file:
                    merger.append(PdfFileReader(pdf_file))
                logging_ctx = {
                    'number': document.series_number,
                    'status': 'ok'
                }
            except Exception as e:
                # Deliberate best effort: one broken PDF must not prevent
                # the remaining documents from being aggregated.
                logging_ctx = {
                    'number': document.series_number,
                    'status': 'failed',
                    'error': e
                }

            logger.debug('Admin aggregate PDF generation: %s', logging_ctx)

            try:
                os.remove(local_file_path)
            except OSError as e:
                # A file that is already gone is fine; anything else is not.
                if e.errno != errno.ENOENT:
                    raise

    response = HttpResponse(content_type='application/pdf')
    filename = 'Billing-Documents-{now}.pdf'.format(now=now)
    # The quoted filename must be terminated by a closing double quote.
    content_disposition = 'attachment; filename="{fn}"'.format(fn=filename)
    response['Content-Disposition'] = content_disposition

    merger.write(response)
    merger.close()

    return response