Python PyPDF2.PdfFileMerger() Examples

The following are 12 code examples showing how to use PyPDF2.PdfFileMerger(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module PyPDF2, or try the search function.

Example 1
Project: pdf2pdfocr   Author: LeoFCardoso   File: pdf2pdfocr.py    License: Apache License 2.0 6 votes vote down vote up
def join_ocred_pdf(self):
        """Join the per-page OCR PDFs into a single "<prefix>-ocr.pdf" file.

        Collects every file matching ``<tmp_dir><prefix>*.pdf``, appends them
        in sorted order and writes the result into the temporary directory.
        When no file matches, the error is reported, ``self.cleanup()`` is
        called and the process exits with status 1.
        """
        ocr_pattern = self.tmp_dir + "{0}*.{1}".format(self.prefix, "pdf")
        text_pdf_file_list = sorted(glob.glob(ocr_pattern))
        ocr_file_count = len(text_pdf_file_list)
        self.debug("We have {0} ocr'ed files".format(ocr_file_count))
        if ocr_file_count == 0:
            eprint("No PDF files generated after OCR. This is not expected. Aborting.")
            self.cleanup()
            exit(1)
        else:
            merged_ocr = PyPDF2.PdfFileMerger()
            for ocr_page_path in text_pdf_file_list:
                page_reader = PyPDF2.PdfFileReader(ocr_page_path, strict=False)
                merged_ocr.append(page_reader)
            merged_ocr.write(self.tmp_dir + self.prefix + "-ocr.pdf")
            merged_ocr.close()
        self.debug("Joined ocr'ed PDF files")
Example 2
Project: strokes   Author: d33tah   File: strokes.py    License: GNU General Public License v3.0 6 votes vote down vote up
def gen_pdfs(pages):
    """Render every page to a PDF and return the merged document as bytes.

    Each page's ``page.f.getvalue()`` is passed to ``gen_pdf``; the returned
    PDF data is wrapped in an in-memory buffer and appended to one merger.
    All per-page buffers are closed before returning, also on error.
    """
    merger = PdfFileMerger()
    page_buffers = []
    try:
        for page in pages:
            page_buffer = io.BytesIO(gen_pdf(page.f.getvalue()))
            page_buffers.append(page_buffer)
            merger.append(page_buffer)

        merged = io.BytesIO()
        try:
            merger.write(merged)
            return merged.getvalue()
        finally:
            merged.close()
    finally:
        for page_buffer in page_buffers:
            page_buffer.close()
Example 3
Project: TOBIAS   Author: loosolab   File: merge_pdfs.py    License: MIT License 6 votes vote down vote up
def run_mergepdf(args):
	"""Merge the PDFs listed in ``args.input`` into ``args.output``.

	Zero-byte input files are skipped. Progress messages are printed to
	stdout at every stage.
	"""

	check_required(args, ["input", "output"])
	print("Number of input files: {0}".format(len(args.input)))

	# Make sure inputs are readable and the output is writable before merging
	print("Checking read/write status")
	check_files(args.input, action="r")
	check_files([args.output], action="w")

	# Append every non-empty input, in the order given
	print("Starting to merge PDFs")
	merger = PdfFileMerger(strict=False)
	for input_path in args.input:
		if os.path.getsize(input_path) == 0:
			continue	# empty file: contains no plots to join
		merger.append(PdfFileReader(input_path))

	print("Writing merged file: {0}".format(args.output))
	merger.write(args.output)

	print("PDFs merged successfully!")


#--------------------------------------------------------------------------------------------------------# 
Example 4
Project: python-tools   Author: lucasayres   File: merge_pdfs.py    License: MIT License 5 votes vote down vote up
def merge_pdfs(input_pdfs, output_pdf):
    """Combine multiple pdfs to single pdf.

    Args:
        input_pdfs (list): List of path files.
        output_pdf (str): Output file.

    """
    pdf_merger = PdfFileMerger()
    try:
        for path in input_pdfs:
            pdf_merger.append(path)
        with open(output_pdf, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # The merger opens each input path itself; close() releases those
        # handles even when appending or writing fails.
        pdf_merger.close()
Example 5
Project: kicad-automation-scripts   Author: productize   File: plot.py    License: Apache License 2.0 5 votes vote down vote up
def plot_to_directory(pcb, file_format, layers, plot_directory, temp_dir):
    """Plot ``layers`` of ``pcb`` and bundle the result in ``plot_directory``.

    Plots are produced in ``temp_dir``. Depending on ``file_format``:

    * ``'zip_gerbers'``: every layer is plotted to Gerber, the drill file is
      added when one was generated, and everything is zipped into
      ``<plot_directory>/<pcb.name>_gerbers.zip``.
    * ``'pdf'``: every layer is plotted to PDF and merged (one bookmark per
      layer, plus a 'Drill map' page when one was generated) into
      ``<plot_directory>/<pcb.name>.pdf``.

    Any other ``file_format`` value produces no output.
    """
    output_files = []

    pcb.set_plot_directory(temp_dir)

    logger.debug(file_format)

    if file_format == 'zip_gerbers':
        # In theory not needed since gerber does not support drill marks, but added just to be sure
        pcb.plot_options.SetDrillMarksType(pcbnew.PCB_PLOT_PARAMS.NO_DRILL_SHAPE)

        for layer in layers:
            logger.debug('plotting layer {} ({}) to Gerber'.format(layer.get_name(), layer.layer_id))
            output_filename = layer.plot(pcbnew.PLOT_FORMAT_GERBER)
            output_files.append(output_filename)

        drill_file = pcb.plot_drill()
        if os.path.isfile(drill_file): # No drill file is generated if no holes exist
            output_files.append(drill_file)

        zip_file_name = os.path.join(plot_directory, '{}_gerbers.zip'.format(pcb.name))
        with zipfile.ZipFile(zip_file_name, 'w') as z:
            for f in output_files:
                z.write(f, os.path.relpath(f, plot_directory))

    elif file_format == 'pdf':
        pcb.plot_options.SetDrillMarksType(pcbnew.PCB_PLOT_PARAMS.FULL_DRILL_SHAPE)
        merger = PdfFileMerger()
        # Handles are kept open until the merged file is written, then closed
        # in one place, so the merger can always read from them.
        open_plots = []
        try:
            for layer in layers:
                logger.debug('plotting layer {} ({}) to PDF'.format(layer.get_name(), layer.layer_id))
                output_filename = layer.plot(pcbnew.PLOT_FORMAT_PDF)
                output_files.append(output_filename)
                logger.debug(output_filename)
                # open() instead of the Python 2-only builtin file()
                plot_pdf = open(output_filename, 'rb')
                open_plots.append(plot_pdf)
                merger.append(PdfFileReader(plot_pdf), bookmark=layer.get_name())

            drill_map_file = pcb.plot_drill_map()
            if os.path.isfile(drill_map_file): # No drill map file is generated if no holes exist
                drill_map_pdf = open(drill_map_file, 'rb')
                open_plots.append(drill_map_pdf)
                merger.append(PdfFileReader(drill_map_pdf), bookmark='Drill map')

            merger.write(plot_directory+'/{}.pdf'.format(pcb.name))
        finally:
            merger.close()
            for plot_pdf in open_plots:
                plot_pdf.close()
Example 6
Project: python-automation-scripts   Author: avidLearnerInProgress   File: converter.py    License: GNU General Public License v3.0 5 votes vote down vote up
def mergeIntoOnePDF(path):
    """Merge every ``.pdf`` file in directory ``path`` into ``merged_full.pdf``.

    Inputs are merged in sorted file-name order so the result does not depend
    on the arbitrary order returned by ``os.listdir``. The list of merged
    file names is printed before merging.
    """
    output_name = "merged_full.pdf"
    # Skip a merged_full.pdf left over from a previous run: it is the output
    # file and must not be fed back in as one of its own inputs.
    pdf_files = sorted(
        fileName for fileName in os.listdir(path)
        if fileName.endswith('.pdf') and fileName != output_name
    )
    print(pdf_files)
    merger = PdfFileMerger()
    try:
        for filename in pdf_files:
            # os.path.join supplies the platform separator; the reader's
            # second positional argument is `strict`, not a file mode, so no
            # "rb" is passed (the default strict setting is kept).
            merger.append(PdfFileReader(os.path.join(path, filename)))
        merger.write(os.path.join(path, output_name))
    finally:
        merger.close()
Example 7
Project: sslibrary-pdf-downloader   Author: 0NG   File: download.py    License: MIT License 5 votes vote down vote up
def mergePDF(path, num, name):
    """Merge ``page1.pdf`` .. ``page<num>.pdf`` from ``path`` into ``<name>.pdf``.

    Merging is best-effort: a page that cannot be opened or appended is
    skipped and its page number is printed. The merged file is written to
    ``<path>/<name>.pdf``.
    """
    merger = PdfFileMerger()
    page_files = []
    try:
        for cpage in range(1, num + 1):
            try:
                page_file = open(path + '/page%d.pdf' % cpage, 'rb')
                page_files.append(page_file)
                merger.append(page_file)
            except Exception:
                # Still skips any broken/missing page, but no longer swallows
                # KeyboardInterrupt or SystemExit like a bare `except:` does.
                print(cpage)
        merger.write(path + '/' + name + '.pdf')
        merger.close()
    finally:
        # Release every page handle, including ones whose append failed.
        for page_file in page_files:
            page_file.close()
Example 8
Project: Python-for-Everyday-Life   Author: PacktPublishing   File: merge.py    License: MIT License 5 votes vote down vote up
def merge(source_pdf_paths, target_pdf_path):
    """Concatenate the PDFs at ``source_pdf_paths`` into ``target_pdf_path``.

    Sources are appended in the order given; each source file is open only
    while it is being appended.
    """
    combined = PyPDF2.PdfFileMerger()

    # feed each source document to the merger
    for source_path in source_pdf_paths:
        with open(source_path, 'rb') as source_file:
            combined.append(PyPDF2.PdfFileReader(source_file))

    # dump the merged document to the target path
    with open(target_pdf_path, 'wb') as target_file:
        combined.write(target_file)
Example 9
Project: taxes-2018   Author: pyTaxPrep   File: doTaxes.py    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
def fill_forms():
    """Fill in every tax form and merge the results into one PDF.

    Calls ``fill_in_form()`` on each form module, then merges the PDFs listed
    below from the ``filled`` directory, in that order, into
    ``filled/Tax_Return.pdf``.
    """
    forms.s_1040.fill_in_form()
    forms.s1_1040.fill_in_form()
    forms.s3_1040.fill_in_form()
    forms.s4_1040.fill_in_form()
    forms.s5_1040.fill_in_form()
    forms.a_1040.fill_in_form()
    forms.b_1040.fill_in_form()
    forms.se_1040.fill_in_form()
    forms.cez_1040.fill_in_form()
    forms.sep_ira.fill_in_form()
    forms.f_8606.fill_in_form()
    forms.s_1040v.fill_in_form()
    forms.tax_worksheet.fill_in_form()

    # Order here is the page order of the merged return.
    pdfs = [ os.path.join('filled', 'f1040.pdf'),
             os.path.join('filled', 'f1040s1.pdf'),
             os.path.join('filled', 'f1040s3.pdf'),
             os.path.join('filled', 'f1040s4.pdf'),
             os.path.join('filled', 'f1040s5.pdf'),
             os.path.join('filled', 'tax_worksheet.pdf'),
             os.path.join('filled', 'f1040sa.pdf'),
             os.path.join('filled', 'f1040sb.pdf'),
             os.path.join('filled', 'f1040sce.pdf'),
             os.path.join('filled', 'f1040sse.pdf'),
             os.path.join('filled', 'f8606.pdf'),
             os.path.join('filled', 'f1040v.pdf'),
             os.path.join('filled', 'SEP_IRA_Worksheet.pdf')]

    merger = PdfFileMerger()
    # Inputs stay open until the merged file is written, then are all closed.
    open_inputs = []
    try:
        for pdf in pdfs:
            pdf_file = open(pdf, 'rb')
            open_inputs.append(pdf_file)
            merger.append(pdf_file)

        with open( os.path.join('filled', 'Tax_Return.pdf'), 'wb' ) as fd:
            merger.write(fd)
    finally:
        for pdf_file in open_inputs:
            pdf_file.close()
Example 10
Project: drizzlepac   Author: spacetelescope   File: compare_sourcelists.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def pdf_merger(output_path, input_paths):
    """Merges multiple pdf files into a single multi-page pdf file

    The input files are deleted after the merged file has been written.
    If merging or writing fails, the exception propagates and no input
    file is deleted.

    Parameters
    ----------
    output_path : str
        name of output multipage pdf file

    input_paths : list
        list of pdf files to combine

    Returns
    -------
    nothing.
    """
    pdf_merger = PdfFileMerger()
    try:
        for path in input_paths:
            pdf_merger.append(path)

        with open(output_path, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # Release the merger's handles on the inputs before deleting them;
        # removing a file that is still open fails on some platforms.
        pdf_merger.close()

    for path in input_paths:
        os.remove(path)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- 
Example 11
Project: pdf2pdfocr   Author: LeoFCardoso   File: pdf2pdfocr.py    License: Apache License 2.0 4 votes vote down vote up
def rebuild_and_merge(self):
        """Rebuild the input PDF from its page images, then merge in the OCR layer.

        Steps:
        1. Resolve the conversion parameters (a named preset, a custom string,
           or the "best" preset when the setting is empty).
        2. Run ``do_rebuild`` on every page image in a process pool, logging
           progress every 5 seconds until the pool is done.
        3. Merge the resulting ``REBUILD_<prefix>*.pdf`` files into
           ``<prefix>-input_unprotected.pdf``; abort with exit status 1 if
           none were produced.
        4. Hand the rebuilt file and ``<prefix>-ocr.pdf`` to ``_merge_ocr``.
        """
        eprint("Warning: metadata wiped from final PDF file (original file is not an unprotected PDF / "
               "forcing rebuild from extracted images / using deskew)")
        # Named conversion presets
        # Please read http://www.imagemagick.org/Usage/quantize/#colors_two
        presets = {
            "fast": "-threshold 60% -compress Group4",
            "best": "-colors 2 -colorspace gray -normalize -threshold 60% -compress Group4",
            "grayscale": "-threshold 85% -morphology Dilate Diamond -compress Group4",
            "jpeg": "-strip -interlace Plane -gaussian-blur 0.05 -quality 50% -compress JPEG",
            "jpeg2000": "-quality 32% -compress JPEG2000",
        }
        # A value that is not a preset name is passed through unchanged
        convert_params = presets.get(self.user_convert_params, self.user_convert_params)
        # Empty setting falls back to the "best" preset
        if convert_params == "":
            convert_params = presets["best"]
        #
        self.log("Rebuilding PDF from images")
        rebuild_list = sorted(glob.glob(self.tmp_dir + self.prefix + "*." + self.extension_images))
        rebuild_pool = multiprocessing.Pool(self.cpu_to_use)
        rebuild_jobs = [
            (image_path, self.path_convert, convert_params, self.tmp_dir, self.shell_mode)
            for image_path in rebuild_list
        ]
        rebuild_pool_map = rebuild_pool.starmap_async(do_rebuild, rebuild_jobs)
        while not rebuild_pool_map.ready():
            # Progress is estimated from the rebuilt files already on disk
            pages_processed = len(glob.glob(self.tmp_dir + "REBUILD_" + self.prefix + "*.pdf"))
            self.log("Waiting for PDF rebuild to complete. {0}/{1} pages completed...".format(pages_processed, self.input_file_number_of_pages))
            time.sleep(5)
        #
        work_base = self.tmp_dir + self.prefix
        rebuilt_pdf_file_list = sorted(glob.glob(self.tmp_dir + "REBUILD_{0}*.pdf".format(self.prefix)))
        rebuilt_count = len(rebuilt_pdf_file_list)
        self.debug("We have {0} rebuilt PDF files".format(rebuilt_count))
        if rebuilt_count == 0:
            eprint("No PDF files generated after image rebuilding. This is not expected. Aborting.")
            self.cleanup()
            exit(1)
        else:
            rebuilt_merger = PyPDF2.PdfFileMerger()
            for rebuilt_page_path in rebuilt_pdf_file_list:
                page_reader = PyPDF2.PdfFileReader(rebuilt_page_path, strict=False)
                rebuilt_merger.append(page_reader)
            rebuilt_merger.write(work_base + "-input_unprotected.pdf")
            rebuilt_merger.close()
        self.debug("PDF rebuilding completed")
        #
        self._merge_ocr(work_base + "-input_unprotected.pdf",
                        work_base + "-ocr.pdf",
                        work_base + "-OUTPUT.pdf",
                        "rebuild-merge")
Example 12
Project: silver   Author: silverapp   File: admin.py    License: Apache License 2.0 4 votes vote down vote up
def download_selected_documents(self, request, queryset):
        """Return one PDF attachment merging the PDFs of the selected documents.

        Only documents in the ISSUED, CANCELED or PAID state that have a PDF
        are included. Each PDF is downloaded to a local file, appended to the
        merger and the local file is removed again. A document whose PDF
        cannot be read or appended is logged as failed and skipped.

        Returns an ``HttpResponse`` with content type ``application/pdf`` and
        a ``Content-Disposition: attachment`` header.
        """
        # NOTE (important): this works only if the pdf is not stored on local
        # disk as it is fetched via HTTP
        now = timezone.now()

        queryset = queryset.filter(
            state__in=[BillingDocumentBase.STATES.ISSUED,
                       BillingDocumentBase.STATES.CANCELED,
                       BillingDocumentBase.STATES.PAID]
        )

        base_path = '/tmp'
        merger = PdfFileMerger()
        for document in queryset:
            if not document.pdf:
                continue

            local_file_path = self._download_pdf(document.pdf.url, base_path)
            try:
                # Close the handle before the file is removed below; the
                # original left it open for the lifetime of the request.
                with open(local_file_path, 'rb') as local_file:
                    merger.append(PdfFileReader(local_file))
                logging_ctx = {
                    'number': document.series_number,
                    'status': 'ok'
                }
            except Exception as e:
                # Best-effort: one unreadable document must not abort the
                # whole aggregate download.
                logging_ctx = {
                    'number': document.series_number,
                    'status': 'failed',
                    'error': e
                }

            logger.debug('Admin aggregate PDF generation: %s', logging_ctx)

            try:
                os.remove(local_file_path)
            except OSError as e:
                # An already-missing file is fine; anything else is an error
                if e.errno != errno.ENOENT:
                    raise

        response = HttpResponse(content_type='application/pdf')
        filename = 'Billing-Documents-{now}.pdf'.format(now=now)
        # The quoted filename needs its closing quote to form a valid header
        content_disposition = 'attachment; filename="{fn}"'.format(fn=filename)
        response['Content-Disposition'] = content_disposition

        merger.write(response)
        merger.close()

        return response