Python pdfminer.pdftypes.resolve1() Examples

The following are 6 code examples of pdfminer.pdftypes.resolve1(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pdfminer.pdftypes , or try the search function .
Example #1
Source File: pdftk.py    From docassemble with MIT License 6 votes vote down vote up
def read_fields(pdffile):
    import string
    printable = set(string.printable)
    outfields = list()
    fp = open(pdffile, 'rb')
    id_to_page = dict()
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    pageno = 1;
    for page in PDFPage.create_pages(doc):
        id_to_page[page.pageid] = pageno
        pageno += 1
    if 'AcroForm' not in doc.catalog:
        return None
    fields = resolve1(doc.catalog['AcroForm'])['Fields']
    recursively_add_fields(fields, id_to_page, outfields)
    return sorted(outfields, key=fieldsorter) 
Example #2
Source File: metadataPDF.py    From Yuki-Chan-The-Auto-Pentest with MIT License 5 votes vote down vote up
def getData(self):
		doc = PDFDocument()
		fp = file(self.fname, 'rb')
		parser = PDFParser(fp)
		try:
			parser.set_document(doc)
			doc.set_parser(parser)
			doc.initialize(self.password)
		except:
			return "error"
		
		parser.close()
		fp.close()
		#try:
		#	metadata = resolve1(doc.catalog['Metadata'])
		#	return "ok"
		#except:
		#	print "[x] Error in PDF extractor, Metadata catalog"
		try:
			for xref in doc.xrefs:
				info_ref=xref.trailer.get('Info')
				if info_ref:
					info=resolve1(info_ref)
				self.metadata=info
				self.raw = info
			if self.raw == None:
				return "Empty metadata"
			else:
				return "ok"
		except Exception,e:
			return e 
			print "\t [x] Error in PDF extractor, Trailer Info" 
Example #3
Source File: metadataPDF.py    From ITWSV with MIT License 5 votes vote down vote up
def getData(self):
		doc = PDFDocument()
		fp = file(self.fname, 'rb')
		parser = PDFParser(fp)
		try:
			parser.set_document(doc)
			doc.set_parser(parser)
			doc.initialize(self.password)
		except:
			return "error"
		
		parser.close()
		fp.close()
		#try:
		#	metadata = resolve1(doc.catalog['Metadata'])
		#	return "ok"
		#except:
		#	print "[x] Error in PDF extractor, Metadata catalog"
		try:
			for xref in doc.xrefs:
				info_ref=xref.trailer.get('Info')
				if info_ref:
					info=resolve1(info_ref)
				self.metadata=info
				self.raw = info
			if self.raw == None:
				return "Empty metadata"
			else:
				return "ok"
		except Exception,e:
			return e 
			print "\t [x] Error in PDF extractor, Trailer Info" 
Example #4
Source File: extract_pdf.py    From carpe with Apache License 2.0 5 votes vote down vote up
def __init__(self, doc, attrs):
        self.doc = doc
        # self.attrs = dict_value(attrs)
        # self.resources = resolve1(self.attrs.get('Resources', dict())) 
Example #5
Source File: metadataPDF.py    From EasY_HaCk with Apache License 2.0 5 votes vote down vote up
def getData(self):
		doc = PDFDocument()
		fp = file(self.fname, 'rb')
		parser = PDFParser(fp)
		try:
			parser.set_document(doc)
			doc.set_parser(parser)
			doc.initialize(self.password)
		except:
			return "error"
		
		parser.close()
		fp.close()
		#try:
		#	metadata = resolve1(doc.catalog['Metadata'])
		#	return "ok"
		#except:
		#	print "[x] Error in PDF extractor, Metadata catalog"
		try:
			for xref in doc.xrefs:
				info_ref=xref.trailer.get('Info')
				if info_ref:
					info=resolve1(info_ref)
				self.metadata=info
				self.raw = info
			if self.raw == None:
				return "Empty metadata"
			else:
				return "ok"
		except Exception,e:
			return e 
			print "\t [x] Error in PDF extractor, Trailer Info" 
Example #6
Source File: pdfsheet.py    From avrae with GNU General Public License v3.0 4 votes vote down vote up
def main():
    fn = input("PDF filename: ")
    character = {}
    with open(fn, mode='rb') as f:
        parser = PDFParser(f)
        doc = PDFDocument(parser)
        try:
            fields = resolve1(doc.catalog['AcroForm'])
            fields = resolve1(fields['Fields'])
        except:
            raise Exception('This is not a form-fillable character sheet!')
        for i in fields:
            field = resolve1(i)
            name, value = field.get('T'), field.get('V')
            if isinstance(value, PSLiteral):
                value = value.name
            elif value is not None:
                try:
                    value = value.decode('iso-8859-1').strip()
                except:
                    pass

            character[name.decode('iso-8859-1').strip()] = value

        print(character)
    with open('./output/pdfsheet-test.json', mode='w') as f:
        json.dump(character, f, skipkeys=True, sort_keys=True, indent=4)