Python pdfminer.pdftypes.resolve1() Examples
The following are 6 code examples of pdfminer.pdftypes.resolve1().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pdfminer.pdftypes, or try the search function.
Example #1
Source File: pdftk.py From docassemble with MIT License | 6 votes |
def read_fields(pdffile):
    """Collect the AcroForm fields of a PDF file.

    Args:
        pdffile: Path of the PDF file to inspect.

    Returns:
        ``None`` when the document catalog has no ``'AcroForm'`` entry;
        otherwise ``outfields`` sorted with ``fieldsorter`` after
        ``recursively_add_fields`` has been called on the form's
        ``'Fields'`` entry.
    """
    outfields = []
    # The document is parsed from the open handle, so every pdfminer call
    # stays inside the ``with`` block; the handle is closed on every exit
    # path, including the early ``return None`` and any exception.
    with open(pdffile, 'rb') as fp:
        doc = PDFDocument(PDFParser(fp))
        # Map each page id to its 1-based position in the document.
        id_to_page = {
            page.pageid: pageno
            for pageno, page in enumerate(PDFPage.create_pages(doc), start=1)
        }
        if 'AcroForm' not in doc.catalog:
            return None
        fields = resolve1(doc.catalog['AcroForm'])['Fields']
        recursively_add_fields(fields, id_to_page, outfields)
    return sorted(outfields, key=fieldsorter)
Example #2
Source File: metadataPDF.py From Yuki-Chan-The-Auto-Pentest with MIT License | 5 votes |
def getData(self):
    """Read the trailer ``Info`` dictionary of the PDF at ``self.fname``.

    The resolved dictionary is stored in both ``self.metadata`` and
    ``self.raw``.

    Returns:
        ``"error"`` if attaching the parser or ``doc.initialize`` raises,
        ``"Empty metadata"`` if ``self.raw`` is ``None`` after the xrefs
        have been scanned, ``"ok"`` otherwise.  If scanning the xrefs
        raises, the caught exception object itself is returned (not
        raised).
    """
    doc = PDFDocument()
    # ``open`` instead of the Python 2-only ``file`` builtin.
    fp = open(self.fname, 'rb')
    parser = PDFParser(fp)
    try:
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize(self.password)
    except Exception:
        return "error"
    finally:
        # Runs on success and on the "error" return alike, so the file
        # handle is no longer leaked when initialisation fails.
        parser.close()
        fp.close()
    try:
        for xref in doc.xrefs:
            info_ref = xref.trailer.get('Info')
            if info_ref:
                # Store only what was actually resolved; a trailer without
                # an Info entry must not touch an unbound or stale value.
                info = resolve1(info_ref)
                self.metadata = info
                self.raw = info
        if self.raw is None:
            return "Empty metadata"
        return "ok"
    except Exception as e:
        # Callers receive the exception as the return value.  A print that
        # used to follow this return could never execute and was dropped.
        return e
Example #3
Source File: metadataPDF.py From ITWSV with MIT License | 5 votes |
def getData(self):
    """Read the trailer ``Info`` dictionary of the PDF at ``self.fname``.

    The resolved dictionary is stored in both ``self.metadata`` and
    ``self.raw``.

    Returns:
        ``"error"`` if attaching the parser or ``doc.initialize`` raises,
        ``"Empty metadata"`` if ``self.raw`` is ``None`` after the xrefs
        have been scanned, ``"ok"`` otherwise.  If scanning the xrefs
        raises, the caught exception object itself is returned (not
        raised).
    """
    doc = PDFDocument()
    # ``open`` instead of the Python 2-only ``file`` builtin.
    fp = open(self.fname, 'rb')
    parser = PDFParser(fp)
    try:
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize(self.password)
    except Exception:
        return "error"
    finally:
        # Runs on success and on the "error" return alike, so the file
        # handle is no longer leaked when initialisation fails.
        parser.close()
        fp.close()
    try:
        for xref in doc.xrefs:
            info_ref = xref.trailer.get('Info')
            if info_ref:
                # Store only what was actually resolved; a trailer without
                # an Info entry must not touch an unbound or stale value.
                info = resolve1(info_ref)
                self.metadata = info
                self.raw = info
        if self.raw is None:
            return "Empty metadata"
        return "ok"
    except Exception as e:
        # Callers receive the exception as the return value.  A print that
        # used to follow this return could never execute and was dropped.
        return e
Example #4
Source File: extract_pdf.py From carpe with Apache License 2.0 | 5 votes |
def __init__(self, doc, attrs):
    """Keep a reference to the owning document.

    Args:
        doc: Document object, stored unchanged as ``self.doc``.
        attrs: Accepted but currently unused; the code that consumed it
            is disabled (see the commented-out lines below).
    """
    self.doc = doc
    # Disabled initialisation, kept for reference:
    # self.attrs = dict_value(attrs)
    # self.resources = resolve1(self.attrs.get('Resources', dict()))
Example #5
Source File: metadataPDF.py From EasY_HaCk with Apache License 2.0 | 5 votes |
def getData(self):
    """Read the trailer ``Info`` dictionary of the PDF at ``self.fname``.

    The resolved dictionary is stored in both ``self.metadata`` and
    ``self.raw``.

    Returns:
        ``"error"`` if attaching the parser or ``doc.initialize`` raises,
        ``"Empty metadata"`` if ``self.raw`` is ``None`` after the xrefs
        have been scanned, ``"ok"`` otherwise.  If scanning the xrefs
        raises, the caught exception object itself is returned (not
        raised).
    """
    doc = PDFDocument()
    # ``open`` instead of the Python 2-only ``file`` builtin.
    fp = open(self.fname, 'rb')
    parser = PDFParser(fp)
    try:
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize(self.password)
    except Exception:
        return "error"
    finally:
        # Runs on success and on the "error" return alike, so the file
        # handle is no longer leaked when initialisation fails.
        parser.close()
        fp.close()
    try:
        for xref in doc.xrefs:
            info_ref = xref.trailer.get('Info')
            if info_ref:
                # Store only what was actually resolved; a trailer without
                # an Info entry must not touch an unbound or stale value.
                info = resolve1(info_ref)
                self.metadata = info
                self.raw = info
        if self.raw is None:
            return "Empty metadata"
        return "ok"
    except Exception as e:
        # Callers receive the exception as the return value.  A print that
        # used to follow this return could never execute and was dropped.
        return e
Example #6
Source File: pdfsheet.py From avrae with GNU General Public License v3.0 | 4 votes |
def main():
    """Dump the form fields of a PDF character sheet to JSON.

    Prompts for a PDF filename, reads the ``'Fields'`` of the document's
    ``'AcroForm'`` catalog entry, builds a ``{field name: value}`` dict,
    prints it, and writes it to ``./output/pdfsheet-test.json``.

    Raises:
        Exception: if the AcroForm field list cannot be read; the
            underlying error is attached as ``__cause__``.
    """
    fn = input("PDF filename: ")
    character = {}
    with open(fn, mode='rb') as f:
        doc = PDFDocument(PDFParser(f))
        try:
            form = resolve1(doc.catalog['AcroForm'])
            fields = resolve1(form['Fields'])
        except Exception as err:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not rewritten into this
            # message; chaining keeps the real cause visible.
            raise Exception('This is not a form-fillable character sheet!') from err
        for ref in fields:
            field = resolve1(ref)
            name, value = field.get('T'), field.get('V')
            if name is None:
                # No 'T' entry means there is no key to store the value
                # under; skip instead of failing on ``None.decode``.
                continue
            if isinstance(value, PSLiteral):
                value = value.name
            elif isinstance(value, bytes):
                # Only byte strings are decoded; any other value type is
                # stored unchanged, as the old try/except-pass did.
                value = value.decode('iso-8859-1').strip()
            character[name.decode('iso-8859-1').strip()] = value
    print(character)
    with open('./output/pdfsheet-test.json', mode='w') as f:
        json.dump(character, f, skipkeys=True, sort_keys=True, indent=4)