Python docx.Document() Examples

The following are 30 code examples of docx.Document(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module docx , or try the search function .
Example #1
Source File: test_properties.py    From docxcompose with MIT License 8 votes vote down vote up
def test_finds_run_nodes_in_complex_field_without_separate_correctly(self):
        document = Document(docx_path('complex_field_without_separate.docx'))
        properties = CustomProperties(document).find_docprops_in_document()

        assert 2 == len(properties), \
            'input should contain two complex field docproperties'

        # The "User.FullName" docproperty should be the one without a separate run
        # In this field, there are the following runs: begin, docprop and end
        matches = [prop for prop in properties if prop.name == 'User.FullName']
        assert 1 == len(matches), \
            "There should be only one User.FullName docproperty"
        prop = matches[0]
        assert prop.get_separate_run() is None, \
            "This complex field should not have a separate run."
        assert [] == prop.get_runs_for_update(), \
            "As there is no separate run, there should be no run to update"

        # As there are no separate, all runs should be removed when dissolving
        # the property.
        runs = prop.get_runs_to_replace_field_with_value()
        assert 3 == len(runs)
        assert runs[0] == prop.begin_run
        assert runs[1] == prop.w_r
        assert runs[2] == prop.end_run 
Example #2
Source File: RastLeak_1_2.py    From RastLeak with GNU General Public License v3.0 7 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)
	#print metadata_files
####### FUNCTION AnalyzeMetadata doc ###### 
Example #3
Source File: translate_word_doc.py    From py-googletrans with MIT License 7 votes vote down vote up
def translate_doc(filename, destination='zh-CN', mix=True):
    """
    translate a word document type of file and save the result as document and keep the exactly same file format.
        :param filename: word doc file
        :param destination='zh-CN':
        :param mix=True: if True, will have original language and target language into the same doc. paragraphs by paragraphs.
    """
    def tx(t): return Translator().translate(t, dest=destination).text
    doc = Document(filename)
    for p in doc.paragraphs:
        txd = tx(p.text)

        p.text = p.text + ('\n' + txd if mix else '')

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                txd = tx(cell.text)
                p.text = cell.text + ('\n' + txd if mix else '')

    f = filename.replace('.doc', destination.lower() + '.doc')
    doc.save(f) 
Example #4
Source File: test_properties.py    From docxcompose with MIT License 6 votes vote down vote up
def test_dissolving_field_when_three_complex_docprop_in_same_paragraph(self):
        document = Document(docx_path('three_props_in_same_paragraph.docx'))
        assert 1 == len(document.paragraphs), 'input file should contains one paragraph'
        paragraph = document.paragraphs[0]
        properties = CustomProperties(document)
        assert 3 == len(properties.find_docprops_in_document()), \
            'input should contain three complex field docproperties'

        text = u'{text} / {num} mor between the fields {text} and some afte the three fields'
        assert text.format(text="I was spellcecked", num=0) == paragraph.text

        properties.dissolve_fields("Text Property")

        assert 1 == len(document.paragraphs)
        assert 1 == len(properties.find_docprops_in_document()), \
            'document should contain one complex field after removal'
        assert text.format(text="I was spellcecked", num=0) == paragraph.text 
Example #5
Source File: RastLeak_1_0.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_doc(fileName):
	#Open file
	docxFile = docx.Document(file(fileName,'rb'))
	#Get the structure
	docxInfo= docxFile.core_properties
	#Print the metadata which it wants to display
	attribute = ["author", "category", "comments", "content_status", 
	    "created", "identifier", "keywords", "language", 
	    "last_modified_by", "last_printed", "modified", 
	    "revision", "subject", "title", "version"]
	#run the list in a for loop to print the value of each metadata
	print ' - Document: ' + str(fileName)
	for meta in attribute:
	    metadata = getattr(docxInfo,meta)
	    if metadata:
	        #Separate the values unicode and time date
	        if isinstance(metadata, unicode): 
	            print " \n\t" + str(meta)+": " + str(metadata)
	        elif isinstance(metadata, datetime.datetime):
	            print " \n\t" + str(meta)+": " + str(metadata) 
Example #6
Source File: test_properties.py    From docxcompose with MIT License 6 votes vote down vote up
def test_complex_docprop_fields_with_multiple_textnodes_are_updated(self):
        document = Document(docx_path('spellchecked_docproperty.docx'))
        paragraphs = xpath(document.element.body, '//w:p')
        assert 1 == len(paragraphs), 'input file contains one paragraph'
        assert 1 == len(xpath(document.element.body, '//w:instrText')), \
            'input contains one complex field docproperty'
        w_p = paragraphs[0]

        cached_values = cached_complex_field_values(w_p)
        assert 4 == len(cached_values), \
            'doc property value is scattered over 4 parts'
        assert 'i will be spllchecked!' == ''.join(cached_values)

        CustomProperties(document).update_all()

        w_p = xpath(document.element.body, '//w:p')[0]
        cached_values = cached_complex_field_values(w_p)
        assert 1 == len(cached_values), \
            'doc property value has been reset to one cached value'
        assert 'i will be spllchecked!' == cached_values[0] 
Example #7
Source File: rastleak_2_0.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)

####### FUNCTION AnalyzeMetadata doc ###### 
Example #8
Source File: RastLeak_1_3.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)
	#print metadata_files
####### FUNCTION AnalyzeMetadata doc ###### 
Example #9
Source File: rastleak_1_4.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)

####### FUNCTION AnalyzeMetadata doc ###### 
Example #10
Source File: downloadfiles.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)

####### FUNCTION AnalyzeMetadata doc ###### 
Example #11
Source File: downloadfiles.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)

####### FUNCTION AnalyzeMetadata doc ###### 
Example #12
Source File: test_properties.py    From docxcompose with MIT License 6 votes vote down vote up
def test_complex_field_gets_updated(self):
        document = Document(docx_path('docproperties.docx'))
        assert 6 == len(document.paragraphs), 'input file should contain 6 paragraphs'

        properties = xpath(document.element.body, './/w:instrText')
        assert 5 == len(properties),\
            'input should contain five complex field docproperties'

        expected_paragraphs = [u'Custom Doc Properties',
                               u'Text: Foo Bar',
                               u'Number: 123',
                               u'Boolean: Y',
                               u'Date: 11.06.2019',
                               u'Float: 1.1']
        actual_paragraphs = [paragraph.text for paragraph in document.paragraphs]
        assert actual_paragraphs == expected_paragraphs

        CustomProperties(document).update("Number Property", 423)

        expected_paragraphs[2] = u'Number: 423'
        actual_paragraphs = [paragraph.text for paragraph in document.paragraphs]
        assert actual_paragraphs == expected_paragraphs 
Example #13
Source File: mock_osp.py    From open-syllabus-project with Apache License 2.0 6 votes vote down vote up
def _write_docx(self, path, content):

        """
        Write a .docx file.

        Args:
            path (str): The file path.
            content (str): The file content.
        """

        docx = Document()
        docx.add_paragraph(content)
        docx.core_properties.created = datetime.now()
        docx.save(path) 
Example #14
Source File: RastLeak_1_1.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_doc(fileName):
	#Open file
	docxFile = docx.Document(file(fileName,'rb'))
	#Get the structure
	docxInfo= docxFile.core_properties
	#Print the metadata which it wants to display
	attribute = ["author", "category", "comments", "content_status", 
	    "created", "identifier", "keywords", "language", 
	    "last_modified_by", "last_printed", "modified", 
	    "revision", "subject", "title", "version"]
	#run the list in a for loop to print the value of each metadata
	print ' - Document: ' + str(fileName)
	for meta in attribute:
	    metadata = getattr(docxInfo,meta)
	    if metadata:
	        #Separate the values unicode and time date
	        if isinstance(metadata, unicode): 
	            print " \n\t" + str(meta)+": " + str(metadata)
	        elif isinstance(metadata, datetime.datetime):
	            print " \n\t" + str(meta)+": " + str(metadata) 
Example #15
Source File: logic.py    From janeway with GNU Affero General Public License v3.0 6 votes vote down vote up
def serve_review_file(assignment):
    """
    Produces a word document representing the review form.
    :param assignment: ReviewAssignment object
    :return: HttpStreamingResponse
    """
    elements = assignment.form.elements.all()
    document = Document()
    document.add_heading('Review #{pk}'.format(pk=assignment.pk), 0)
    document.add_heading('Review of `{article_title}` by {reviewer}'.format(article_title=assignment.article.title,
                                                                            reviewer=assignment.reviewer.full_name()),
                         level=1)
    document.add_paragraph()
    document.add_paragraph('Complete the form below, then upload it under the "FILE UPLOAD" section on your review page'
                           '. There is no need to complete the form on the web page if you are uploading this '
                           'document.')
    document.add_paragraph()

    for element in elements:
        document.add_heading(element.name, level=2)
        document.add_paragraph(element.help_text)
        if element.choices:
            choices = render_choices(element.choices)
            table = document.add_table(rows=1, cols=2)
            hdr_cells = table.rows[0].cells
            hdr_cells[0].text = 'Choice'
            hdr_cells[1].text = 'Indication'

            for choice in element.choices.split('|'):
                row_cells = table.add_row().cells
                row_cells[0].text = str(choice)
        document.add_paragraph()

    filename = '{uuid}.docx'.format(uuid=uuid4())
    filepath = os.path.join(settings.BASE_DIR, 'files', 'temp', filename)
    document.save(filepath)
    return files.serve_temp_file(filepath, filename) 
Example #16
Source File: RastLeak_1_2.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)
	#print metadata_files
####### FUNCTION AnalyzeMetadata doc ###### 
Example #17
Source File: test_properties.py    From docxcompose with MIT License 6 votes vote down vote up
def test_removes_simple_field_but_keeps_value(self):
        document = Document(docx_path('outdated_docproperty_with_umlauts.docx'))
        assert 1 == len(document.paragraphs), 'input file should contain 1 paragraph'
        fields = xpath(
            document.element.body,
            simple_field_expression(u"F\xfc\xfc"))
        assert 1 == len(fields), 'should contain one simple field docproperty'

        assert u'Hie chund ds property: ' == document.paragraphs[0].text
        assert u'xxx' == fields[0].text

        CustomProperties(document).dissolve_fields(u"F\xfc\xfc")
        fields = xpath(
            document.element.body,
            simple_field_expression(u"F\xfc\xfc"))
        assert 0 == len(fields), 'should not contain any docproperties anymore'
        # when simple field is removed, the value is moved one up in the hierarchy
        assert u'Hie chund ds property: xxx' == document.paragraphs[0].text 
Example #18
Source File: rastleak_2_0.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)

####### FUNCTION AnalyzeMetadata doc ###### 
Example #19
Source File: wl_file_area.py    From Wordless with GNU General Public License v3.0 6 votes vote down vote up
def iter_block_items(self, parent):
        """
        Yield each paragraph and table child within *parent*, in document order.
        Each returned value is an instance of either Table or Paragraph. *parent*
        would most commonly be a reference to a main Document object, but
        also works for a _Cell object, which itself can contain paragraphs and tables.
        """
        if isinstance(parent, Document):
            parent_elm = parent.element.body
        elif isinstance(parent, _Cell):
            parent_elm = parent._tc
        else:
            raise ValueError("something's not right")

        for child in parent_elm.iterchildren():
            if isinstance(child, CT_P):
                yield Paragraph(child, parent)
            elif isinstance(child, CT_Tbl):
                yield Table(child, parent) 
Example #20
Source File: RastLeak_1_3.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)
	#print metadata_files
####### FUNCTION AnalyzeMetadata doc ###### 
Example #21
Source File: test_properties.py    From docxcompose with MIT License 6 votes vote down vote up
def test_dissolves_all_instances_of_given_field(self):
        document = Document(docx_path('multiple_identical_properties.docx'))
        assert 3 == len(document.paragraphs), 'input file should contain 3 paragraphs'
        assert 3 == len(xpath(document.element.body, './/w:instrText')), \
            'document should contain three complex field docproperties'

        for paragraph in document.paragraphs:
            assert u'Foo' == paragraph.text

        CustomProperties(document).dissolve_fields("Text Property")

        assert 3 == len(document.paragraphs)
        assert 0 == len(xpath(document.element.body, './/w:instrText')), \
            'document should not contain any complex field anymore'
        for paragraph in document.paragraphs:
            assert u'Foo' == paragraph.text, "value should have been kept in document" 
Example #22
Source File: rastleak_1_4.py    From RastLeak with GNU General Public License v3.0 6 votes vote down vote up
def Analyze_Metadata_pdf(filename):
####### FUNCTION AnalyzeMetadata ######
	pdfFile = PdfFileReader(file(filename, 'rb'))
	metadata = pdfFile.getDocumentInfo()
	print ' - Document: ' + str(filename)
	for meta in metadata:
		value=(metadata[meta])
		print ' - ' + meta + ':' + metadata[meta]
		if meta == "/Author":
			if value not in meta_author_array:
				meta_author_array.append(value)
		elif meta =="/Producer":
			if value not in meta_producer_array:
				meta_producer_array.append(value)
		elif meta == "/Creator":
			if value not in meta_creator_array:
				meta_creator_array.append(value)
	#Group the different arrays in one with all metadata
	metadata_files.append(meta_author_array)
	metadata_files.append(meta_producer_array)
	metadata_files.append(meta_creator_array)

####### FUNCTION AnalyzeMetadata doc ###### 
Example #23
Source File: new_hire_orientation.py    From Automate-it with MIT License 6 votes vote down vote up
def generate_document(employee_data, agenda):
    document = Document()
    for emp in employee_data:
        if emp['isDue']:
            name = emp['name']
            document.add_heading('Your New Hire Orientation\n', level=1)
            document.add_paragraph('Dear %s,' % name)
            document.add_paragraph('Welcome to Google Inc. You have been selected for our new hire orientation.')
            document.add_paragraph('Based on your department you will go through below sessions:')
            department = emp['department']
            for session in agenda[department]:
                document.add_paragraph(
                    session , style='ListBullet'
                )
            document.add_paragraph('Thanks,\n HR Manager')
            document.save('orientation_%s.docx' % emp['id']) 
Example #24
Source File: test_properties.py    From docxcompose with MIT License 5 votes vote down vote up
def test_add_utf8_property():
    document = Document(docx_path('docproperties.docx'))
    props = CustomProperties(document)

    props.add('My Text Property', u'f\xfc\xfc'.encode('utf-8'))
    assert props.get('My Text Property') == u'f\xfc\xfc' 
Example #25
Source File: test_properties.py    From docxcompose with MIT License 5 votes vote down vote up
def test_get_doc_properties():
    document = Document(docx_path('docproperties.docx'))
    props = CustomProperties(document)

    assert props['Text Property'] == 'Foo Bar'
    assert props['Number Property'] == 123
    assert props['Boolean Property'] is True
    assert props['Date Property'] == datetime(2019, 6, 11, 10, 0)

    assert props.get('Text Property') == 'Foo Bar'
    assert props.get('Number Property') == 123
    assert props.get('Boolean Property') is True
    assert props.get('Date Property') == datetime(2019, 6, 11, 10, 0) 
Example #26
Source File: test_properties.py    From docxcompose with MIT License 5 votes vote down vote up
def test_set_doc_properties():
    document = Document(docx_path('docproperties.docx'))
    props = CustomProperties(document)

    props['Text Property'] = 'baz'
    assert props['Text Property'] == 'baz'

    props['Boolean Property'] = False
    assert props['Boolean Property'] is False

    props['Number Property'] = 456
    assert props['Number Property'] == 456

    props['Date Property'] = datetime(2019, 10, 20, 12, 0)
    assert props['Date Property'] == datetime(2019, 10, 20, 12, 0) 
Example #27
Source File: test_styles.py    From docxcompose with MIT License 5 votes vote down vote up
def merged_styles():
    composer = Composer(Document(docx_path("styles_en.docx")))
    composer.append(Document(docx_path("styles_de.docx")))
    return composer 
Example #28
Source File: test_properties.py    From docxcompose with MIT License 5 votes vote down vote up
def test_doc_properties_keys():
    document = Document(docx_path('docproperties.docx'))
    props = CustomProperties(document)

    assert props.keys() == [
        'Text Property',
        'Number Property',
        'Boolean Property',
        'Date Property',
        'Float Property',
    ] 
Example #29
Source File: test_properties.py    From docxcompose with MIT License 5 votes vote down vote up
def test_doc_properties_items():
    document = Document(docx_path('docproperties.docx'))
    props = CustomProperties(document)

    assert props.items() == [
        ('Text Property', 'Foo Bar'),
        ('Number Property', 123),
        ('Boolean Property', True),
        ('Date Property', datetime(2019, 6, 11, 10, 0)),
        ('Float Property', 1.1),
    ] 
Example #30
Source File: RastLeak_1_2.py    From RastLeak with GNU General Public License v3.0 5 votes vote down vote up
def Analyze_Metadata_doc(fileName):
	#Open file
	docxFile = docx.Document(file(fileName,'rb'))
	#Get the structure
	docxInfo= docxFile.core_properties
	#Print the metadata which it wants to display
	attribute = ["author", "category", "comments", "content_status", 
	    "created", "identifier", "keywords", "language", 
	    "last_modified_by", "last_printed", "modified", 
	    "revision", "subject", "title", "version"]
	#run the list in a for loop to print the value of each metadata
	print ' - Document: ' + str(fileName)
	for meta in attribute:
	    metadata = getattr(docxInfo,meta)
	    value = metadata([meta])
	    if metadata:
	    	if meta =="/Author":
	    		if value not in meta_author_array:
	    			meta_author_array.append(value)
			elif meta == "/Producer":
				if value not in meta_producer_array:
					meta_producer_array.append(value)
			elif meta =="/Creator":
				if value not in meta_creator_array:
					meta_creator_array.append(value)
	        #Separate the values unicode and time date
	        if isinstance(metadata, unicode): 
	            print " \n\t" + str(meta)+": " + str(metadata)
	        elif isinstance(metadata, datetime.datetime):
	            print " \n\t" + str(meta)+": " + str(metadata)