org.apache.poi.hpsf.SummaryInformation Java Examples
The following examples show how to use
org.apache.poi.hpsf.SummaryInformation.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WordUtil.java From javatech with Creative Commons Attribution Share Alike 4.0 International | 7 votes |
/**
 * Rewrites author/company metadata of the Word document at {@code filename} in place.
 *
 * <p>The file is loaded as an {@link HWPFDocument}; author and last author are set on its
 * summary information, company and document version on its document summary information,
 * and the document is then written back over the same path.
 *
 * @param filename path of the document to read and overwrite
 * @throws IOException if the file cannot be read or written
 */
public static void setDocProperties(String filename) throws IOException {
    System.out.println("filename = [" + filename + "]");
    File file = new File(filename);

    // try-with-resources closes the output stream, the document and the input stream
    // (in that order) even when one of the steps below throws.
    try (FileInputStream fis = new FileInputStream(file);
         HWPFDocument doc = new HWPFDocument(fis)) {
        SummaryInformation summaryInformation = doc.getSummaryInformation();
        summaryInformation.setAuthor("张鹏");
        summaryInformation.setLastAuthor("张鹏");

        DocumentSummaryInformation documentSummaryInformation = doc.getDocumentSummaryInformation();
        documentSummaryInformation.setCompany("张鹏");
        documentSummaryInformation.setDocumentVersion("1");

        try (FileOutputStream fos = new FileOutputStream(file)) {
            doc.write(fos);
        }
    }
}
Example #2
Source File: POIDocument.java From lams with GNU General Public License v2.0 | 6 votes |
/** * Find, and create objects for, the standard * Document Information Properties (HPSF). * If a given property set is missing or corrupt, * it will remain null; */ protected void readProperties() { if (initialized) { return; } DocumentSummaryInformation dsi = readPropertySet(DocumentSummaryInformation.class, DocumentSummaryInformation.DEFAULT_STREAM_NAME); if (dsi != null) { dsInf = dsi; } SummaryInformation si = readPropertySet(SummaryInformation.class, SummaryInformation.DEFAULT_STREAM_NAME); if (si != null) { sInf = si; } // Mark the fact that we've now loaded up the properties initialized = true; }
Example #3
Source File: SummaryInformationSanitiser.java From DocBleach with MIT License | 6 votes |
/**
 * Sanitizes the entry when it is the summary-information stream stored as a
 * {@link DocumentEntry}; every other entry is passed through untouched.
 *
 * @param entry the entry to inspect
 * @return always {@code true}
 */
@Override
public boolean test(Entry entry) {
    final boolean isSummaryStream =
            SummaryInformation.DEFAULT_STREAM_NAME.equals(entry.getName());

    if (isSummaryStream && entry instanceof DocumentEntry) {
        sanitizeSummaryInformation(session, (DocumentEntry) entry);
    }
    return true;
}
Example #4
Source File: SummaryInformationSanitiser.java From DocBleach with MIT License | 6 votes |
protected void sanitizeSummaryInformation(BleachSession session, DocumentEntry dsiEntry) { if (dsiEntry.getSize() <= 0) { return; } try (DocumentInputStream dis = new DocumentInputStream(dsiEntry)) { PropertySet ps = new PropertySet(dis); // Useful for debugging purposes // LOGGER.debug("PropertySet sections: {}", ps.getSections()); SummaryInformation dsi = new SummaryInformation(ps); sanitizeSummaryInformation(session, dsi); } catch (NoPropertySetStreamException | UnexpectedPropertySetTypeException | IOException e) { LOGGER.error("An error occured while trying to sanitize the document entry", e); } }
Example #5
Source File: SummaryInformationSanitiser.java From DocBleach with MIT License | 6 votes |
/**
 * Removes the document's comments, if any, and records the removal as a threat.
 *
 * <p>Nothing happens when the comments are {@code null} or empty.
 *
 * @param session the session the threat is recorded on
 * @param dsi the summary information to clean
 */
protected void sanitizeComments(BleachSession session, SummaryInformation dsi) {
    final String existing = dsi.getComments();
    final boolean hasComments = existing != null && !existing.isEmpty();

    if (hasComments) {
        LOGGER.trace("Removing the document's Comments (was '{}')", existing);
        dsi.removeComments();

        session.recordThreat(
                Threat.builder()
                        .type(ThreatType.UNRECOGNIZED_CONTENT)
                        .severity(ThreatSeverity.LOW)
                        .action(ThreatAction.REMOVE)
                        .location("Summary Information - Comment")
                        .details("Comment was: '" + existing + "'")
                        .build());
    }
}
Example #6
Source File: SummaryInformationSanitiserTest.java From DocBleach with MIT License | 6 votes |
@Test void test1() { // Test an invalid stream, should be ignored Entry entry = mock(Entry.class); doReturn("\005RandomString").when(entry).getName(); assertTrue(instance.test(entry)); verify(instance, never()).sanitizeSummaryInformation(eq(session), (DocumentEntry) any()); // Test a valid stream name, but wrong type (should be ignored) reset(entry); doReturn(SummaryInformation.DEFAULT_STREAM_NAME).when(entry).getName(); assertTrue(instance.test(entry)); verify(instance, never()).sanitizeSummaryInformation(eq(session), (DocumentEntry) any()); reset(instance, entry); // Test a valid SummaryInformation name DocumentEntry docEntry = mock(DocumentEntry.class); doReturn(SummaryInformation.DEFAULT_STREAM_NAME).when(docEntry).getName(); doNothing().when(instance).sanitizeSummaryInformation(session, docEntry); assertTrue(instance.test(docEntry)); verify(instance, atLeastOnce()).sanitizeSummaryInformation(session, docEntry); }
Example #7
Source File: HPSFPropertiesExtractor.java From lams with GNU General Public License v2.0 | 5 votes |
public String getSummaryInformationText() { if(document == null) { // event based extractor does not have a document return ""; } SummaryInformation si = document.getSummaryInformation(); // Just normal properties return getPropertiesText(si); }
Example #8
Source File: POIDocument.java From lams with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the Summary Information of the document, loading the properties first if
 * that has not happened yet.
 *
 * @return the summary information for the document, or null
 *         if it could not be read for this document
 */
public SummaryInformation getSummaryInformation() {
    if (initialized) {
        return sInf;
    }
    readProperties();
    return sInf;
}
Example #9
Source File: HSSFWorkbook.java From lams with GNU General Public License v2.0 | 5 votes |
/** Writes the workbook out to a brand new, empty POIFS */ private void write(NPOIFSFileSystem fs) throws IOException { // For tracking what we've written out, used if we're // going to be preserving nodes List<String> excepts = new ArrayList<String>(1); // Write out the Workbook stream fs.createDocument(new ByteArrayInputStream(getBytes()), "Workbook"); // Write out our HPFS properties, if we have them writeProperties(fs, excepts); if (preserveNodes) { // Don't write out the old Workbook, we'll be doing our new one // If the file had an "incorrect" name for the workbook stream, // don't write the old one as we'll use the correct name shortly excepts.addAll(Arrays.asList(WORKBOOK_DIR_ENTRY_NAMES)); // summary information has been already written via writeProperties and might go in a // different stream, if the file is cryptoapi encrypted excepts.addAll(Arrays.asList( DocumentSummaryInformation.DEFAULT_STREAM_NAME, SummaryInformation.DEFAULT_STREAM_NAME, getEncryptedPropertyStreamName() )); // Copy over all the other nodes to our new poifs EntryUtils.copyNodes( new FilteringDirectoryNode(getDirectory(), excepts) , new FilteringDirectoryNode(fs.getRoot(), excepts) ); // YK: preserve StorageClsid, it is important for embedded workbooks, // see Bugzilla 47920 fs.getRoot().setStorageClsid(getDirectory().getStorageClsid()); } }
Example #10
Source File: ObjectRemoverTest.java From DocBleach with MIT License | 5 votes |
/**
 * Entries that are not objects must be kept and must not produce any threat.
 */
@Test
void testKeepsEverythingElse() {
    Entry streamEntry = mock(Entry.class);

    // Summary information stream
    doReturn(SummaryInformation.DEFAULT_STREAM_NAME).when(streamEntry).getName();
    boolean keptSummary = instance.test(streamEntry);
    assertTrue(keptSummary, "Non-object entries should be ignored");
    BleachTestBase.assertThreatsFound(session, 0);
    reset(session);

    // Arbitrary stream name
    doReturn("RandomName").when(streamEntry).getName();
    boolean keptRandom = instance.test(streamEntry);
    assertTrue(keptRandom, "Non-object entries should be ignored");
    BleachTestBase.assertThreatsFound(session, 0);
    reset(session);
}
Example #11
Source File: SummaryInformationSanitiser.java From DocBleach with MIT License | 5 votes |
/**
 * Removes the document's template reference and records the removal as a threat.
 *
 * <p>Nothing happens when the template equals {@code NORMAL_TEMPLATE} or is
 * {@code null}. The recorded severity is HIGH when {@code isExternalTemplate} reports
 * the template as external, LOW otherwise.
 *
 * @param session the session the threat is recorded on
 * @param dsi the summary information to clean
 */
protected void sanitizeTemplate(BleachSession session, SummaryInformation dsi) {
    final String current = dsi.getTemplate();

    if (NORMAL_TEMPLATE.equals(current) || current == null) {
        return;
    }

    LOGGER.trace("Removing the document's template (was '{}')", current);
    dsi.removeTemplate();

    final ThreatSeverity level;
    if (isExternalTemplate(current)) {
        level = ThreatSeverity.HIGH;
    } else {
        level = ThreatSeverity.LOW;
    }

    session.recordThreat(
            Threat.builder()
                    .type(ThreatType.EXTERNAL_CONTENT)
                    .severity(level)
                    .action(ThreatAction.REMOVE)
                    .location("Summary Information - Template")
                    .details("Template was: '" + current + "'")
                    .build());
}
Example #12
Source File: MacroRemoverTest.java From DocBleach with MIT License | 5 votes |
/**
 * Streams that are not macros must be kept and must not produce any threat.
 */
@Test
void testKeepsEverythingElse() {
    Entry streamEntry = mock(Entry.class);

    // Summary information stream
    doReturn(SummaryInformation.DEFAULT_STREAM_NAME).when(streamEntry).getName();
    boolean keptSummary = instance.test(streamEntry);
    assertTrue(keptSummary, "Non-macro streams should be ignored");
    BleachTestBase.assertThreatsFound(session, 0);
    reset(session);

    // Arbitrary stream name
    doReturn("RandomName").when(streamEntry).getName();
    boolean keptRandom = instance.test(streamEntry);
    assertTrue(keptRandom, "Non-macro streams should be ignored");
    BleachTestBase.assertThreatsFound(session, 0);
    reset(session);
}
Example #13
Source File: SummaryInformationSanitiserTest.java From DocBleach with MIT License | 5 votes |
@Test void sanitizeComments() { SummaryInformation si = new SummaryInformation(); // When no comment is set, no error/threat is thrown instance.sanitizeComments(session, si); assertThreatsFound(session, 0); // When a comment is set, it should be removed si.setComments("Hello!"); instance.sanitizeComments(session, si); assertNull(si.getComments()); assertThreatsFound(session, 1); }
Example #14
Source File: SpreadsheetAddInfo.java From openbd-core with GNU General Public License v3.0 | 4 votes |
/**
 * Copies document properties from a struct onto a spreadsheet's workbook.
 *
 * <p>{@code parameters.get(0)} must be a struct holding the property values;
 * {@code parameters.get(1)} is the spreadsheet. For an {@link XSSFWorkbook} the values
 * go to the core properties; otherwise the workbook is treated as an
 * {@link HSSFWorkbook} and the values go to its summary and document summary
 * information. Only keys present in the struct are applied.
 *
 * @param _session the current session, used for error reporting
 * @param parameters the call arguments as described above
 * @return {@code cfBooleanData.TRUE}
 * @throws cfmRunTimeException declared by the signature; presumably raised by
 *         {@code throwException} when the first parameter is not a struct
 */
public cfData execute( cfSession _session, List<cfData> parameters ) throws cfmRunTimeException {
    if ( parameters.get(0).getDataType() != cfData.CFSTRUCTDATA ) {
        throwException(_session, "parameter must be of type structure");
    }

    cfSpreadSheetData spreadsheet = (cfSpreadSheetData)parameters.get(1);
    cfStructData s = (cfStructData)parameters.get(0);

    Workbook workbook = spreadsheet.getWorkBook();

    if ( workbook instanceof XSSFWorkbook ){
        /*
         * XSSFWorkbook
         */
        XSSFWorkbook xSSFWorkbook = (XSSFWorkbook)workbook;

        CoreProperties cP = xSSFWorkbook.getProperties().getCoreProperties();

        if ( s.containsKey("author") ) {
            cP.setCreator( s.getData("author").getString() );
        }
        if ( s.containsKey("category") ) {
            cP.setCategory( s.getData("category").getString() );
        }
        if ( s.containsKey("subject") ) {
            cP.setSubjectProperty( s.getData("subject").getString() );
        }
        if ( s.containsKey("title") ) {
            cP.setTitle( s.getData("title").getString() );
        }
        if ( s.containsKey("revision") ) {
            cP.setRevision( s.getData("revision").getString() );
        }
        if ( s.containsKey("description") ) {
            cP.setDescription( s.getData("description").getString() );
        }

    }else{
        HSSFWorkbook hSSFWorkbook = (HSSFWorkbook)workbook;

        // Either property set can be absent independently of the other, so create the
        // information properties when any of the two is missing; previously only the
        // document summary was checked and a missing summary caused a
        // NullPointerException below.
        if ( hSSFWorkbook.getDocumentSummaryInformation() == null
                || hSSFWorkbook.getSummaryInformation() == null ){
            hSSFWorkbook.createInformationProperties();
        }

        DocumentSummaryInformation dSummary = hSSFWorkbook.getDocumentSummaryInformation();

        if ( s.containsKey("category") ) {
            dSummary.setCategory( s.getData("category").getString() );
        }
        if ( s.containsKey("manager") ) {
            dSummary.setManager( s.getData("manager").getString() );
        }
        if ( s.containsKey("company") ) {
            dSummary.setCompany( s.getData("company").getString() );
        }

        SummaryInformation sInformation = hSSFWorkbook.getSummaryInformation();

        if ( s.containsKey("title") ) {
            sInformation.setTitle( s.getData("title").getString() );
        }
        if ( s.containsKey("subject") ) {
            sInformation.setSubject( s.getData("subject").getString() );
        }
        if ( s.containsKey("author") ) {
            sInformation.setAuthor( s.getData("author").getString() );
        }
        if ( s.containsKey("comments") ) {
            sInformation.setComments( s.getData("comments").getString() );
        }
        if ( s.containsKey("keywords") ) {
            sInformation.setKeywords( s.getData("keywords").getString() );
        }
        if ( s.containsKey("lastauthor") ) {
            sInformation.setLastAuthor( s.getData("lastauthor").getString() );
        }
    }

    return cfBooleanData.TRUE;
}
Example #15
Source File: MetadataExtractor.java From document-management-system with GNU General Public License v2.0 | 4 votes |
/**
 * Extract metadata from an Office document (Word, Excel or PowerPoint).
 *
 * <p>The extractor used to obtain the summary information is chosen by
 * {@code mimeType}. For any other MIME type, or when no summary information is
 * available, the returned metadata object is left unpopulated. Date properties that
 * are not set in the document are skipped instead of being converted.
 *
 * @param is stream containing the OLE2 document
 * @param mimeType MIME type used to select the extractor
 * @return the extracted metadata
 * @throws IOException if the document cannot be read
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(is);
    OfficeMetadata md = new OfficeMetadata();
    SummaryInformation si = null;

    if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
        si = new WordExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
        si = new ExcelExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
        si = new PowerPointExtractor(fs).getSummaryInformation();
    }

    if (si != null) {
        md.setTitle(si.getTitle());
        md.setSubject(si.getSubject());
        md.setAuthor(si.getAuthor());
        md.setLastAuthor(si.getLastAuthor());
        md.setKeywords(si.getKeywords());
        md.setComments(si.getComments());
        md.setTemplate(si.getTemplate());
        md.setRevNumber(si.getRevNumber());
        md.setApplicationName(si.getApplicationName());
        md.setEditTime(si.getEditTime());
        md.setPageCount(si.getPageCount());
        md.setWordCount(si.getWordCount());
        md.setCharCount(si.getCharCount());
        md.setSecurity(si.getSecurity());

        // The date getters may yield null (e.g. a document that was never printed);
        // Calendar.setTime(null) would throw a NullPointerException, so unset dates
        // are skipped.
        java.util.Date created = si.getCreateDateTime();
        if (created != null) {
            Calendar createDateTime = Calendar.getInstance();
            createDateTime.setTime(created);
            md.setCreateDateTime(createDateTime);
        }

        java.util.Date lastSaved = si.getLastSaveDateTime();
        if (lastSaved != null) {
            Calendar lastSaveDateTime = Calendar.getInstance();
            lastSaveDateTime.setTime(lastSaved);
            md.setLastSaveDateTime(lastSaveDateTime);
        }

        java.util.Date printed = si.getLastPrinted();
        if (printed != null) {
            Calendar lastPrinted = Calendar.getInstance();
            lastPrinted.setTime(printed);
            md.setLastPrinted(lastPrinted);
        }
    }

    log.info("officeExtractor: {}", md);
    return md;
}
Example #16
Source File: MSExcelWriter.java From hadoopoffice with Apache License 2.0 | 4 votes |
/** * * Write metadata into HSSF document * */ private void prepareHSSFMetaData() { HSSFWorkbook currentHSSFWorkbook = (HSSFWorkbook) this.currentWorkbook; SummaryInformation summaryInfo = currentHSSFWorkbook.getSummaryInformation(); if (summaryInfo==null) { currentHSSFWorkbook.createInformationProperties(); summaryInfo = currentHSSFWorkbook.getSummaryInformation(); } SimpleDateFormat formatSDF = new SimpleDateFormat(MSExcelParser.DATE_FORMAT); for (Map.Entry<String,String> entry: this.howc.getMetadata().entrySet()) { // process general properties try { switch(entry.getKey()) { case "applicationname": summaryInfo.setApplicationName(entry.getValue()); break; case "author": summaryInfo.setAuthor(entry.getValue()); break; case "charcount": summaryInfo.setCharCount(Integer.parseInt(entry.getValue())); break; case "comments": summaryInfo.setComments(entry.getValue()); break; case "createdatetime": summaryInfo.setCreateDateTime(formatSDF.parse(entry.getValue())); break; case "edittime": summaryInfo.setEditTime(Long.parseLong(entry.getValue())); break; case "keywords": summaryInfo.setKeywords(entry.getValue()); break; case "lastauthor": summaryInfo.setLastAuthor(entry.getValue()); break; case "lastprinted": summaryInfo.setLastPrinted(formatSDF.parse(entry.getValue())); break; case "lastsavedatetime": summaryInfo.setLastSaveDateTime(formatSDF.parse(entry.getValue())); break; case "pagecount": summaryInfo.setPageCount(Integer.parseInt(entry.getValue())); break; case "revnumber": summaryInfo.setRevNumber(entry.getValue()); break; case "security": summaryInfo.setSecurity(Integer.parseInt(entry.getValue())); break; case "subject": summaryInfo.setSubject(entry.getValue()); break; case "template": summaryInfo.setTemplate(entry.getValue()); break; case "title": summaryInfo.setTitle(entry.getValue()); break; case "wordcount": summaryInfo.setWordCount(Integer.parseInt(entry.getValue())); break; default: LOG.warn("Unknown metadata key: "+entry.getKey()); break; } } catch 
(ParseException pe) { LOG.error(pe); } } }
Example #17
Source File: SummaryInformationSanitiser.java From DocBleach with MIT License | 4 votes |
/**
 * Runs all summary-information sanitizers on {@code dsi}: first the template check,
 * then the comments check.
 *
 * @param session the current bleach session
 * @param dsi the summary information to clean
 */
protected void sanitizeSummaryInformation(BleachSession session, SummaryInformation dsi) {
    // Template first, comments second
    sanitizeTemplate(session, dsi);
    sanitizeComments(session, dsi);
}
Example #18
Source File: POIDocument.java From lams with GNU General Public License v2.0 | 4 votes |
/** * Writes out the standard Document Information Properties (HPSF) * @param outFS the NPOIFSFileSystem to write the properties into * @param writtenEntries a list of POIFS entries to add the property names too * * @throws IOException if an error when writing to the * {@link NPOIFSFileSystem} occurs */ protected void writeProperties(NPOIFSFileSystem outFS, List<String> writtenEntries) throws IOException { EncryptionInfo ei = getEncryptionInfo(); final boolean encryptProps = (ei != null && ei.isDocPropsEncrypted()); NPOIFSFileSystem fs = (encryptProps) ? new NPOIFSFileSystem() : outFS; SummaryInformation si = getSummaryInformation(); if (si != null) { writePropertySet(SummaryInformation.DEFAULT_STREAM_NAME, si, fs); if(writtenEntries != null) { writtenEntries.add(SummaryInformation.DEFAULT_STREAM_NAME); } } DocumentSummaryInformation dsi = getDocumentSummaryInformation(); if (dsi != null) { writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, dsi, fs); if(writtenEntries != null) { writtenEntries.add(DocumentSummaryInformation.DEFAULT_STREAM_NAME); } } if (!encryptProps) { return; } // create empty document summary dsi = PropertySetFactory.newDocumentSummaryInformation(); writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, dsi, outFS); // remove summary, if previously available if (outFS.getRoot().hasEntry(SummaryInformation.DEFAULT_STREAM_NAME)) { outFS.getRoot().getEntry(SummaryInformation.DEFAULT_STREAM_NAME).delete(); } Encryptor encGen = ei.getEncryptor(); if (!(encGen instanceof CryptoAPIEncryptor)) { throw new EncryptedDocumentException("Using "+ei.getEncryptionMode()+" encryption. Only CryptoAPI encryption supports encrypted property sets!"); } CryptoAPIEncryptor enc = (CryptoAPIEncryptor)encGen; try { enc.setSummaryEntries(outFS.getRoot(), getEncryptedPropertyStreamName(), fs); } catch (GeneralSecurityException e) { throw new IOException(e); } finally { fs.close(); } }
Example #19
Source File: POIOLE2TextExtractor.java From lams with GNU General Public License v2.0 | 2 votes |
/**
 * Provides the summary information metadata of the underlying document by delegating
 * to {@code document.getSummaryInformation()}.
 *
 * @return the summary information for the document, or null
 *         if it could not be read for this document
 */
public SummaryInformation getSummaryInformation() {
    final SummaryInformation summary = document.getSummaryInformation();
    return summary;
}
Example #20
Source File: EventBasedExcelExtractor.java From lams with GNU General Public License v2.0 | 2 votes |
/**
 * Not supported by this extractor: summary information metadata is unavailable in
 * streaming mode.
 *
 * @return never returns normally
 * @throws IllegalStateException always
 */
public SummaryInformation getSummaryInformation() {
    final String reason =
            "Metadata extraction not supported in streaming mode, please use ExcelExtractor";
    throw new IllegalStateException(reason);
}