Java Code Examples for org.apache.poi.hpsf.SummaryInformation

The following examples show how to use org.apache.poi.hpsf.SummaryInformation. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Overwrites the author, last author, company and document version properties of the
 * Word document stored at {@code filename} and writes the document back to the same path.
 *
 * @param filename path of the document to update in place
 * @throws IOException if the document cannot be read or written
 */
public static void setDocProperties(String filename) throws IOException {
    System.out.println("filename = [" + filename + "]");
    File file = new File(filename);

    // try-with-resources releases the input stream and the document even when
    // reading, updating or writing fails part-way through.
    try (FileInputStream fis = new FileInputStream(file);
         HWPFDocument doc = new HWPFDocument(fis)) {

        // The property sets can be null when the document does not carry them
        // (or they could not be read); create the missing ones before use.
        if (doc.getSummaryInformation() == null || doc.getDocumentSummaryInformation() == null) {
            doc.createInformationProperties();
        }

        SummaryInformation summaryInformation = doc.getSummaryInformation();
        summaryInformation.setAuthor("张鹏");
        summaryInformation.setLastAuthor("张鹏");

        DocumentSummaryInformation documentSummaryInformation = doc.getDocumentSummaryInformation();
        documentSummaryInformation.setCompany("张鹏");
        documentSummaryInformation.setDocumentVersion("1");

        // Opened only once the in-memory changes are complete; closed before the
        // document and the input stream, matching the original close order.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            doc.write(fos);
        }
    }
}
 
Example 2
Source Project: lams   Source File: POIDocument.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Loads the standard Document Information Properties (HPSF) once:
 * the document summary information and the summary information.
 * A property set for which {@code readPropertySet} yields {@code null}
 * leaves the corresponding field unchanged.
 */
protected void readProperties() {
    if (!initialized) {
        DocumentSummaryInformation docSummary = readPropertySet(
                DocumentSummaryInformation.class,
                DocumentSummaryInformation.DEFAULT_STREAM_NAME);
        if (docSummary != null) {
            dsInf = docSummary;
        }

        SummaryInformation summary = readPropertySet(
                SummaryInformation.class,
                SummaryInformation.DEFAULT_STREAM_NAME);
        if (summary != null) {
            sInf = summary;
        }

        // Remember that loading was attempted so it is not repeated
        initialized = true;
    }
}
 
Example 3
Source Project: DocBleach   Source File: SummaryInformationSanitiser.java    License: MIT License 6 votes vote down vote up
/**
 * Sanitizes the entry when it is a {@link DocumentEntry} named like the
 * summary information stream; every other entry is left alone.
 *
 * @param entry the entry to inspect
 * @return always {@code true}
 */
@Override
public boolean test(Entry entry) {
  boolean isSummaryStream = SummaryInformation.DEFAULT_STREAM_NAME.equals(entry.getName());

  if (isSummaryStream && entry instanceof DocumentEntry) {
    sanitizeSummaryInformation(session, (DocumentEntry) entry);
  }

  return true;
}
 
Example 4
Source Project: DocBleach   Source File: SummaryInformationSanitiser.java    License: MIT License 6 votes vote down vote up
/**
 * Parses the given entry as a summary information property set and sanitizes it.
 * Entries with a non-positive size are skipped; parsing and I/O failures are
 * logged and otherwise ignored.
 *
 * @param session the current bleach session
 * @param dsiEntry the document entry holding the summary information stream
 */
protected void sanitizeSummaryInformation(BleachSession session, DocumentEntry dsiEntry) {
  if (dsiEntry.getSize() > 0) {
    try (DocumentInputStream stream = new DocumentInputStream(dsiEntry)) {
      SummaryInformation summary = new SummaryInformation(new PropertySet(stream));
      sanitizeSummaryInformation(session, summary);
    } catch (NoPropertySetStreamException | UnexpectedPropertySetTypeException | IOException e) {
      LOGGER.error("An error occured while trying to sanitize the document entry", e);
    }
  }
}
 
Example 5
Source Project: DocBleach   Source File: SummaryInformationSanitiser.java    License: MIT License 6 votes vote down vote up
/**
 * Removes the comments property from the summary information, if it is set
 * and non-empty, and records the removal as a low-severity threat.
 *
 * @param session the session on which the threat is recorded
 * @param dsi the summary information to clean
 */
protected void sanitizeComments(BleachSession session, SummaryInformation dsi) {
  final String previousComments = dsi.getComments();
  final boolean hasComments = previousComments != null && !previousComments.isEmpty();

  if (hasComments) {
    LOGGER.trace("Removing the document's Comments (was '{}')", previousComments);
    dsi.removeComments();

    session.recordThreat(
        Threat.builder()
            .type(ThreatType.UNRECOGNIZED_CONTENT)
            .severity(ThreatSeverity.LOW)
            .action(ThreatAction.REMOVE)
            .location("Summary Information - Comment")
            .details("Comment was: '" + previousComments + "'")
            .build());
  }
}
 
Example 6
Source Project: DocBleach   Source File: SummaryInformationSanitiserTest.java    License: MIT License 6 votes vote down vote up
/**
 * Checks that {@code test(Entry)} always returns true and only delegates to
 * {@code sanitizeSummaryInformation(session, DocumentEntry)} for a
 * {@link DocumentEntry} named like the summary information stream.
 * NOTE(review): {@code instance} is presumably a Mockito spy, since it is
 * both stubbed and verified here - confirm against the test setup.
 */
@Test
void test1() {
  // Case 1: a stream name that is not the summary information name must be ignored
  Entry entry = mock(Entry.class);
  doReturn("\005RandomString").when(entry).getName();
  assertTrue(instance.test(entry));
  verify(instance, never()).sanitizeSummaryInformation(eq(session), (DocumentEntry) any());

  // Case 2: the right stream name, but the entry is not a DocumentEntry (should be ignored)
  reset(entry);
  doReturn(SummaryInformation.DEFAULT_STREAM_NAME).when(entry).getName();
  assertTrue(instance.test(entry));
  verify(instance, never()).sanitizeSummaryInformation(eq(session), (DocumentEntry) any());

  // Clear recorded interactions and stubs before the positive case
  reset(instance, entry);

  // Case 3: a DocumentEntry carrying the summary information name must be sanitized
  DocumentEntry docEntry = mock(DocumentEntry.class);

  doReturn(SummaryInformation.DEFAULT_STREAM_NAME).when(docEntry).getName();
  // Stub out the real sanitizing so only the delegation itself is exercised
  doNothing().when(instance).sanitizeSummaryInformation(session, docEntry);
  assertTrue(instance.test(docEntry));
  verify(instance, atLeastOnce()).sanitizeSummaryInformation(session, docEntry);
}
 
Example 7
Source Project: lams   Source File: HPSFPropertiesExtractor.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the text form of the document's summary information properties,
 * or an empty string when no document is available.
 */
public String getSummaryInformationText() {
    // event based extractor does not have a document
    return (document == null)
            ? ""
            : getPropertiesText(document.getSummaryInformation());
}
 
Example 8
Source Project: lams   Source File: POIDocument.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the document's Summary Information, loading the
 * properties first if that has not happened yet.
 *
 * @return The Summary information for the document or null
 *      if it could not be read for this document.
 */
public SummaryInformation getSummaryInformation() {
    if (initialized) {
        return sInf;
    }
    readProperties();
    return sInf;
}
 
Example 9
Source Project: lams   Source File: HSSFWorkbook.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Writes the workbook stream and the HPSF properties into the given, brand
 * new and empty POIFS; when nodes are preserved, also copies over every other
 * node of the original directory and keeps its storage class id.
 */
private void write(NPOIFSFileSystem fs) throws IOException {
    // Names of entries already written (or deliberately dropped), so that
    //  the node-preserving copy below skips them
    final List<String> skippedEntries = new ArrayList<String>(1);

    // The Workbook stream goes first, then the HPSF properties, if present
    fs.createDocument(new ByteArrayInputStream(getBytes()), "Workbook");
    writeProperties(fs, skippedEntries);

    if (!preserveNodes) {
        return;
    }

    // The old workbook stream is never copied, whatever name it was stored
    // under: the new one has just been written under the correct name
    skippedEntries.addAll(Arrays.asList(WORKBOOK_DIR_ENTRY_NAMES));

    // summary information has been already written via writeProperties and might go in a
    // different stream, if the file is cryptoapi encrypted
    final String encryptedPropertiesName = getEncryptedPropertyStreamName();
    skippedEntries.add(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
    skippedEntries.add(SummaryInformation.DEFAULT_STREAM_NAME);
    skippedEntries.add(encryptedPropertiesName);

    // Everything else is carried over into the new poifs
    FilteringDirectoryNode source = new FilteringDirectoryNode(getDirectory(), skippedEntries);
    FilteringDirectoryNode target = new FilteringDirectoryNode(fs.getRoot(), skippedEntries);
    EntryUtils.copyNodes(source, target);

    // YK: preserve StorageClsid, it is important for embedded workbooks,
    // see Bugzilla 47920
    fs.getRoot().setStorageClsid(getDirectory().getStorageClsid());
}
 
Example 10
Source Project: DocBleach   Source File: SummaryInformationSanitiser.java    License: MIT License 5 votes vote down vote up
/**
 * Removes the template property from the summary information unless it is
 * absent or equal to {@code NORMAL_TEMPLATE}, and records the removal as a
 * threat whose severity is high for external templates and low otherwise.
 *
 * @param session the session on which the threat is recorded
 * @param dsi the summary information to clean
 */
protected void sanitizeTemplate(BleachSession session, SummaryInformation dsi) {
  final String template = dsi.getTemplate();

  if (template == null || NORMAL_TEMPLATE.equals(template)) {
    return;
  }

  LOGGER.trace("Removing the document's template (was '{}')", template);
  dsi.removeTemplate();

  final ThreatSeverity severity;
  if (isExternalTemplate(template)) {
    severity = ThreatSeverity.HIGH;
  } else {
    severity = ThreatSeverity.LOW;
  }

  session.recordThreat(
      Threat.builder()
          .type(ThreatType.EXTERNAL_CONTENT)
          .severity(severity)
          .action(ThreatAction.REMOVE)
          .location("Summary Information - Template")
          .details("Template was: '" + template + "'")
          .build());
}
 
Example 11
Source Project: DocBleach   Source File: MacroRemoverTest.java    License: MIT License 5 votes vote down vote up
/**
 * Entries whose names are not macro related must be accepted
 * without any threat being recorded.
 */
@Test
void testKeepsEverythingElse() {
  Entry entry = mock(Entry.class);
  String[] harmlessNames = {SummaryInformation.DEFAULT_STREAM_NAME, "RandomName"};

  for (String harmlessName : harmlessNames) {
    doReturn(harmlessName).when(entry).getName();
    assertTrue(instance.test(entry), "Non-macro streams should be ignored");
    BleachTestBase.assertThreatsFound(session, 0);
    reset(session);
  }
}
 
Example 12
Source Project: DocBleach   Source File: SummaryInformationSanitiserTest.java    License: MIT License 5 votes vote down vote up
/**
 * Checks {@code sanitizeComments} on a fresh {@link SummaryInformation}:
 * nothing is reported while no comment is set, and a set comment is
 * removed and reported as exactly one threat.
 */
@Test
void sanitizeComments() {
  SummaryInformation si = new SummaryInformation();

  // When no comment is set, no threat is recorded
  instance.sanitizeComments(session, si);
  assertThreatsFound(session, 0);

  // When a comment is set, it should be removed and counted as one threat
  si.setComments("Hello!");
  instance.sanitizeComments(session, si);
  assertNull(si.getComments());
  assertThreatsFound(session, 1);
}
 
Example 13
Source Project: DocBleach   Source File: ObjectRemoverTest.java    License: MIT License 5 votes vote down vote up
/**
 * Entries whose names are not object related must be accepted
 * without any threat being recorded.
 */
@Test
void testKeepsEverythingElse() {
  Entry entry = mock(Entry.class);
  String[] harmlessNames = {SummaryInformation.DEFAULT_STREAM_NAME, "RandomName"};

  for (String harmlessName : harmlessNames) {
    doReturn(harmlessName).when(entry).getName();
    assertTrue(instance.test(entry), "Non-object entries should be ignored");
    BleachTestBase.assertThreatsFound(session, 0);
    reset(session);
  }
}
 
Example 14
Source Project: lams   Source File: POIDocument.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Writes out the standard Document Information Properties (HPSF).
 * <p>
 * When the encryption info asks for encrypted document properties, the
 * property sets are first written into a temporary file system, an empty
 * document summary is placed into {@code outFS}, any plain summary entry
 * is removed from it, and the temporary file system is handed to the
 * CryptoAPI encryptor. The temporary file system is always closed again,
 * including when a step in between fails.
 *
 * @param outFS the NPOIFSFileSystem to write the properties into
 * @param writtenEntries a list of POIFS entries to add the property names to,
 *      may be {@code null}
 *
 * @throws IOException if an error when writing to the
 *      {@link NPOIFSFileSystem} occurs
 * @throws EncryptedDocumentException if encrypted properties are requested
 *      but the encryptor is not a {@link CryptoAPIEncryptor}
 */
protected void writeProperties(NPOIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
    EncryptionInfo ei = getEncryptionInfo();
    final boolean encryptProps = (ei != null && ei.isDocPropsEncrypted());
    NPOIFSFileSystem fs = (encryptProps) ? new NPOIFSFileSystem() : outFS;

    try {
        SummaryInformation si = getSummaryInformation();
        if (si != null) {
            writePropertySet(SummaryInformation.DEFAULT_STREAM_NAME, si, fs);
            if(writtenEntries != null) {
                writtenEntries.add(SummaryInformation.DEFAULT_STREAM_NAME);
            }
        }
        DocumentSummaryInformation dsi = getDocumentSummaryInformation();
        if (dsi != null) {
            writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, dsi, fs);
            if(writtenEntries != null) {
                writtenEntries.add(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
            }
        }

        if (!encryptProps) {
            return;
        }

        // create empty document summary
        dsi = PropertySetFactory.newDocumentSummaryInformation();
        writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, dsi, outFS);
        // remove summary, if previously available
        if (outFS.getRoot().hasEntry(SummaryInformation.DEFAULT_STREAM_NAME)) {
            outFS.getRoot().getEntry(SummaryInformation.DEFAULT_STREAM_NAME).delete();
        }
        Encryptor encGen = ei.getEncryptor();
        if (!(encGen instanceof CryptoAPIEncryptor)) {
            throw new EncryptedDocumentException("Using "+ei.getEncryptionMode()+" encryption. Only CryptoAPI encryption supports encrypted property sets!");
        }
        CryptoAPIEncryptor enc = (CryptoAPIEncryptor)encGen;
        try {
            enc.setSummaryEntries(outFS.getRoot(), getEncryptedPropertyStreamName(), fs);
        } catch (GeneralSecurityException e) {
            throw new IOException(e);
        }
    } finally {
        // Only the temporary file system is owned by this method; outFS
        // belongs to the caller and must stay open.
        if (encryptProps) {
            fs.close();
        }
    }
}
 
Example 15
Source Project: DocBleach   Source File: SummaryInformationSanitiser.java    License: MIT License 4 votes vote down vote up
/**
 * Runs every summary information sanitizing step on the given property set:
 * the template first, then the comments.
 *
 * @param session the current bleach session
 * @param dsi the summary information to clean
 */
protected void sanitizeSummaryInformation(BleachSession session, SummaryInformation dsi) {
  // Step 1: the template property
  sanitizeTemplate(session, dsi);

  // Step 2: the comments property
  sanitizeComments(session, dsi);
}
 
Example 16
Source Project: hadoopoffice   Source File: MSExcelWriter.java    License: Apache License 2.0 4 votes vote down vote up
/**
*
* Write metadata into HSSF document.
*
* Every entry of the configured metadata map is applied to the workbook's
* summary information; the information properties are created first when the
* workbook has none. Unknown keys are logged as warnings. A value that cannot
* be parsed as the date or number its key requires is logged as an error and
* skipped, so one malformed entry does not prevent the remaining ones from
* being written.
*
*/
private void prepareHSSFMetaData() {
	HSSFWorkbook currentHSSFWorkbook = (HSSFWorkbook) this.currentWorkbook;
	SummaryInformation summaryInfo = currentHSSFWorkbook.getSummaryInformation();
	if (summaryInfo==null) {
		currentHSSFWorkbook.createInformationProperties();
		summaryInfo = currentHSSFWorkbook.getSummaryInformation();
	}
	SimpleDateFormat formatSDF = new SimpleDateFormat(MSExcelParser.DATE_FORMAT);
	for (Map.Entry<String,String> entry: this.howc.getMetadata().entrySet()) {
		final String key = entry.getKey();
		final String value = entry.getValue();
		// process general properties
		try {
			switch(key) {
				case "applicationname":
					summaryInfo.setApplicationName(value);
					break;
				case "author":
					summaryInfo.setAuthor(value);
					break;
				case "charcount":
					summaryInfo.setCharCount(Integer.parseInt(value));
					break;
				case "comments":
					summaryInfo.setComments(value);
					break;
				case "createdatetime":
					summaryInfo.setCreateDateTime(formatSDF.parse(value));
					break;
				case "edittime":
					summaryInfo.setEditTime(Long.parseLong(value));
					break;
				case "keywords":
					summaryInfo.setKeywords(value);
					break;
				case "lastauthor":
					summaryInfo.setLastAuthor(value);
					break;
				case "lastprinted":
					summaryInfo.setLastPrinted(formatSDF.parse(value));
					break;
				case "lastsavedatetime":
					summaryInfo.setLastSaveDateTime(formatSDF.parse(value));
					break;
				case "pagecount":
					summaryInfo.setPageCount(Integer.parseInt(value));
					break;
				case "revnumber":
					summaryInfo.setRevNumber(value);
					break;
				case "security":
					summaryInfo.setSecurity(Integer.parseInt(value));
					break;
				case "subject":
					summaryInfo.setSubject(value);
					break;
				case "template":
					summaryInfo.setTemplate(value);
					break;
				case "title":
					summaryInfo.setTitle(value);
					break;
				case "wordcount":
					summaryInfo.setWordCount(Integer.parseInt(value));
					break;
				default:
					LOG.warn("Unknown metadata key: "+key);
					break;
			}
		} catch (ParseException | NumberFormatException e) {
			// Malformed numbers are handled like malformed dates: report and continue
			LOG.error("Could not parse value of metadata key: "+key, e);
		}

	}
}
 
Example 17
/**
 * Extract metadata from an Office document (Word, Excel or PowerPoint).
 *
 * The summary information is read through the extractor matching
 * {@code mimeType}; for any other MIME type, or when no summary information
 * is available, the returned metadata object is left as constructed. Date
 * properties missing from the document are not set on the result.
 *
 * @param is stream holding the OLE2 document
 * @param mimeType MIME type used to select the extractor
 * @return the extracted metadata, never {@code null}
 * @throws IOException if the document cannot be read
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
	POIFSFileSystem fs = new POIFSFileSystem(is);
	OfficeMetadata md = new OfficeMetadata();
	SummaryInformation si = null;

	if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
		si = new WordExtractor(fs).getSummaryInformation();
	} else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
		si = new ExcelExtractor(fs).getSummaryInformation();
	} else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
		si = new PowerPointExtractor(fs).getSummaryInformation();
	}

	if (si != null) {
		md.setTitle(si.getTitle());
		md.setSubject(si.getSubject());
		md.setAuthor(si.getAuthor());
		md.setLastAuthor(si.getLastAuthor());
		md.setKeywords(si.getKeywords());
		md.setComments(si.getComments());
		md.setTemplate(si.getTemplate());
		md.setRevNumber(si.getRevNumber());
		md.setApplicationName(si.getApplicationName());
		md.setEditTime(si.getEditTime());
		md.setPageCount(si.getPageCount());
		md.setWordCount(si.getWordCount());
		md.setCharCount(si.getCharCount());
		md.setSecurity(si.getSecurity());

		// The date getters yield null for properties the document does not
		// carry, and Calendar.setTime(null) would throw, so each is guarded.
		java.util.Date created = si.getCreateDateTime();
		if (created != null) {
			md.setCreateDateTime(toCalendar(created));
		}

		java.util.Date lastSaved = si.getLastSaveDateTime();
		if (lastSaved != null) {
			md.setLastSaveDateTime(toCalendar(lastSaved));
		}

		java.util.Date printed = si.getLastPrinted();
		if (printed != null) {
			md.setLastPrinted(toCalendar(printed));
		}
	}

	log.info("officeExtractor: {}", md);
	return md;
}

/**
 * Wraps a non-null date into a calendar of the default time zone and locale.
 */
private static Calendar toCalendar(java.util.Date date) {
	Calendar calendar = Calendar.getInstance();
	calendar.setTime(date);
	return calendar;
}
 
Example 18
/**
 * Copies the properties found in the structure (parameter 0) onto the
 * workbook of the spreadsheet (parameter 1).
 *
 * For an XSSF workbook the keys author, category, subject, title, revision
 * and description are written to the core properties. For any other workbook
 * (treated as HSSF) the keys category, manager and company go to the document
 * summary information, and title, subject, author, comments, keywords and
 * lastauthor go to the summary information. Missing HSSF property sets are
 * created before they are written to.
 *
 * @param _session the current session, used for error reporting
 * @param parameters parameter 0 must be a structure, parameter 1 the spreadsheet
 * @return always {@code cfBooleanData.TRUE}
 * @throws cfmRunTimeException presumably raised by {@code throwException} when
 *      parameter 0 is not a structure - confirm against its definition
 */
public cfData execute( cfSession _session, List<cfData> parameters ) throws cfmRunTimeException {
	if ( parameters.get(0).getDataType() != cfData.CFSTRUCTDATA )
		throwException(_session, "parameter must be of type structure");
	
	cfSpreadSheetData	spreadsheet	= (cfSpreadSheetData)parameters.get(1);
	cfStructData	s	= (cfStructData)parameters.get(0);
	
	Workbook	workbook	= spreadsheet.getWorkBook();
	
	/*
	 * XSSFWorkbook
	 */
	if ( workbook instanceof XSSFWorkbook ){
		XSSFWorkbook xSSFWorkbook = (XSSFWorkbook)workbook;
		
		CoreProperties cP = xSSFWorkbook.getProperties().getCoreProperties();
		
		if ( s.containsKey("author") )
			cP.setCreator( s.getData("author").getString() );
		if ( s.containsKey("category") )
			cP.setCategory( s.getData("category").getString() );
		if ( s.containsKey("subject") )
			cP.setSubjectProperty( s.getData("subject").getString() );
		if ( s.containsKey("title") )
			cP.setTitle( s.getData("title").getString() );
		if ( s.containsKey("revision") )
			cP.setRevision( s.getData("revision").getString() );
		if ( s.containsKey("description") )
			cP.setDescription( s.getData("description").getString() );
		 		
	}else{
		HSSFWorkbook hSSFWorkbook = (HSSFWorkbook)workbook;
		DocumentSummaryInformation dSummary = hSSFWorkbook.getDocumentSummaryInformation();
		SummaryInformation sInformation = hSSFWorkbook.getSummaryInformation();
		
		// Either property set may be absent on its own; create whatever is
		// missing and fetch both again before writing to them.
		if ( dSummary == null || sInformation == null ){
			hSSFWorkbook.createInformationProperties();
			dSummary = hSSFWorkbook.getDocumentSummaryInformation();
			sInformation = hSSFWorkbook.getSummaryInformation();
		}
		
		if ( s.containsKey("category") )
			dSummary.setCategory( s.getData("category").getString() );
		if ( s.containsKey("manager") )
			dSummary.setManager( s.getData("manager").getString() );
		if ( s.containsKey("company") )
			dSummary.setCompany( s.getData("company").getString() );

		if ( s.containsKey("title") )
			sInformation.setTitle( s.getData("title").getString() );
		if ( s.containsKey("subject") )
			sInformation.setSubject( s.getData("subject").getString() );
		if ( s.containsKey("author") )
			sInformation.setAuthor( s.getData("author").getString() );
		if ( s.containsKey("comments") )
			sInformation.setComments( s.getData("comments").getString() );
		if ( s.containsKey("keywords") )
			sInformation.setKeywords( s.getData("keywords").getString() );
		if ( s.containsKey("lastauthor") )
			sInformation.setLastAuthor( s.getData("lastauthor").getString() );
	}
	
	return cfBooleanData.TRUE;
}
 
Example 19
/**
 * Unsupported here: summary information metadata is not available
 *  in streaming mode.
 *
 * @throws IllegalStateException always
 */
public SummaryInformation getSummaryInformation() {
    final String reason =
            "Metadata extraction not supported in streaming mode, please use ExcelExtractor";
    throw new IllegalStateException(reason);
}
 
Example 20
Source Project: lams   Source File: POIOLE2TextExtractor.java    License: GNU General Public License v2.0 2 votes vote down vote up
/**
 * Fetches the summary information metadata from the underlying document.
 *
 * @return The Summary information for the document or null
 *      if it could not be read for this document.
 */
public SummaryInformation getSummaryInformation() {
	final SummaryInformation summary = document.getSummaryInformation();
	return summary;
}