org.apache.tika.metadata.Metadata Java Examples
The following examples show how to use
org.apache.tika.metadata.Metadata.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage examples in the sidebar.
Example #1
Source File: DetectMimeTypeBuilder.java From kite with Apache License 2.0 | 7 votes |
/** * Detects the content type of the given input event. Returns * <code>application/octet-stream</code> if the type of the event can not be * detected. * <p> * It is legal for the event headers or body to be empty. The detector may * read bytes from the start of the body stream to help in type detection. * * @return detected media type, or <code>application/octet-stream</code> */ private String getMediaType(InputStream in, Metadata metadata, boolean excludeParameters) { MediaType mediaType; try { mediaType = getDetector().detect(in, metadata); } catch (IOException e) { throw new MorphlineRuntimeException(e); } String mediaTypeStr = mediaType.toString(); if (excludeParameters) { int i = mediaTypeStr.indexOf(';'); if (i >= 0) { mediaTypeStr = mediaTypeStr.substring(0, i); } } return mediaTypeStr; }
Example #2
Source File: StoredFile.java From openmeetings with Apache License 2.0 | 6 votes |
private void init(String inName, String inExt, InputStream is) { if (Strings.isEmpty(inExt)) { int idx = inName.lastIndexOf('.'); name = idx < 0 ? inName : inName.substring(0, idx); ext = getFileExt(inName); } else { name = inName; ext = inExt.toLowerCase(Locale.ROOT); } Metadata md = new Metadata(); md.add(RESOURCE_NAME_KEY, String.format(FILE_NAME_FMT, name, ext)); try { mime = tika.getDetector().detect(is == null ? null : TikaInputStream.get(is), md); } catch (Throwable e) { mime = null; log.error("Unexpected exception while detecting mime type", e); } }
Example #3
Source File: ContentExtractor.java From jate with GNU Lesser General Public License v3.0 | 6 votes |
private String parseTXTToString(InputStream stream, Metadata metadata) throws IOException, TikaException { WriteOutContentHandler handler = new WriteOutContentHandler(maxStringLength); try { ParseContext context = new ParseContext(); context.set(Parser.class, txtParser); txtParser.parse(stream, new BodyContentHandler(handler), metadata, context); } catch (SAXException e) { if (!handler.isWriteLimitReached(e)) { // This should never happen with BodyContentHandler... throw new TikaException("Unexpected SAX processing failure", e); } } finally { stream.close(); } return handler.toString(); }
Example #4
Source File: TikaDocumentItemProcessor.java From CogStack-Pipeline with Apache License 2.0 | 6 votes |
private void extractPageCountMetadata(Document doc, Set<String> metaKeys, Metadata metadata) { if (metaKeys.contains("xmpTPg:NPages")) { doc.getAssociativeArray().put("X-TL-PAGE-COUNT", metadata.get("xmpTPg:NPages")); } else if (metaKeys.contains("Page-Count")) { doc.getAssociativeArray().put("X-TL-PAGE-COUNT", metadata.get("Page-Count")); } else if (metaKeys.contains("meta:page-count")) { doc.getAssociativeArray().put("X-TL-PAGE-COUNT", metadata.get("meta:page-count")); } else { doc.getAssociativeArray().put("X-TL-PAGE-COUNT", "TL_PAGE_COUNT_UNKNOWN"); } }
Example #5
Source File: CSVDetector.java From data-prep with Apache License 2.0 | 6 votes |
/** * A private utility class used to detect format. * * @param metadata the specified TIKA {@link Metadata} * @param inputStream the specified input stream * @return either null or an CSV format * @throws IOException */ private Format detectText(Metadata metadata, InputStream inputStream) throws IOException { MediaType mediaType = mimeTypes.detect(inputStream, metadata); if (mediaType != null) { String mediaTypeName = mediaType.toString(); if (StringUtils.startsWith(mediaTypeName, TEXT_PLAIN)) { Charset charset = null; try { charset = encodingDetector.detect(inputStream, metadata); } catch (IOException e) { LOGGER.debug("Unable to detect the encoding for a data set in CSV format", e); } if (charset != null) { return new Format(csvFormatFamily, charset.name()); } else { return new Format(csvFormatFamily, FormatUtils.DEFAULT_ENCODING); } } } return null; }
Example #6
Source File: ExtractorTest.java From extract with MIT License | 6 votes |
@Test public void testIgnoreEmbeds() throws Throwable { final Extractor extractor = new Extractor(); extractor.setEmbedHandling(Extractor.EmbedHandling.IGNORE); Assert.assertEquals(extractor.getEmbedHandling(), Extractor.EmbedHandling.IGNORE); TikaDocument tikaDocument = extractor.extract(Paths.get(getClass().getResource("/documents/ocr/embedded.pdf").getPath())); String text; try (final Reader reader = tikaDocument.getReader()) { text = Spewer.toString(reader); } Assert.assertEquals("application/pdf", tikaDocument.getMetadata().get(Metadata.CONTENT_TYPE)); Assert.assertEquals("\n\n\n\n", text); }
Example #7
Source File: TikaFormat.java From gate-core with GNU Lesser General Public License v3.0 | 6 votes |
private void setTikaFeature(Metadata metadata, Property property, FeatureMap fmap) { String value = metadata.get(property); if (value == null) { return; } value = value.trim(); if (value.length() == 0) { return; } String key = property.getName().toUpperCase(); if (fmap.containsKey(key)) { fmap.put("TIKA_" + key, value); } else { fmap.put(key, value); fmap.put("TIKA_" + key, value); } }
Example #8
Source File: ExtractorTest.java From extract with MIT License | 6 votes |
@Test public void testGarbage() throws Throwable { final Extractor extractor = new Extractor(); TikaDocument tikaDocument = extractor.extract(Paths.get(getClass().getResource("/documents/garbage.bin").getPath())); thrown.expect(IOException.class); thrown.expectMessage(""); thrown.expectCause(new CauseMatcher(TikaException.class, "Parse error")); final int read; try (final Reader reader = tikaDocument.getReader()) { read = reader.read(); } catch (IOException e) { Assert.assertEquals("application/octet-stream", tikaDocument.getMetadata().get(Metadata.CONTENT_TYPE)); throw e; } Assert.fail(String.format("Read \"%d\" while expecting exception.", read)); }
Example #9
Source File: NodeTika.java From node-tika with MIT License | 6 votes |
public static String extractMeta(String uri, String contentType) throws Exception { final AutoDetectParser parser = createParser(); final Metadata metadata = new Metadata(); fillMetadata(parser, metadata, contentType, uri); final TikaInputStream inputStream = createInputStream(uri, metadata); parser.parse(inputStream, new DefaultHandler(), metadata); Map meta = new HashMap(); for (String name : metadata.names()) { String[] values = metadata.getValues(name); meta.put(name, values); } inputStream.close(); return new Gson().toJson(meta); }
Example #10
Source File: ContentMetadataExtracterTagMappingTest.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
@SuppressWarnings("unchecked") public Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable { Map<String, Serializable> rawMap = super.extractRaw(reader); // Add some test keywords to those actually extracted from the file including a nodeRef List<String> keywords = new ArrayList<String>(Arrays.asList( new String[] { existingTagNodeRef, TAG_2, TAG_3, TAG_NONEXISTENT_NODEREF })); Serializable extractedKeywords = rawMap.get(Metadata.KEYWORDS); if (extractedKeywords != null && extractedKeywords instanceof String) { keywords.add((String) extractedKeywords); } else if (extractedKeywords != null && extractedKeywords instanceof Collection<?>) { keywords.addAll((Collection<? extends String>) extractedKeywords); } putRawValue(Metadata.KEYWORDS, (Serializable) keywords, rawMap); return rawMap; }
Example #11
Source File: TikaAutoMetadataExtracterTest.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
@Override public void setUp() throws Exception { super.setUp(); TikaConfig config = (TikaConfig)ctx.getBean("tikaConfig"); extracter = new TikaAutoMetadataExtracter(config); extracter.setDictionaryService(dictionaryService); extracter.register(); // Attach some extra mappings, using the Tika // metadata keys namespace // These will be tested later HashMap<String, Set<QName>> newMap = new HashMap<String, Set<QName>>( extracter.getMapping() ); Set<QName> tlaSet = new HashSet<QName>(); tlaSet.add(TIKA_MIMETYPE_TEST_PROPERTY); newMap.put( Metadata.CONTENT_TYPE, tlaSet ); extracter.setMapping(newMap); }
Example #12
Source File: ReplayCrawl.java From ache with Apache License 2.0 | 6 votes |
private Metadata createHeadersMetadata(Page page) { Map<String, List<String>> headers = page.getResponseHeaders(); Metadata metadata = new Metadata(); for (Entry<String, List<String>> header : headers.entrySet()) { for (String value : header.getValue()) { metadata.set(header.getKey(), value); } } return metadata; }
Example #13
Source File: TikaAutoMetadataExtracter.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
/** * Because some editors use JPEG_IMAGE_HEIGHT_TAG when * saving JPEG images , a more reliable source for * image size are the values provided by Tika * and not the exif/tiff metadata read from the file * This will override the tiff:Image size * which gets embedded into the alfresco node properties * for jpeg files that contain such exif information */ @Override protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties, Map<String,String> headers) { if(MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(metadata.get(Metadata.CONTENT_TYPE))) { //check if the image has exif information if(metadata.get(EXIF_IMAGE_WIDTH_TAG) != null && metadata.get(EXIF_IMAGE_HEIGHT_TAG) != null && metadata.get(COMPRESSION_TAG) != null) { //replace the exif size properties that will be embedded in the node with //the guessed dimensions from Tika putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(metadata.get(EXIF_IMAGE_HEIGHT_TAG)), properties); putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(metadata.get(EXIF_IMAGE_WIDTH_TAG)), properties); putRawValue(JPEG_IMAGE_HEIGHT_TAG, metadata.get(EXIF_IMAGE_HEIGHT_TAG), properties); putRawValue(JPEG_IMAGE_WIDTH_TAG, metadata.get(EXIF_IMAGE_WIDTH_TAG), properties); } } return properties; }
Example #14
Source File: TikaPoweredMetadataExtracter.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
private String getMetadataValue(Metadata metadata, String key) { if (metadata.isMultiValued(key)) { String[] parts = metadata.getValues(key); // use Set to prevent duplicates Set<String> value = new LinkedHashSet<String>(parts.length); for (int i = 0; i < parts.length; i++) { value.add(parts[i]); } String valueStr = value.toString(); // remove leading/trailing braces [] return valueStr.substring(1, valueStr.length() - 1); } else { return metadata.get(key); } }
Example #15
Source File: NodeTika.java From node-tika with MIT License | 6 votes |
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } return contentType; }
Example #16
Source File: Page.java From ache with Apache License 2.0 | 6 votes |
private void parseResponseHeaders(Metadata headerAsMetadata) { Map<String, List<String>> responseHeaders = new HashMap<>(); String[] names = headerAsMetadata.names(); if(names != null && names.length > 0) { for(String name : names) { List<String> values = Arrays.asList(headerAsMetadata.getValues(name)); if(values.isEmpty()) { continue; } responseHeaders.put(name, values); if("content-type".compareToIgnoreCase(name) == 0) { this.contentType = values.get(0); } } } this.responseHeaders = responseHeaders; }
Example #17
Source File: UniversalEncodingDetector.java From onedev with MIT License | 6 votes |
public static Charset detect(InputStream input) throws IOException { input.mark(LOOKAHEAD); try { UniversalEncodingListener listener = new UniversalEncodingListener(new Metadata()); byte[] b = new byte[BUFSIZE]; int n = 0; int m = input.read(b); while (m != -1 && n < LOOKAHEAD && !listener.isDone()) { n += m; listener.handleData(b, 0, m); m = input.read(b, 0, Math.min(b.length, LOOKAHEAD - n)); } return listener.dataEnd(); } catch (IOException e) { throw e; } finally { input.reset(); } }
Example #18
Source File: TikaAudioMetadataExtracter.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
/** * Generate the description * * @param metadata the metadata extracted from the file * @return the description */ @SuppressWarnings("deprecation") private String generateDescription(Metadata metadata) { StringBuilder result = new StringBuilder(); if (metadata.get(Metadata.TITLE) != null) { result.append(metadata.get(Metadata.TITLE)); if (metadata.get(XMPDM.ALBUM) != null) { result .append(" - ") .append(metadata.get(XMPDM.ALBUM)); } if (metadata.get(XMPDM.ARTIST) != null) { result .append(" (") .append(metadata.get(XMPDM.ARTIST)) .append(")"); } } return result.toString(); }
Example #19
Source File: TikaAudioMetadataExtracter.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
/** * Generates the release date */ private Date generateReleaseDate(Metadata metadata) { String date = metadata.get(XMPDM.RELEASE_DATE); if(date == null || date.length() == 0) { return null; } // Is it just a year? if(date.matches("\\d\\d\\d\\d")) { // Just a year, we need a full date // Go for the 1st of the 1st Calendar c = Calendar.getInstance(); c.set( Integer.parseInt(date), Calendar.JANUARY, 1, 0, 0, 0 ); c.set(Calendar.MILLISECOND, 0); return c.getTime(); } // Treat as a normal date return makeDate(date); }
Example #20
Source File: TikaAudioMetadataExtracter.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
@Override protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties, Map<String,String> headers) { // Most things can go with the default Tika -> Alfresco Mapping // Handle the few special cases here // The description is special putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties); // The release date can be fiddly Date releaseDate = generateReleaseDate(metadata); putRawValue(KEY_CREATED, releaseDate, properties); putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties); // TODO Get the Lyrics from the content //putRawValue(KEY_LYRICS, getLyrics(), properties); // All done return properties; }
Example #21
Source File: TikaLuceneContentExtractor.java From cxf with Apache License 2.0 | 5 votes |
private Document extractAll(final InputStream in, LuceneDocumentMetadata documentMetadata, boolean extractContent, boolean extractMetadata) { TikaContent content = extractor.extract(in, extractContent ? new ToTextContentHandler() : null); if (content == null) { return null; } final Document document = new Document(); if (documentMetadata == null) { documentMetadata = defaultDocumentMetadata; } if (content.getContent() != null) { document.add(getContentField(documentMetadata, content.getContent())); } if (extractMetadata) { Metadata metadata = content.getMetadata(); for (final String property: metadata.names()) { addField(document, documentMetadata, property, metadata.get(property)); } } if (!StringUtils.isEmpty(documentMetadata.getSource())) { document.add(new StringField(documentMetadata.getSourceFieldName(), documentMetadata.getSource(), Store.YES)); } return document; }
Example #22
Source File: FileParser.java From scava with Eclipse Public License 2.0 | 5 votes |
/** * * @param file * @return Null if the file is not supported * @throws Exception */ public static FileContent extractText(File file) throws Exception { FileInputStream fis = fileToInputStream(file); BufferedInputStream bif = new BufferedInputStream(fis); Metadata metadata = new Metadata(); metadata.add(Metadata.RESOURCE_NAME_KEY, file.getName()); FileContent fileContent = extractText(bif, metadata); bif.close(); fis.close(); return fileContent; }
Example #23
Source File: OpenDocumentMetadataExtracter.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
@SuppressWarnings("deprecation") @Override protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> properties, Map<String, String> headers) { putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties); putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties); putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties); putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties); putRawValue(KEY_GENERATOR, metadata.get("generator"), properties); putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties); putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties); putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties); // putRawValue(KEY_PRINT_DATE, getDateOrNull(metadata.get(Metadata.)), rawProperties); // putRawValue(KEY_PRINTED_BY, metadata.get(Metadata.), rawProperties); // Handle user-defined properties dynamically Map<String, Set<QName>> mapping = super.getMapping(); for (String key : mapping.keySet()) { if (metadata.get(CUSTOM_PREFIX + key) != null) { putRawValue(key, metadata.get(CUSTOM_PREFIX + key), properties); } } return properties; }
Example #24
Source File: ParsingReader.java From extract with MIT License | 5 votes |
/** * Creates a reader for the content of the given binary stream * with the given document metadata. The given parser is used for the * parsing task that is run with the given executor. * * The created reader will be responsible for closing the given stream. * The stream and any associated resources will be closed at or before * the time when the {@link #close()} method is called on this reader. * * @param parser parser instance * @param input binary stream * @param metadata document metadata * @param context parsing context * @throws IOException if the document can not be parsed */ public ParsingReader(final Parser parser, final InputStream input, final Metadata metadata, final ParseContext context, final Function<Writer, ContentHandler> handler) throws IOException { final PipedReader pipedReader = new PipedReader(); this.parser = parser; reader = new BufferedReader(pipedReader); try { writer = new PipedWriter(pipedReader); } catch (IOException e) { throw new IllegalStateException(e); // Should never happen. } this.input = input; this.metadata = metadata; this.context = context; // Generate the handler. this.handler = handler.apply(writer); parse(); // TIKA-203: Buffer first character to force metadata extraction. reader.mark(1); //noinspection ResultOfMethodCallIgnored reader.read(); reader.reset(); }
Example #25
Source File: EmbedSpawner.java From extract with MIT License | 5 votes |
private void writeEmbed(final TikaInputStream tis, final EmbeddedTikaDocument embed, final String name) throws IOException { final Path destination = outputPath.resolve(embed.getHash()); final Path source; final Metadata metadata = embed.getMetadata(); final Object container = tis.getOpenContainer(); // If the input is a container, write it to a temporary file so that it can then be copied atomically. // This happens with, for example, an Outlook Message that is an attachment of another Outlook Message. if (container instanceof DirectoryEntry) { try (final TemporaryResources tmp = new TemporaryResources(); final POIFSFileSystem fs = new POIFSFileSystem()) { source = tmp.createTempFile(); saveEntries((DirectoryEntry) container, fs.getRoot()); try (final OutputStream output = Files.newOutputStream(source)) { fs.writeFilesystem(output); } } } else { source = tis.getPath(); } // Set the content-length as it isn't (always?) set by Tika for embeds. if (null == metadata.get(Metadata.CONTENT_LENGTH)) { metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(source))); } // To prevent massive duplication and because the disk is only a storage for underlying data, save using the // straight hash as a filename. try { Files.copy(source, destination); } catch (final FileAlreadyExistsException e) { if (Files.size(source) != Files.size(destination)) { Files.copy(source, destination, StandardCopyOption.REPLACE_EXISTING); } else { logger.info("Temporary file for document \"{}\" in \"{}\" already exists.", name, root); } } }
Example #26
Source File: CachingTesseractOCRParser.java From extract with MIT License | 5 votes |
private void cachedParse(final InputStream in, final ContentHandler handler, final Metadata metadata, final ParseContext context, TesseractOCRConfig config, final boolean inline) throws IOException, SAXException, TikaException { try (final TikaInputStream tis = TikaInputStream.get(in)) { cachedParse(tis, handler, metadata, context, config, inline); } catch (final InterruptedException e) { throw new TikaException("Interrupted.", e); } }
Example #27
Source File: ParseResultTest.java From beam with Apache License 2.0 | 5 votes |
static Metadata getMetadata() { Metadata m = new Metadata(); m.add("Author", "BeamTikaUser"); m.add("Author", "BeamTikaUser2"); m.add("Date", "2017-09-01"); return m; }
Example #28
Source File: MediaTypeDisablingDocumentSelector.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
@Override public boolean select(Metadata metadata) { String contentType = metadata.get(Metadata.CONTENT_TYPE); if (contentType == null || contentType.equals("") || disabledMediaTypes == null) { return true; } return !disabledMediaTypes.contains(contentType); }
Example #29
Source File: AttachAttribute.java From entando-components with GNU Lesser General Public License v3.0 | 5 votes |
@Override public String getIndexeableFieldValue() { StringBuilder buffer = new StringBuilder(); if (null != super.getIndexeableFieldValue()) { buffer.append(super.getIndexeableFieldValue()); } String extraValue = null; ResourceInterface resource = this.getResource(); if (resource != null) { InputStream is = ((AttachResource) resource).getResourceStream(); if (null != is) { AutoDetectParser parser = new AutoDetectParser(); BodyContentHandler handler = new BodyContentHandler(-1); Metadata metadata = new Metadata(); try { parser.parse(is, handler, metadata); extraValue = handler.toString(); } catch (Throwable t) { _logger.error("Error while processing the parsing", t); } finally { try { is.close(); } catch (IOException ex) { _logger.error("Error closing stream", ex); } } } } if (null != extraValue) { buffer.append(" ").append(extraValue); } return buffer.toString(); }
Example #30
Source File: NodeTika.java From node-tika with MIT License | 5 votes |
private static void fillMetadata(Metadata metadata, String contentType, String uri) { // Set the file name. if (uri != null) { metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); } // Normalise the content-type. contentType = normalizeContentType(contentType); // Set the content-type. if (contentType != null) { metadata.add(HttpHeaders.CONTENT_TYPE, contentType); } }