Java Code Examples for org.apache.tika.mime.MediaType#parse()

The following examples show how to use org.apache.tika.mime.MediaType#parse(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HTMLRenderingEngine.java    From alfresco-repository with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Renders the source content as HTML, but only after confirming that the
 * Tika parser built from {@code tikaConfig} has a parser registered for
 * the source mime type.
 *
 * @param context the rendering context that supplies the source content reader
 * @throws RenditionServiceException if no parser is registered for the
 *         media type parsed from the source mime type
 */
@Override
protected void render(RenderingContext context)
{
    final String sourceMimeType = context.makeContentReader().getMimetype();

    // The same parser instance serves as both the capability check and the converter
    final AutoDetectParser parser = new AutoDetectParser(tikaConfig);
    final MediaType sourceMediaType = MediaType.parse(sourceMimeType);
    final boolean supported = parser.getParsers().containsKey(sourceMediaType);
    if (!supported)
    {
        throw new RenditionServiceException(
                "Source mime type of " + sourceMimeType +
                " is not supported by Tika for HTML conversions");
    }

    // Hand over to Tika for the HTML generation; per the original author's
    // note, this step also extracts any images that are found
    generateHTML(parser, context);
}
 
Example 2
Source File: ContentDetector.java    From onedev with MIT License 5 votes vote down vote up
/**
 * Asks Tika to detect the content type of the given stream (the file name
 * is passed along with it) and parses the detected name into a
 * {@link MediaType}.
 *
 * @param contentStream stream whose content is inspected
 * @param fileName file name handed to the detector; may be {@code null}
 * @return the result of {@code MediaType.parse} on the detected type name
 * @throws RuntimeException wrapping any {@link IOException} raised during detection
 */
public static MediaType detectMediaType(InputStream contentStream, @Nullable String fileName) {
	final String detectedName;
	try {
		detectedName = tika.detect(contentStream, fileName);
	} catch (IOException ioFailure) {
		// Callers are not expected to handle I/O failures here; rethrow unchecked, keeping the cause
		throw new RuntimeException(ioFailure);
	}
	return MediaType.parse(detectedName);
}
 
Example 3
Source File: UniversalEncodingListener.java    From onedev with MIT License 5 votes vote down vote up
public UniversalEncodingListener(Metadata metadata) {
    MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
    if (type != null) {
        hint = type.getParameters().get("charset");
    }
    if (hint == null) {
        hint = metadata.get(Metadata.CONTENT_ENCODING);
    }
}
 
Example 4
Source File: MimetypeMap.java    From alfresco-data-model with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Refines a detected type using the file name's extension. The lookup
 * table is a flat list of (detected type, possible type) pairs; when the
 * detected type matches a pair and the file name ends with the extension
 * of that pair's possible type, the possible type is returned instead.
 *
 * @param type the detected media type; returned as-is when {@code null}
 * @param filename the file name; when {@code null} the type is returned as-is
 * @return the refined media type, or {@code type} when no pair applies
 */
private MediaType typeBasedOnDetectedTypeAndExtension(MediaType type, String filename)
{
    if (filename == null || type == null)
    {
        return type;
    }

    // Flat table: even index = detected type, following odd index = possible type
    final String[] typePairs =
    {
        MIMETYPE_PDF, MIMETYPE_APPLICATION_ILLUSTRATOR,
        MIMETYPE_APPLICATION_PS, MIMETYPE_APPLICATION_EPS
    };

    // Walk the pairs from the last one to the first, as the original loop does
    for (int detectedIdx = typePairs.length - 2; detectedIdx >= 0; detectedIdx -= 2)
    {
        if (!typePairs[detectedIdx].equals(type.toString()))
        {
            continue;
        }

        final String candidate = typePairs[detectedIdx + 1];
        final String suffix = "." + getExtension(candidate);
        if (filename.endsWith(suffix))
        {
            return MediaType.parse(candidate);
        }
    }
    return type;
}
 
Example 5
Source File: MimetypeMap.java    From alfresco-data-model with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Use Apache Tika to check if the mime type of the document really matches
 * what it claims to be. This is typically used when a transformation or
 * metadata extraction fails, and you want to know if someone has renamed a
 * file and consequently it has the wrong mime type.
 * <p>
 * The claimed type is accepted when it equals the detected type, when
 * either type is a specialization of the other according to the Tika media
 * type registry, or when it equals one of the detected type's aliases.
 * If the claimed mime type is missing or cannot be parsed, it cannot match
 * anything, so the detected type is reported.
 *
 * @param reader the content reader whose claimed mime type is checked
 * @return Null if the mime type seems ok (or Tika cannot detect a type),
 *         otherwise the mime type it probably is
 */
public String getMimetypeIfNotMatches(ContentReader reader)
{
    MediaType type = detectType(null, reader);
    if (type == null)
    {
        // Tika doesn't know so we can't help, sorry...
        return null;
    }

    // Read the claimed mime type once so every comparison sees the same value
    String claimedMimetype = reader.getMimetype();
    String detectedMimetype = type.toString();

    // Is it a good match?
    if (detectedMimetype.equals(claimedMimetype))
    {
        return null;
    }

    // MediaType.parse returns null for a null or malformed string. Without
    // this guard the alias comparison below would throw a NullPointerException.
    MediaType claimed = MediaType.parse(claimedMimetype);
    if (claimed == null)
    {
        return detectedMimetype;
    }

    // Is it close?
    if (tikaConfig.getMediaTypeRegistry().isSpecializationOf(claimed, type)
            || tikaConfig.getMediaTypeRegistry().isSpecializationOf(type, claimed))
    {
        // Probably close enough
        return null;
    }

    // Check through known aliases of the type
    String claimedType = claimed.toString();
    SortedSet<MediaType> aliases = tikaConfig.getMediaTypeRegistry().getAliases(type);
    for (MediaType alias : aliases)
    {
        if (alias.toString().equals(claimedType))
        {
            return null;
        }
    }

    // If we get here, then most likely the type is wrong
    return detectedMimetype;
}
 
Example 6
Source File: TikaUtil.java    From scipio-erp with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the given string into a Tika {@link MediaType} without
 * normalizing it, so the result may be an alias rather than a canonical
 * type. The result is NOT guaranteed to exist in the registry.
 *
 * @param mediaType the media type string to parse
 * @return whatever {@code MediaType.parse} produces for the string
 */
public static MediaType asMediaType(String mediaType) {
    // Plain parse on purpose: the earlier registry-based lookup
    // (getMimeTypeForMediaTypeSafe) returned null for aliases.
    final MediaType parsed = MediaType.parse(mediaType);
    return parsed;
}
 
Example 7
Source File: HttpUtils.java    From flink-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Reduces a content-type value to its bare {@code type/subtype} form.
 *
 * @param contentType the content-type string to parse
 * @return {@code type/subtype}, or the empty string when
 *         {@code MediaType.parse} yields {@code null}
 */
public static String getMimeTypeFromContentType(String contentType) {
    final MediaType parsed = MediaType.parse(contentType);
    if (parsed == null) {
        return "";
    }
    return parsed.getType() + "/" + parsed.getSubtype();
}
 
Example 8
Source File: HttpUtils.java    From flink-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the {@code charset} parameter from a content-type value.
 *
 * @param contentType the content-type string to parse
 * @return the charset parameter, or the empty string when the value does
 *         not parse or carries no charset parameter
 */
public static String getCharsetFromContentType(String contentType) {
    final MediaType parsed = MediaType.parse(contentType);
    final String charset = (parsed == null) ? null : parsed.getParameters().get("charset");
    return (charset == null) ? "" : charset;
}
 
Example 9
Source File: BaseFetcher.java    From ache with Apache License 2.0 5 votes vote down vote up
/**
 * Strips any parameters from a content-type value, leaving only
 * {@code type/subtype}.
 *
 * @param contentType the content-type string to parse
 * @return {@code type/subtype}, or the empty string when
 *         {@code MediaType.parse} returns {@code null}
 */
protected static String getMimeTypeFromContentType(String contentType) {
    final MediaType mediaType = MediaType.parse(contentType);
    return (mediaType != null)
            ? mediaType.getType() + "/" + mediaType.getSubtype()
            : "";
}
 
Example 10
Source File: MediaTypeValidator.java    From iaf with Apache License 2.0 5 votes vote down vote up
/**
 * Detects the media type of an input stream. The stream is wrapped in a
 * {@code TikaInputStream} whose temporary files go to the directory named
 * by {@code pdfOutputlocation}; the wrapper is closed before returning.
 *
 * @param inputStream the stream whose content is inspected
 * @param filename the file name passed to the detector alongside the stream
 * @return the result of {@code MediaType.parse} on the detected type name
 * @throws IOException if detection or closing the wrapped stream fails
 */
public MediaType getMediaType(InputStream inputStream, String filename) throws IOException {
	// A fresh TemporaryResources per call, because it is not thread-safe
	final TemporaryResources tempResources = new TemporaryResources();
	tempResources.setTemporaryFileDirectory(Paths.get(pdfOutputlocation));

	try (TikaInputStream tikaStream = TikaInputStream.get(inputStream, tempResources)) {
		return MediaType.parse(tika.detect(tikaStream, filename));
	}
}
 
Example 11
Source File: ContentDetector.java    From onedev with MIT License 4 votes vote down vote up
/**
 * Asks Tika to detect the content type of the given bytes (the file name
 * is passed along with them) and parses the detected name into a
 * {@link MediaType}.
 *
 * @param contentBytes the content to inspect
 * @param fileName file name handed to the detector; may be {@code null}
 * @return the result of {@code MediaType.parse} on the detected type name
 */
public static MediaType detectMediaType(byte[] contentBytes, @Nullable String fileName) {
	final String detectedName = tika.detect(contentBytes, fileName);
	return MediaType.parse(detectedName);
}
 
Example 12
Source File: StrToMediaTypeDeserializer.java    From jwala with Apache License 2.0 4 votes vote down vote up
/**
 * Deserializes the parser's current text value into a {@link MediaType}.
 *
 * @param jp the JSON parser positioned on the value to read
 * @param ctxt the deserialization context (not used here)
 * @return the result of {@code MediaType.parse} on the current text
 * @throws IOException if reading the text from the parser fails
 */
@Override
public MediaType deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
    final String rawText = jp.getText();
    return MediaType.parse(rawText);
}
 
Example 13
Source File: SolrCellBuilder.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Parses a media type string after trimming it and lower-casing it with
 * {@code Locale.ROOT}, and returns the base type of the result.
 *
 * @param mediaTypeStr the media type string; must not be {@code null}
 * @return the base type of the parsed media type
 */
private MediaType parseMediaType(String mediaTypeStr) {
  final String normalized = mediaTypeStr.trim().toLowerCase(Locale.ROOT);
  return MediaType.parse(normalized).getBaseType();
}