org.apache.tika.detect.DefaultDetector Java Examples

The following examples show how to use org.apache.tika.detect.DefaultDetector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TikaPoweredContainerExtractor.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Stores the supplied Tika configuration and rebuilds the detector and
 * parser that are derived from it.
 *
 * @param tikaConfig the Tika configuration to use
 */
public void setTikaConfig(TikaConfig tikaConfig)
{
    this.config = tikaConfig;

    // The detector is backed by the configuration's MIME repository, and the
    // auto-detecting parser is handed that same detector instance.
    this.detector = new DefaultDetector(tikaConfig.getMimeRepository());
    this.parser = new AutoDetectParser(this.detector);
}
 
Example #2
Source File: MimeTypeUtils.java    From oodt with Apache License 2.0 5 votes vote down vote up
public MimeTypeUtils(InputStream mimeIs, boolean magic) {
	try {
		this.mimeTypes = MimeTypesFactory.create(mimeIs);
		this.mimeMagic = magic;
		this.tika = new Tika(new DefaultDetector(this.mimeTypes));
	}catch (Exception e) {
		LOG.log(Level.SEVERE, "Failed to load MimeType Registry : " + e.getMessage(), e);
	}
}
 
Example #3
Source File: TikaAnalysis.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Detects the media type of the given stream using a freshly created
 * {@code DefaultDetector} and an empty {@code Metadata} instance.
 *
 * <p>NOTE(review): Tika's detector contract appears to expect a stream that
 * supports mark/reset - confirm that callers pass one.
 *
 * @param stream the content to inspect
 * @return the string form of the detected media type
 * @throws IOException if the detector fails while reading the stream
 */
public static String detectDocTypeUsingDetector(InputStream stream) throws IOException {
    return new DefaultDetector()
            .detect(stream, new Metadata())
            .toString();
}
 
Example #4
Source File: MimetypeMap.java    From alfresco-data-model with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Initialises the map using the configuration service provided.
 *
 * <p>Steps, in order: checks the mandatory properties, optionally creates the
 * JSON config file finder, falls back to the default Tika configuration when
 * none was supplied, creates the mimetype detector and finally triggers the
 * configuration scheduler.
 */
public void init()
{
    PropertyCheck.mandatory(this, "configService", configService);
    PropertyCheck.mandatory(this, "contentCharsetFinder", contentCharsetFinder);

    // Any one of these properties being set indicates that JSON will be read.
    final boolean jsonConfigured = mimetypeJsonConfigDir != null
            || jsonObjectMapper != null
            || cronExpression != null
            || initialAndOnErrorCronExpression != null;
    if (jsonConfigured)
    {
        PropertyCheck.mandatory(this, "jsonObjectMapper", jsonObjectMapper);
        // A cronExpression means reading will be scheduled, which also needs
        // the initial/on-error expression.
        if (cronExpression != null)
        {
            PropertyCheck.mandatory(this, "initialAndOnErrorCronExpression", initialAndOnErrorCronExpression);
        }
        jsonConfigFileFinder = new ConfigFileFinder(jsonObjectMapper)
        {
            /**
             * Converts the "mediaTypes" array of the given JSON document into
             * mimetype config elements and registers them. Documents without
             * such an array are ignored.
             */
            @Override
            protected void readJson(JsonNode jsonNode, String readFromMessage, String baseUrl) throws IOException
            {
                try
                {
                    JsonNode mediaTypeNodes = jsonNode.get("mediaTypes");
                    if (mediaTypeNodes == null || !mediaTypeNodes.isArray())
                    {
                        // Nothing to register from this document.
                        return;
                    }

                    List<ConfigElement> elements = new ArrayList<>();
                    for (JsonNode mediaTypeNode : mediaTypeNodes)
                    {
                        MediaTypeDef definition = jsonObjectMapper.convertValue(mediaTypeNode, MediaTypeDef.class);
                        GenericConfigElement element = new GenericConfigElement(ATTR_MIMETYPE);
                        element.addAttribute(ATTR_DISPLAY, definition.name);
                        element.addAttribute(ATTR_MIMETYPE, definition.mediaType);
                        if (definition.text)
                        {
                            element.addAttribute(ATTR_TEXT, Boolean.TRUE.toString());
                        }

                        // Track how many extensions were added and the last one,
                        // so a lone extension can be made the default below.
                        GenericConfigElement lastExtension = null;
                        int extensionCount = 0;
                        for (ExtensionDef extensionDef : definition.extensions)
                        {
                            GenericConfigElement extensionElement = new GenericConfigElement(ATTR_EXTENSION);
                            extensionElement.setValue(extensionDef.extension);
                            if (extensionDef.name != null && !extensionDef.name.isBlank())
                            {
                                extensionElement.addAttribute(ATTR_DISPLAY, extensionDef.name);
                            }
                            if (extensionDef.isDefault)
                            {
                                extensionElement.addAttribute(ATTR_DEFAULT, Boolean.TRUE.toString());
                            }
                            element.addChild(extensionElement);
                            lastExtension = extensionElement;
                            extensionCount++;
                        }
                        // A single extension is marked as the default even if
                        // the JSON did not flag it.
                        if (extensionCount == 1 && lastExtension.getAttribute(ATTR_DEFAULT) == null)
                        {
                            lastExtension.addAttribute(ATTR_DEFAULT, Boolean.TRUE.toString());
                        }
                        elements.add(element);
                    }
                    registerMimetypes(elements);
                    getData().fileCount++;
                }
                catch (IllegalArgumentException e)
                {
                    logger.error("Error reading "+readFromMessage+" "+e.getMessage());
                }
            }
        };
    }

    // TikaConfig should be given, but work around it if not
    if (tikaConfig == null)
    {
        logger.warn("TikaConfig spring parameter not supplied, using default config");
        setTikaConfig(TikaConfig.getDefaultConfig());
    }
    // Create our Tika mimetype detector up-front
    // We can then be sure we only have the one, so it's quick (ALF-10813)
    detector = new DefaultDetector(tikaConfig.getMimeRepository());

    // Work out the mappings - only runs once and straight away if cronExpression is null
    configScheduler.run(true, logger, cronExpression, initialAndOnErrorCronExpression);
}
 
Example #5
Source File: AssetMimeHandler.java    From usergrid with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the handler with a {@code DefaultDetector} built from its no-argument constructor.
 */
AssetMimeHandler() {
    this.detector = new DefaultDetector();
}
 
Example #6
Source File: TikaContentExtractor.java    From cxf with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a Tika-based content extractor from the given parsers, pairing
 * them with a newly created {@code DefaultDetector}.
 *
 * @param parsers the parser instances to use
 */
public TikaContentExtractor(final List<Parser> parsers) {
    this(
        parsers,
        new DefaultDetector()
    );
}
 
Example #7
Source File: TikaContentExtractor.java    From cxf with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a Tika-based content extractor around a single parser, with
 * optional media type validation. When validation is enabled, the
 * implementation will try to detect the media type of the input and
 * validate it against the media types supported by the parser.
 *
 * @param parser the parser instance to use
 * @param validateMediaType {@code true} to enable media type validation
 *        (a {@code DefaultDetector} is supplied), {@code false} to disable
 *        it (no detector is supplied)
 */
public TikaContentExtractor(final Parser parser, final boolean validateMediaType) {
    this(
        Collections.singletonList(parser),
        validateMediaType
            ? new DefaultDetector()
            : null
    );
}