Java Code Examples for com.optimaize.langdetect.profiles.LanguageProfileReader

The following examples show how to use com.optimaize.langdetect.profiles.LanguageProfileReader. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: vespa   Source File: OptimaizeDetector.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the shared Optimaize language detector and text object factory.
 * Returns immediately when both are already assigned; otherwise reads every
 * built-in language profile and assigns both while holding {@code initGuard}.
 *
 * @throws UncheckedIOException if the built-in language profiles cannot be read
 */
static private void initOptimaize() {
    synchronized (initGuard) {
        boolean alreadyInitialized = (textObjectFactory != null) && (languageDetector != null);
        if (alreadyInitialized) {
            return;
        }

        // Profiles ship with https://github.com/optimaize/language-detector
        final List<LanguageProfile> builtInProfiles;
        try {
            builtInProfiles = new LanguageProfileReader().readAllBuiltIn();
        } catch (IOException ioFailure) {
            throw new UncheckedIOException(ioFailure);
        }

        // Detector over the standard n-gram extractor, fed with every built-in profile.
        languageDetector = LanguageDetectorBuilder
                .create(NgramExtractors.standard())
                .withProfiles(builtInProfiles)
                .build();

        // Factory for the text objects that are handed to the detector.
        textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
    }
}
 
Example 2
Source Project: KaellyBot   Source File: Translator.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the shared language detector, building it on first use from the
 * profiles named by the lower-cased abbreviation of every {@code Language} value.
 *
 * @return the detector, or {@code null} when it has not been built because the
 *         language profiles could not be read (the failure is logged)
 */
private static LanguageDetector getLanguageDetector(){
    if (languageDetector == null){
        try {
            List<String> languages = new ArrayList<>();
            for (Language lg : Language.values()) {
                // Locale.ROOT keeps the profile names stable whatever the JVM's default
                // locale is (a Turkish default would map "I" to a dotless i).
                languages.add(lg.getAbrev().toLowerCase(java.util.Locale.ROOT));
            }

            List<LanguageProfile> languageProfiles = new LanguageProfileReader().read(languages);
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                            .withProfiles(languageProfiles).build();
        }
        catch (IOException e) {
            // Best effort: the detector stays unset and callers receive null.
            LOG.error("Translator.getLanguageDetector", e);
        }
    }
    return languageDetector;
}
 
Example 3
Source Project: modernmt   Source File: AbstractOptimaizeFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the detector stored in {@code detectorInstance}, creating it on the
 * first call from all built-in language profiles.
 * <p>
 * Creation uses double-checked locking on {@code AbstractOptimaizeFilter.class},
 * so the detector is built at most once even when several threads call this
 * method concurrently.
 * NOTE(review): this idiom is only safe when {@code detectorInstance} is declared
 * {@code volatile}; the declaration is not visible here, so confirm.
 *
 * @return the language detector
 * @throws RuntimeIOException if the built-in language profiles cannot be read
 */
protected final LanguageDetector getLanguageDetector() {
    // Unsynchronized fast path once the detector exists.
    if (detectorInstance == null) {
        synchronized (AbstractOptimaizeFilter.class) {
            // Re-check under the lock: another thread may have built it meanwhile.
            if (detectorInstance == null) {
                try {
                    // NOTE(review): 0 presumably disables the short-text algorithm;
                    // confirm against the LanguageDetectorBuilder documentation.
                    detectorInstance = LanguageDetectorBuilder.create(NgramExtractors.standard())
                            .shortTextAlgorithm(0)
                            .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                            .build();
                } catch (IOException e) {
                    // Rethrown unchecked so this method needs no throws clause.
                    throw new RuntimeIOException(e);
                }
            }
        }
    }

    return detectorInstance;
}
 
Example 4
Source Project: jstarcraft-nlp   Source File: CommandLineInterface.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a detector from every language profile found in the directory named
 * by the {@code directory} parameter.
 *
 * @return a detector configured from the {@code alpha} and {@code seed} parameters
 * @throws IOException if the profiles cannot be read
 */
private LanguageDetector makeDetector() throws IOException {
    double alphaParam = getParamDouble("alpha", DEFAULT_ALPHA);
    String directoryPath = requireParamString("directory") + "/";
    Optional<Long> seedParam = Optional.fromNullable(getParamLongOrNull("seed"));

    LanguageProfileReader profileReader = new LanguageProfileReader();
    List<LanguageProfile> directoryProfiles = profileReader.readAll(new File(directoryPath));

    return LanguageDetectorBuilder
            .create(NgramExtractors.standard())
            .alpha(alphaParam)
            .seed(seedParam)
            .shortTextAlgorithm(50)
            .withProfiles(directoryProfiles)
            .build();
}
 
Example 5
Source Project: translator   Source File: LanguageCheckerImpl.java    License: MIT License 5 votes vote down vote up
/**
 * Creates the checker: builds the language detector from all built-in language
 * profiles and creates the text object factory.
 * <p>
 * If the profiles cannot be read, the failure is logged and this constructor
 * does not assign {@code languageDetector}; the text object factory is still created.
 */
public LanguageCheckerImpl() {
    //build language detector:
    try {
        languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(new LanguageProfileReader().readAllBuiltIn()) //load all languages:
                .build();
    } catch (IOException e) {
        // Pass the exception itself as the final argument so the stack trace and
        // cause are logged, not only the message text.
        log.error("LanguageCheckerImpl {}", e.getMessage(), e);
    }

    //create a text object factory
    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
}
 
Example 6
/**
 * Creates the service: loads all built-in language profiles, builds the Optimaize
 * detector and its text object factory, then loads the Tika detector models and
 * creates the language writer on top of the Tika detector.
 * <p>
 * NOTE(review): an {@link IOException} is only printed, not rethrown. Every field
 * assigned after the failing call is left unassigned by this constructor, so later
 * use of those fields may fail; confirm this is intended.
 */
public NonEnglishCheckerService() {
    try {
        // Profiles are kept in a field and also used to build the detector below.
        languageProfiles = new LanguageProfileReader().readAllBuiltIn();
        optimaizeDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(languageProfiles)
                .build();
        textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
        // Second, independent detector; the writer is built around it.
        tikaDetector = new OptimaizeLangDetector().loadModels();
        writer = new LanguageWriter(tikaDetector);

    } catch (IOException e) {
        // Failure is reported on stderr only; construction still completes.
        e.printStackTrace();
    }
}
 
Example 7
Source Project: SkyTube   Source File: VideoBlocker.java    License: GNU General Public License v3.0 5 votes vote down vote up
private LanguageDetectionSingleton() throws IOException {
	// load all languages
	List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();

	// build language detector
	languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
			.withProfiles(languageProfiles)
			.build();

	// create a text object factory
	textObjectFactory = CommonTextObjectFactories.forDetectingShortCleanText();
}
 
Example 8
Source Project: baleen   Source File: DocumentLanguage.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the language detector from all built-in language profiles and creates
 * the text object factory for large text.
 *
 * @param aContext the UIMA context (not read by this method)
 * @throws ResourceInitializationException wrapping the {@link IOException} raised
 *     while initializing
 */
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
  try {
    final List<LanguageProfile> builtInProfiles = new LanguageProfileReader().readAllBuiltIn();

    languageDetector = LanguageDetectorBuilder
        .create(NgramExtractors.standard())
        .withProfiles(builtInProfiles)
        .build();
    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
  } catch (IOException loadFailure) {
    throw new ResourceInitializationException(loadFailure);
  }
}
 
Example 9
Source Project: language-detector   Source File: CommandLineInterface.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a language detector backed by all profiles read from the directory
 * given through the {@code directory} parameter.
 *
 * @return the detector, configured from the {@code alpha} and {@code seed} parameters
 * @throws IOException if the profiles cannot be read
 */
private LanguageDetector makeDetector() throws IOException {
    final double alphaValue = getParamDouble("alpha", DEFAULT_ALPHA);
    final String directoryName = requireParamString("directory") + "/";
    final Optional<Long> seedValue = Optional.fromNullable(getParamLongOrNull("seed"));
    final List<LanguageProfile> loadedProfiles =
            new LanguageProfileReader().readAll(new File(directoryName));

    // Fully configure the builder first, then build in a separate step.
    LanguageDetectorBuilder configuredBuilder = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(alphaValue)
            .seed(seedValue)
            .shortTextAlgorithm(50)
            .withProfiles(loadedProfiles);
    return configuredBuilder.build();
}
 
Example 10
/**
 * Builds the two detectors under test from one shared set of built-in language
 * profiles: {@code shortDetector} with {@code shortTextAlgorithm(100)} and
 * {@code longDetector} with {@code shortTextAlgorithm(0)}.
 *
 * @throws IOException if the built-in language profiles cannot be read
 */
public DataLanguageDetectorImplTest() throws IOException {
    // Read the built-in profiles once and reuse the list for both detectors
    // instead of loading everything from the classpath a second time.
    List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();

    shortDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(100)
            .withProfiles(languageProfiles)
            .build();

    longDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(0)
            .withProfiles(languageProfiles)
            .build();
}
 
Example 11
Source Project: jstarcraft-nlp   Source File: LanguageProfileValidator.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Appends every {@link LanguageProfile} returned by
 * {@link LanguageProfileReader#readAllBuiltIn()} to this validator's profiles.
 *
 * @return this validator, to allow call chaining
 * @throws IOException if the built-in profiles cannot be read
 */
public LanguageProfileValidator loadAllBuiltInLanguageProfiles() throws IOException {
    LanguageProfileReader profileReader = new LanguageProfileReader();
    List<LanguageProfile> builtInProfiles = profileReader.readAllBuiltIn();
    this.languageProfiles.addAll(builtInProfiles);
    return this;
}
 
Example 12
Source Project: jstarcraft-nlp   Source File: NgramFrequencyDataTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates n-gram frequency data over all built-in language profiles for a
 * single gram size.
 *
 * @param gramSize the only gram size to include
 * @return the frequency data for that gram size
 * @throws IOException if the built-in profiles cannot be read
 */
private static NgramFrequencyData forAll(int gramSize) throws IOException {
    return NgramFrequencyData.create(
            new LanguageProfileReader().readAllBuiltIn(),
            ImmutableSet.of(gramSize));
}
 
Example 13
/**
 * Creates the guesser with a detector built from all built-in language profiles
 * and the standard n-gram extractor.
 *
 * @throws IOException if the built-in language profiles cannot be read
 */
public OptimaizeLanguageGuesser() throws IOException {
    LanguageDetectorBuilder detectorBuilder = LanguageDetectorBuilder.create(NgramExtractors.standard());
    this.languageDetector = detectorBuilder
            .withProfiles(new LanguageProfileReader().readAllBuiltIn())
            .build();
}
 
Example 14
/**
 * Loads the profiles provided by {@link LanguageProfileReader#readAllBuiltIn()}
 * and adds each {@link LanguageProfile} to this validator.
 *
 * @return this validator, so further calls can be chained
 * @throws IOException if the built-in profiles cannot be read
 */
public LanguageProfileValidator loadAllBuiltInLanguageProfiles() throws IOException {
    List<LanguageProfile> allBuiltIn = new LanguageProfileReader().readAllBuiltIn();
    this.languageProfiles.addAll(allBuiltIn);
    return this;
}
 
Example 15
Source Project: language-detector   Source File: NgramFrequencyDataTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds the n-gram frequency data of every built-in language profile,
 * restricted to one gram size.
 *
 * @param gramSize the gram size to include
 * @return frequency data covering only {@code gramSize}
 * @throws IOException if the built-in profiles cannot be read
 */
private static NgramFrequencyData forAll(int gramSize) throws IOException {
    LanguageProfileReader profileReader = new LanguageProfileReader();
    List<LanguageProfile> builtInProfiles = profileReader.readAllBuiltIn();
    ImmutableSet<Integer> gramSizes = ImmutableSet.of(gramSize);
    return NgramFrequencyData.create(builtInProfiles, gramSizes);
}
 
Example 16
/**
 * Builds the two detectors under test from one shared set of built-in language
 * profiles: {@code shortDetector} with {@code shortTextAlgorithm(100)} and
 * {@code longDetector} with {@code shortTextAlgorithm(0)}.
 *
 * @throws IOException if the built-in language profiles cannot be read
 */
public DataLanguageDetectorImplTest() throws IOException {
    // Read the built-in profiles once and reuse the list for both detectors
    // instead of loading everything from the classpath a second time.
    List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();

    shortDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(100)
            .withProfiles(languageProfiles)
            .build();

    longDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(0)
            .withProfiles(languageProfiles)
            .build();
}