com.optimaize.langdetect.profiles.LanguageProfileReader Java Examples

The following examples show how to use com.optimaize.langdetect.profiles.LanguageProfileReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OptimaizeDetector.java    From vespa with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the shared {@code languageDetector} and {@code textObjectFactory}.
 *
 * <p>The whole routine runs while holding {@code initGuard}. If both fields are
 * already non-null nothing is done; otherwise all built-in language profiles are
 * read and both fields are (re)assigned.
 *
 * @throws UncheckedIOException if the built-in profiles cannot be read
 */
static private void initOptimaize() {
    synchronized (initGuard) {
        boolean alreadyInitialized = textObjectFactory != null && languageDetector != null;
        if (alreadyInitialized) {
            return;
        }

        // origin: https://github.com/optimaize/language-detector
        // Step 1: load the profiles of every built-in language.
        final List<LanguageProfile> builtInProfiles;
        try {
            builtInProfiles = new LanguageProfileReader().readAllBuiltIn();
        } catch (IOException readFailure) {
            throw new UncheckedIOException(readFailure);
        }

        // Step 2: build the detector from those profiles.
        languageDetector = LanguageDetectorBuilder
                .create(NgramExtractors.standard())
                .withProfiles(builtInProfiles)
                .build();

        // Step 3: create the text object factory.
        textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
    }
}
 
Example #2
Source File: Translator.java    From KaellyBot with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the cached language detector, building it first if it is still {@code null}.
 *
 * <p>The detector is built from the profiles named by the lower-cased abbreviation of
 * every {@code Language} value. If reading the profiles fails, the error is logged and
 * the field is left untouched, so this method may return {@code null}.
 *
 * @return the cached detector, or {@code null} if it could not be built
 */
private static LanguageDetector getLanguageDetector(){
    if (languageDetector == null){
        try {
            List<String> languages = new ArrayList<>();
            for (Language lg : Language.values()) {
                // Lower-case with Locale.ROOT: the no-arg toLowerCase() depends on the JVM's
                // default locale (e.g. "I" becomes a dotless "ı" under a Turkish locale), which
                // would produce profile names that do not exist.
                languages.add(lg.getAbrev().toLowerCase(java.util.Locale.ROOT));
            }

            List<LanguageProfile> languageProfiles = new LanguageProfileReader().read(languages);
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                            .withProfiles(languageProfiles).build();
        }
        catch (IOException e) {
            LOG.error("Translator.getLanguageDetector", e);
        }
    }
    return languageDetector;
}
 
Example #3
Source File: AbstractOptimaizeFilter.java    From modernmt with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the shared detector, creating it on demand.
 *
 * <p>When {@code detectorInstance} is still {@code null}, the method locks on
 * {@code AbstractOptimaizeFilter.class}, re-checks the field and, if it is still unset,
 * builds a detector from all built-in profiles with {@code shortTextAlgorithm(0)}.
 *
 * @return the value of {@code detectorInstance}
 * @throws RuntimeIOException if the built-in profiles cannot be read
 */
protected final LanguageDetector getLanguageDetector() {
    // Fast path: already created, no locking needed.
    if (detectorInstance != null) {
        return detectorInstance;
    }

    synchronized (AbstractOptimaizeFilter.class) {
        // Re-check under the lock; another caller may have created it meanwhile.
        if (detectorInstance == null) {
            try {
                LanguageDetectorBuilder builder = LanguageDetectorBuilder
                        .create(NgramExtractors.standard())
                        .shortTextAlgorithm(0);
                detectorInstance = builder
                        .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                        .build();
            } catch (IOException readFailure) {
                throw new RuntimeIOException(readFailure);
            }
        }
    }

    return detectorInstance;
}
 
Example #4
Source File: CommandLineInterface.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a detector from every language profile stored in the configured directory.
 *
 * <p>Reads the {@code "alpha"} parameter (passing {@code DEFAULT_ALPHA} to the lookup),
 * the required {@code "directory"} parameter and the optional {@code "seed"} parameter,
 * then configures the builder with them and a short-text algorithm value of 50.
 *
 * @return the newly built detector
 * @throws IOException if the profiles cannot be read from the directory
 */
private LanguageDetector makeDetector() throws IOException {
    final double smoothing = getParamDouble("alpha", DEFAULT_ALPHA);
    final String directoryPath = requireParamString("directory") + "/";
    final Optional<Long> randomSeed = Optional.fromNullable(getParamLongOrNull("seed"));

    final File profileFolder = new File(directoryPath);
    final List<LanguageProfile> profiles = new LanguageProfileReader().readAll(profileFolder);

    return LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(smoothing)
            .seed(randomSeed)
            .shortTextAlgorithm(50)
            .withProfiles(profiles)
            .build();
}
 
Example #5
Source File: LanguageCheckerImpl.java    From translator with MIT License 5 votes vote down vote up
/**
 * Creates the checker: builds a language detector from all built-in profiles and
 * creates the text object factory.
 *
 * <p>Failure to read the profiles is logged rather than propagated, in which case
 * {@code languageDetector} is not assigned by this constructor.
 */
public LanguageCheckerImpl() {
    //build language detector:
    try {
        languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(new LanguageProfileReader().readAllBuiltIn()) //load all languages:
                .build();
    } catch (IOException e) {
        // Pass the exception itself as the trailing argument so the stack trace and cause
        // are recorded, not just the message text.
        // NOTE(review): assumes `log` is an SLF4J/Log4j2-style logger that treats a trailing
        // Throwable as the exception to log — confirm against the field declaration.
        log.error("LanguageCheckerImpl {}", e.getMessage(), e);
    }

    //create a text object factory
    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
}
 
Example #6
Source File: NonEnglishCheckerService.java    From Natty with GNU General Public License v3.0 5 votes vote down vote up
public NonEnglishCheckerService() {
    try {
        languageProfiles = new LanguageProfileReader().readAllBuiltIn();
        optimaizeDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(languageProfiles)
                .build();
        textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
        tikaDetector = new OptimaizeLangDetector().loadModels();
        writer = new LanguageWriter(tikaDetector);

    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Example #7
Source File: VideoBlocker.java    From SkyTube with GNU General Public License v3.0 5 votes vote down vote up
private LanguageDetectionSingleton() throws IOException {
	// load all languages
	List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();

	// build language detector
	languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
			.withProfiles(languageProfiles)
			.build();

	// create a text object factory
	textObjectFactory = CommonTextObjectFactories.forDetectingShortCleanText();
}
 
Example #8
Source File: DocumentLanguage.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the language detector from all built-in profiles and creates the text object
 * factory.
 *
 * @param aContext the UIMA context supplied by the framework (not read by this method)
 * @throws ResourceInitializationException wrapping any {@link IOException} raised while
 *     the components are being set up
 */
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
  try {
    final List<LanguageProfile> builtInProfiles = new LanguageProfileReader().readAllBuiltIn();
    final LanguageDetectorBuilder detectorBuilder =
        LanguageDetectorBuilder.create(NgramExtractors.standard());

    languageDetector = detectorBuilder.withProfiles(builtInProfiles).build();
    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
  } catch (IOException readFailure) {
    throw new ResourceInitializationException(readFailure);
  }
}
 
Example #9
Source File: CommandLineInterface.java    From language-detector with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a language detector that uses all language profiles from the given directory.
 *
 * <p>The {@code "alpha"}, {@code "directory"} (required) and {@code "seed"} parameters are
 * read in that order before the profiles are loaded; the builder is additionally
 * configured with {@code shortTextAlgorithm(50)}.
 *
 * @return the configured detector
 * @throws IOException if reading the profile directory fails
 */
private LanguageDetector makeDetector() throws IOException {
    double alphaParam = getParamDouble("alpha", DEFAULT_ALPHA);
    String dirWithSlash = requireParamString("directory") + "/";
    Optional<Long> seedParam = Optional.fromNullable(getParamLongOrNull("seed"));

    LanguageProfileReader profileReader = new LanguageProfileReader();
    List<LanguageProfile> loadedProfiles = profileReader.readAll(new File(dirWithSlash));

    LanguageDetector detector = LanguageDetectorBuilder
            .create(NgramExtractors.standard())
            .alpha(alphaParam)
            .seed(seedParam)
            .shortTextAlgorithm(50)
            .withProfiles(loadedProfiles)
            .build();
    return detector;
}
 
Example #10
Source File: DataLanguageDetectorImplTest.java    From language-detector with Apache License 2.0 5 votes vote down vote up
public DataLanguageDetectorImplTest() throws IOException {
    List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();

    shortDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(100)
            .withProfiles(languageProfiles)
            .build();

    longDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(0)
            .withProfiles(new LanguageProfileReader().readAllBuiltIn())
            .build();
}
 
Example #11
Source File: LanguageProfileValidator.java    From jstarcraft-nlp with Apache License 2.0 4 votes vote down vote up
/**
 * Appends every {@link LanguageProfile} returned by
 * {@link LanguageProfileReader#readAllBuiltIn()} to this validator's profile collection.
 *
 * @return this validator, to allow call chaining
 * @throws IOException if the built-in profiles cannot be read
 */
public LanguageProfileValidator loadAllBuiltInLanguageProfiles() throws IOException {
    LanguageProfileReader reader = new LanguageProfileReader();
    this.languageProfiles.addAll(reader.readAllBuiltIn());
    return this;
}
 
Example #12
Source File: NgramFrequencyDataTest.java    From jstarcraft-nlp with Apache License 2.0 4 votes vote down vote up
/**
 * Creates n-gram frequency data from all built-in language profiles, restricted to a
 * single gram size.
 *
 * @param gramSize the only gram size placed in the set handed to {@code NgramFrequencyData.create}
 * @return the frequency data created from the built-in profiles
 * @throws IOException if the built-in profiles cannot be read
 */
private static NgramFrequencyData forAll(int gramSize) throws IOException {
    return NgramFrequencyData.create(
            new LanguageProfileReader().readAllBuiltIn(),
            ImmutableSet.of(gramSize));
}
 
Example #13
Source File: OptimaizeLanguageGuesser.java    From datashare with GNU Affero General Public License v3.0 4 votes vote down vote up
public OptimaizeLanguageGuesser() throws IOException {
    this.languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                    .build();
}
 
Example #14
Source File: LanguageProfileValidator.java    From language-detector with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the profiles provided by {@link LanguageProfileReader#readAllBuiltIn()} and adds
 * each {@link LanguageProfile} to the profiles already held by this validator.
 *
 * @return this instance
 * @throws IOException if reading the built-in profiles fails
 */
public LanguageProfileValidator loadAllBuiltInLanguageProfiles() throws IOException {
    final LanguageProfileReader builtInReader = new LanguageProfileReader();
    languageProfiles.addAll(builtInReader.readAllBuiltIn());
    return this;
}
 
Example #15
Source File: NgramFrequencyDataTest.java    From language-detector with Apache License 2.0 4 votes vote down vote up
/**
 * Builds {@code NgramFrequencyData} over all built-in language profiles for one gram size.
 *
 * @param gramSize the gram size wrapped in a single-element set for the factory call
 * @return the result of {@code NgramFrequencyData.create} for the built-in profiles
 * @throws IOException if reading the built-in profiles fails
 */
private static NgramFrequencyData forAll(int gramSize) throws IOException {
    final LanguageProfileReader profileReader = new LanguageProfileReader();
    final List<LanguageProfile> builtInProfiles = profileReader.readAllBuiltIn();
    final ImmutableSet<Integer> gramSizes = ImmutableSet.of(gramSize);
    return NgramFrequencyData.create(builtInProfiles, gramSizes);
}
 
Example #16
Source File: DataLanguageDetectorImplTest.java    From jstarcraft-nlp with Apache License 2.0 3 votes vote down vote up
public DataLanguageDetectorImplTest() throws IOException {
    List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();

    shortDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()).shortTextAlgorithm(100).withProfiles(languageProfiles).build();

    longDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()).shortTextAlgorithm(0).withProfiles(new LanguageProfileReader().readAllBuiltIn()).build();
}