Java Code Examples for net.sourceforge.tess4j.Tesseract

The following examples show how to use net.sourceforge.tess4j.Tesseract. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: MyBox   Source File: ImageOCRBatchController.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the OCR engine and the result list before a batch run.
 *
 * <p>The superclass preparation runs first; when it fails nothing is configured here.
 *
 * @return {@code true} when the superclass step succeeded and the engine was configured,
 *         {@code false} when either step failed
 */
@Override
public boolean makeActualParameters() {
    if (!super.makeActualParameters()) {
        return false;
    }

    try {
        OCRinstance = new Tesseract();
        // A fixed DPI is supplied because of the "invalid resolution 0 dpi" warning discussed at
        // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
        OCRinstance.setTessVariable("user_defined_dpi", "96");
        // NOTE(review): "/dev/null" is a Unix path - confirm how this behaves on Windows.
        OCRinstance.setTessVariable("debug_file", "/dev/null");
        String path = AppVariables.getUserConfigValue("TessDataPath", null);
        if (path != null) {
            OCRinstance.setDatapath(path);
        }
        if (selectedLanguages != null) {
            OCRinstance.setLanguage(selectedLanguages);
        }
        textFiles = new ArrayList<>();
        return true;
    } catch (Exception e) {
        // Hand the exception itself to the logger so the stack trace is kept,
        // not only its toString() summary.
        logger.error(e.toString(), e);
        return false;
    }
}
 
Example 2
Source Project: javautils   Source File: Tess4jUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the text from an image, using the English language data and the
 * "tessdata" training files bundled on the classpath.
 *
 * @param path path of the image file to read
 * @return whatever {@code getOCRText} returns for that image
 */
public static String take(String path){
    // Interface-mapped engine; Tesseract1 would be the JNA direct-mapping alternative.
    final ITesseract ocr = new Tesseract();
    final File source = new File(path);
    // Have the bundled tessdata extracted, so the training files on the classpath
    // can be used without a separately installed tessdata directory.
    final File extractedData = LoadLibs.extractTessResources("tessdata");
    ocr.setDatapath(extractedData.getAbsolutePath());
    // The English data recognises digits comparatively accurately.
    ocr.setLanguage(Constants.ENG);
    return getOCRText(ocr, source);
}
 
Example 3
Source Project: tutorials   Source File: Tess4JExample.java    License: MIT License 6 votes vote down vote up
/**
 * Runs OCR on a rectangular region of the sample image and prints the outcome.
 *
 * <p>When OCR fails the stack trace is printed and the printed result is {@code null}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String recognized = null;
    try {
        final Tesseract ocr = new Tesseract();
        ocr.setLanguage("spa");
        ocr.setPageSegMode(1);
        ocr.setOcrEngineMode(1);
        ocr.setHocr(true);
        ocr.setDatapath("src/main/resources/tessdata");

        final File sample = new File("src/main/resources/images/baeldung.png");
        // Only this 1200 x 200 region of the image is handed to the engine.
        final Rectangle region = new Rectangle(1200, 200);
        recognized = ocr.doOCR(sample, region);
    } catch (TesseractException e) {
        e.printStackTrace();
    }
    System.out.println(recognized);
}
 
Example 4
Source Project: ocr-tess4j-rest   Source File: Tess4jV1.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the Base64 image carried by the request, runs OCR on it and returns the text.
 *
 * <p>The decoded bytes are written to a temporary file, which is removed again once
 * the request has been handled.
 *
 * @param image request body holding the Base64 image data and its file extension
 * @return the recognised text wrapped in a {@link Text}
 * @throws Exception if the temporary file cannot be created; a {@link TesseractException}
 *         carrying the original failure as its cause if decoding, writing or OCR fails
 */
@RequestMapping(value = "ocr/v1/convert", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Text convertImageToText(@RequestBody final Image image) throws Exception {

    File tmpFile = File.createTempFile("ocr_image", image.getExtension());
    try {
        FileUtils.writeByteArrayToFile(tmpFile, Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(tmpFile);
        LOGGER.debug("OCR Image Text = " + imageText);
        return new Text(imageText);
    } catch (Exception e) {
        LOGGER.error("Exception while converting/uploading image: ", e);
        // Keep the original failure as the cause so callers can see what went wrong.
        throw new TesseractException(e);
    } finally {
        // If the file cannot be removed right now, at least schedule it for JVM exit.
        if (!tmpFile.delete()) {
            tmpFile.deleteOnExit();
        }
    }
}
 
Example 5
Source Project: ocr-tess4j-rest   Source File: Tess4jV1.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the Base64 image carried by the request, runs OCR on it, stores the
 * recognised text on the image and saves the image through the repository.
 *
 * @param image request body holding the Base64 image data; its text is set here
 * @return a {@link Status} of {@code "success"} when OCR and saving completed
 * @throws Exception a {@link TesseractException} carrying the original failure as its
 *         cause if decoding, OCR or saving fails
 */
@RequestMapping(value = "ocr/v1/upload", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Status doOcr(@RequestBody Image image) throws Exception {
    try {
        // The image is decoded in memory; no temporary file is involved.
        ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(ImageIO.read(bis));
        image.setText(imageText);
        repository.save(image);
        LOGGER.debug("OCR Result = " + imageText);
    } catch (Exception e) {
        // Any failure ends up here, not only Tesseract ones, hence the generic wording.
        LOGGER.error("Exception while converting/uploading image: ", e);
        // Keep the original failure as the cause so callers can see what went wrong.
        throw new TesseractException(e);
    }

    return new Status("success");
}
 
Example 6
Source Project: MillionHero   Source File: TessOcr.java    License: MIT License 5 votes vote down vote up
/**
 * Creates the engine, extracts the bundled "tessdata" resources and configures
 * the engine to use them with the {@code chi_sim} language data.
 */
TessOcr() {
    instance = new Tesseract();
    final File extractedData = LoadLibs.extractTessResources("tessdata");
    instance.setLanguage("chi_sim");
    // Point the engine at the extracted tessdata directory.
    final String dataPath = extractedData.getAbsolutePath();
    instance.setDatapath(dataPath);
}
 
Example 7
Source Project: MyBox   Source File: PdfOcrBatchController.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the separator option and prepares the OCR engine before a batch run.
 *
 * <p>The superclass preparation runs first; when it fails nothing is configured here.
 * The separator is cleared to {@code null} unless the separator check box is selected
 * and the input holds a non-empty text.
 *
 * @return {@code true} when the superclass step succeeded and the engine was configured,
 *         {@code false} when either step failed
 */
@Override
public boolean makeActualParameters() {
    if (!super.makeActualParameters()) {
        return false;
    }
    separator = separatorInput.getText();
    if (!separatorCheck.isSelected() || separator == null || separator.isEmpty()) {
        separator = null;
    }
    try {
        OCRinstance = new Tesseract();
        // A DPI is supplied because of the "invalid resolution 0 dpi" warning discussed at
        // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
        if (convertRadio.isSelected()) {
            OCRinstance.setTessVariable("user_defined_dpi", String.valueOf(dpi));
        } else {
            OCRinstance.setTessVariable("user_defined_dpi", "96");
        }
        // NOTE(review): "/dev/null" is a Unix path - confirm how this behaves on Windows.
        OCRinstance.setTessVariable("debug_file", "/dev/null");

        String path = AppVariables.getUserConfigValue("TessDataPath", null);
        if (path != null) {
            OCRinstance.setDatapath(path);
        }
        // Skip the call when no language is selected so null is never handed to the engine.
        if (selectedLanguages != null) {
            OCRinstance.setLanguage(selectedLanguages);
        }
        return true;
    } catch (Exception e) {
        // Hand the exception itself to the logger so the stack trace is kept,
        // not only its toString() summary.
        logger.error(e.toString(), e);
        return false;
    }
}
 
Example 8
Source Project: Java-for-Data-Science   Source File: TessrJExample.java    License: MIT License 5 votes vote down vote up
/**
 * Runs OCR with the "eng" language data on {@code OCRExample.png} and prints the text.
 *
 * <p>When OCR fails, the exception message is printed to standard error instead.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final ITesseract ocr = new Tesseract();
    ocr.setLanguage("eng");
    final File input = new File("OCRExample.png");
    try {
        System.out.println(ocr.doOCR(input));
    } catch (TesseractException e) {
        System.err.println(e.getMessage());
    }
}
 
Example 9
Source Project: javautils   Source File: Tess4jUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the text from an image with a caller-chosen data directory and language.
 *
 * @param path path of the image file to read
 * @param dataPath tessdata location handed to the engine
 * @param language language code handed to the engine
 * @return whatever {@code getOCRText} returns for that image
 */
public static String take(String path, String dataPath, String language){
    final File source = new File(path);
    final ITesseract ocr = new Tesseract();
    ocr.setDatapath(dataPath);
    // Original author's note: the English data recognises digits comparatively accurately.
    ocr.setLanguage(language);
    return getOCRText(ocr, source);
}
 
Example 10
Source Project: JewelCrawler   Source File: ImageUtil.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Manual experiment: loads a captcha image from a fixed path, runs it through the
 * pre-processing filters, writes the result to disk and prints the OCR text.
 *
 * <p>Any failure is caught and its stack trace printed.
 *
 * @param args unused
 */
public static void main(String[] args){
	try {
		File sourceFile = new File("D:\\爬虫测试\\yzm\\11.jpg");
		BufferedImage image = ImageIO.read(sourceFile);
		if (image == null) {
			// ImageIO.read returns null when no registered reader can decode the file.
			throw new IllegalStateException("Cannot decode image: " + sourceFile);
		}

		// Pre-processing chain applied before OCR.
		image = ImageUtil.binaryFilter(image);
		image = ImageUtil.lineFilter(image);
		image = ImageUtil.meanFilter(image);

		File imageFile = new File("E:/captcha5.jpg");
		if (!ImageIO.write(image, "jpg", imageFile)) {
			// ImageIO.write returns false when no writer accepts the image; without this
			// check OCR would run on a stale or missing file.
			throw new IllegalStateException("Cannot write image as jpg: " + imageFile);
		}

		// NOTE(review): getInstance() looks like the singleton accessor of older Tess4J
		// releases - confirm against the Tess4J version this project builds with.
		Tesseract tesseract = Tesseract.getInstance();
		tesseract.setLanguage("eng");
		String code = tesseract.doOCR(imageFile);

		System.out.println(code);
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 11
Source Project: MyBox   Source File: PdfViewController.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Starts OCR on the displayed image (or on its cropped selection, when there is one)
 * in a background daemon thread and shows the recognised text when it finishes.
 *
 * <p>Nothing happens when no image is shown, no language is selected, or a task
 * is already assigned.
 */
@FXML
public void startOCR() {
    checkLanguages();
    final boolean noImage = imageView.getImage() == null;
    if (noImage || selectedLanguages == null || selectedLanguages.isEmpty()) {
        return;
    }
    synchronized (this) {
        if (task != null) {
            return;
        }
        task = new SingletonTask<Void>() {

            // Text produced by the engine; assigned in handle(), read in whenSucceeded().
            private String ocrText;

            @Override
            protected boolean handle() {
                try {
                    final ITesseract ocr = new Tesseract();
                    ocr.setTessVariable("user_defined_dpi", "96");
                    ocr.setTessVariable("debug_file", "/dev/null");
                    final String dataPath = AppVariables.getUserConfigValue("TessDataPath", null);
                    if (dataPath != null) {
                        ocr.setDatapath(dataPath);
                    }
                    if (selectedLanguages != null) {
                        ocr.setLanguage(selectedLanguages);
                    }

                    // Prefer the cropped selection; fall back to the whole image.
                    final Image cropped = cropImage();
                    final Image source = cropped != null ? cropped : imageView.getImage();
                    final BufferedImage awtImage = SwingFXUtils.fromFXImage(source, null);
                    // Last chance to bail out before the OCR call itself.
                    if (task == null || isCancelled()) {
                        return false;
                    }
                    ocrText = ocr.doOCR(awtImage);
                    return ocrText != null;
                } catch (Exception e) {
                    error = e.toString();
                    return false;
                }
            }

            @Override
            protected void whenSucceeded() {
                final int length = ocrText.length();
                if (length == 0) {
                    popText(message("OCRMissComments"), 5000, "white", "1.1em", null);
                }
                ocrArea.setText(ocrText);
                final String summary = MessageFormat.format(message("OCRresults"),
                        length, DateTools.showTime(cost));
                resultLabel.setText(summary);

                orcPage = currentPage;
            }

        };
        openHandlingStage(task, Modality.WINDOW_MODAL);
        final Thread worker = new Thread(task);
        worker.setDaemon(true);
        worker.start();
    }

}
 
Example 12
Source Project: justtestlah   Source File: OCR.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an OCR helper around the given Tesseract engine.
 *
 * @param ocr engine instance stored in this object's {@code ocr} field
 */
@Autowired
public OCR(Tesseract ocr) {
  this.ocr = ocr;
}
 
Example 13
Source Project: justtestlah   Source File: VisualConfig.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Provides the {@link Tesseract} bean, with its data path taken from
 * {@code tesseractDataPath}.
 *
 * @return a newly created engine configured with that data path
 */
@Bean
public Tesseract tesseract() {
  final Tesseract engine = new Tesseract();
  engine.setDatapath(tesseractDataPath);
  return engine;
}
 
Example 14
Source Project: justtestlah   Source File: OCRTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks that OCR on the {@code helloworld.png} screenshot yields "hello world".
 *
 * @throws TesseractException declared for the OCR call made through the target
 */
public void helloWorldTest() throws TesseractException {
  final TakesScreenshot screenshotSource = mock(TakesScreenshot.class);
  target = new OCR(new Tesseract()).withDriver(screenshotSource);
  // The mocked driver hands back the fixture image as its "screenshot".
  when(screenshotSource.getScreenshotAs(OutputType.FILE)).thenReturn(getPath("helloworld.png"));

  final String recognized = target.getText().trim();
  assertThat(recognized).isEqualTo("hello world");
}
 
Example 15
Source Project: opentest   Source File: TesseractOcr.java    License: MIT License 4 votes vote down vote up
public TesseractOcr() {
        this.tess = new Tesseract();
        this.tess.setDatapath(System.getProperty("user.dir"));
//        this.tess.setOcrEngineMode(ITessAPI.TessOcrEngineMode.OEM_CUBE_ONLY);
    }