net.sourceforge.tess4j.Tesseract Java Examples

The following examples show how to use net.sourceforge.tess4j.Tesseract. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ImageOCRBatchController.java    From MyBox with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the OCR engine and the output list for a batch run.
 *
 * <p>After the superclass check passes, a new {@code Tesseract} is stored in
 * {@code OCRinstance}, its {@code user_defined_dpi} is set to 96, its
 * {@code debug_file} is pointed at {@code /dev/null}, and the user-configured
 * "TessDataPath" and the selected languages are applied when they are non-null.
 * {@code textFiles} is reset to an empty list last.
 *
 * @return {@code true} when everything was set up; {@code false} when the
 *         superclass check fails or any exception is raised during setup
 *         (the exception's {@code toString()} is logged)
 */
@Override
public boolean makeActualParameters() {
    boolean prepared = super.makeActualParameters();
    if (prepared) {
        try {
            OCRinstance = new Tesseract();
            // Fixed DPI, see:
            // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
            OCRinstance.setTessVariable("user_defined_dpi", "96");
            OCRinstance.setTessVariable("debug_file", "/dev/null");

            final String tessDataDir = AppVariables.getUserConfigValue("TessDataPath", null);
            if (tessDataDir != null) {
                OCRinstance.setDatapath(tessDataDir);
            }
            if (selectedLanguages != null) {
                OCRinstance.setLanguage(selectedLanguages);
            }

            textFiles = new ArrayList<>();
        } catch (Exception ex) {
            logger.error(ex.toString());
            prepared = false;
        }
    }
    return prepared;
}
 
Example #2
Source File: Tess4jUtil.java    From javautils with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the text from an image using the English language data and the
 * training data bundled on the classpath.
 *
 * @param path path of the image file to recognise
 * @return the value produced by {@code getOCRText} for that image
 */
public static String take(String path){
    // JNA Interface Mapping; the JNA Direct Mapping alternative would be Tesseract1.
    final ITesseract ocr = new Tesseract();
    final File source = new File(path);
    // No separate tessdata installation is required: the bundled "tessdata"
    // resources are extracted and the engine is pointed at that directory.
    final String tessDataDir = LoadLibs.extractTessResources("tessdata").getAbsolutePath();
    ocr.setDatapath(tessDataDir);
    // The English data recognises digits comparatively accurately.
    ocr.setLanguage(Constants.ENG);
    return getOCRText(ocr, source);
}
 
Example #3
Source File: Tess4JExample.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Runs OCR over a region of the sample image and prints the result.
 *
 * <p>If Tesseract fails, the stack trace is printed and {@code null} is
 * printed as the result.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String ocrOutput;
    try {
        final File sampleImage = new File("src/main/resources/images/baeldung.png");

        final Tesseract engine = new Tesseract();
        engine.setLanguage("spa");
        // Numeric mode constants; see Tess4J's page-segmentation / engine-mode enums
        // for their meaning.
        engine.setPageSegMode(1);
        engine.setOcrEngineMode(1);
        engine.setHocr(true);
        engine.setDatapath("src/main/resources/tessdata");

        // Region anchored at (0, 0), 1200 wide and 200 high.
        final Rectangle region = new Rectangle(1200, 200);
        ocrOutput = engine.doOCR(sampleImage, region);
    } catch (TesseractException ex) {
        ex.printStackTrace();
        ocrOutput = null;
    }
    System.out.println(ocrOutput);
}
 
Example #4
Source File: Tess4jV1.java    From ocr-tess4j-rest with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the Base64 image carried in the request, runs OCR on it through a
 * temporary file and returns the recognised text.
 *
 * @param image request body supplying the Base64 image data and the extension
 *              used as the temporary file's suffix
 * @return the recognised text wrapped in a {@code Text}
 * @throws Exception if the temporary file cannot be created; any failure after
 *                   that point is logged and rethrown as a
 *                   {@code TesseractException} that carries the original cause
 */
@RequestMapping(value = "ocr/v1/convert", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Text convertImageToText(@RequestBody final Image image) throws Exception {

    File tmpFile = File.createTempFile("ocr_image", image.getExtension());
    try {
        FileUtils.writeByteArrayToFile(tmpFile, Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(tmpFile);
        LOGGER.debug("OCR Image Text = " + imageText);
        return new Text(imageText);
    } catch (Exception e) {
        LOGGER.error("Exception while converting/uploading image: ", e);
        // Chain the original exception so callers can still see what went wrong.
        throw new TesseractException(e);
    } finally {
        // The temp file is removed on both the success and the failure path;
        // a failed delete is reported instead of being ignored silently.
        if (!tmpFile.delete()) {
            LOGGER.warn("Could not delete temporary OCR file " + tmpFile);
        }
    }
}
 
Example #5
Source File: Tess4jV1.java    From ocr-tess4j-rest with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the Base64 image carried in the request, runs OCR on it in memory,
 * stores the recognised text on the image and saves the image through
 * {@code repository}.
 *
 * @param image request body supplying the Base64 image data; its text is set
 *              to the OCR result before it is saved
 * @return a {@code Status} built from {@code "success"} when no exception occurred
 * @throws Exception a {@code TesseractException} carrying the original cause
 *                   when decoding, OCR or saving fails
 */
@RequestMapping(value = "ocr/v1/upload", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Status doOcr(@RequestBody Image image) throws Exception {
    try {
        ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(ImageIO.read(bis));
        image.setText(imageText);
        repository.save(image);
        LOGGER.debug("OCR Result = " + imageText);
    } catch (Exception e) {
        LOGGER.error("TessearctException while converting/uploading image: ", e);
        // Chain the original exception so callers can still see what went wrong.
        throw new TesseractException(e);
    }

    return new Status("success");
}
 
Example #6
Source File: TessOcr.java    From MillionHero with MIT License 5 votes vote down vote up
/**
 * Creates the Tesseract engine, selects the {@code chi_sim} language data and
 * uses the directory returned by {@code LoadLibs.extractTessResources("tessdata")}
 * as the data path.
 */
TessOcr() {
    this.instance = new Tesseract();
    final File bundledTessData = LoadLibs.extractTessResources("tessdata");
    this.instance.setLanguage("chi_sim");
    // Point the engine at the extracted tessdata directory.
    this.instance.setDatapath(bundledTessData.getAbsolutePath());
}
 
Example #7
Source File: PdfOcrBatchController.java    From MyBox with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the page separator and the OCR engine for a batch run.
 *
 * <p>The separator is taken from {@code separatorInput} and cleared to
 * {@code null} when the separator check box is not selected or the text is
 * null or empty. A new {@code Tesseract} is then stored in {@code OCRinstance}:
 * {@code user_defined_dpi} is {@code dpi} when the convert option is selected
 * and 96 otherwise, {@code debug_file} is pointed at {@code /dev/null}, and the
 * user-configured "TessDataPath" and the selected languages are applied when
 * they are non-null.
 *
 * @return {@code true} when everything was set up; {@code false} when the
 *         superclass check fails or any exception is raised during setup
 *         (the exception's {@code toString()} is logged)
 */
@Override
public boolean makeActualParameters() {
    if (!super.makeActualParameters()) {
        return false;
    }
    separator = separatorInput.getText();
    if (!separatorCheck.isSelected() || separator == null || separator.isEmpty()) {
        separator = null;
    }
    try {
        OCRinstance = new Tesseract();
        // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
        if (convertRadio.isSelected()) {
            OCRinstance.setTessVariable("user_defined_dpi", dpi + "");
        } else {
            OCRinstance.setTessVariable("user_defined_dpi", "96");
        }
        OCRinstance.setTessVariable("debug_file", "/dev/null");

        String path = AppVariables.getUserConfigValue("TessDataPath", null);
        if (path != null) {
            OCRinstance.setDatapath(path);
        }
        // Only override the engine's language when one was actually selected,
        // so a null selection does not replace the engine's own default.
        if (selectedLanguages != null) {
            OCRinstance.setLanguage(selectedLanguages);
        }
        return true;
    } catch (Exception e) {
        logger.error(e.toString());
        return false;
    }
}
 
Example #8
Source File: TessrJExample.java    From Java-for-Data-Science with MIT License 5 votes vote down vote up
/**
 * Recognises the text in {@code OCRExample.png} with the English language data
 * and prints it; on a Tesseract failure the exception message goes to stderr.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final ITesseract ocr = new Tesseract();
    ocr.setLanguage("eng");
    final File picture = new File("OCRExample.png");
    try {
        System.out.println(ocr.doOCR(picture));
    } catch (TesseractException ex) {
        System.err.println(ex.getMessage());
    }
}
 
Example #9
Source File: Tess4jUtil.java    From javautils with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the text from an image with caller-supplied Tesseract settings.
 *
 * @param path     path of the image file to recognise
 * @param dataPath value passed to the engine as its data path
 * @param language value passed to the engine as its language
 * @return the value produced by {@code getOCRText} for that image
 */
public static String take(String path, String dataPath, String language){
    final File source = new File(path);
    final ITesseract ocr = new Tesseract();
    ocr.setDatapath(dataPath);
    // Language is chosen by the caller (the original note here observed that
    // the English data recognises digits comparatively accurately).
    ocr.setLanguage(language);
    return getOCRText(ocr, source);
}
 
Example #10
Source File: ImageUtil.java    From JewelCrawler with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Manual check of the captcha pipeline: loads a local sample image, applies the
 * binary, line and mean filters in that order, writes the filtered image to
 * disk as JPEG, runs English OCR on the written file and prints the result.
 *
 * <p>Any exception is caught and its stack trace printed.
 *
 * @param args unused
 */
public static void main(String[] args){
    try {
        File sourceFile = new File("D:\\爬虫测试\\yzm\\11.jpg");
        BufferedImage image = ImageIO.read(sourceFile);
        // ImageIO.read returns null (rather than throwing) when no registered
        // reader can decode the file; fail with a clear message in that case.
        if (image == null) {
            throw new IllegalStateException("Unsupported or unreadable image: " + sourceFile);
        }

        // Filter chain currently in use: binarise, remove lines, then smooth.
        image = ImageUtil.binaryFilter(image);
        image = ImageUtil.lineFilter(image);
        image = ImageUtil.meanFilter(image);

        File imageFile = new File("E:/captcha5.jpg");
        ImageIO.write(image, "jpg", imageFile);

        // NOTE(review): other Tess4J snippets use `new Tesseract()`; getInstance()
        // is kept because the Tess4J version of this project is not visible here.
        Tesseract tesseract = Tesseract.getInstance();
        tesseract.setLanguage("eng");
        String code = tesseract.doOCR(imageFile);

        System.out.println(code);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #11
Source File: PdfViewController.java    From MyBox with Apache License 2.0 4 votes vote down vote up
/**
 * Starts OCR of the currently displayed image on a background daemon thread.
 *
 * <p>Nothing happens when there is no image in {@code imageView}, when no
 * languages are selected, or when {@code task} is already set. Otherwise a new
 * task is created, handed to {@code openHandlingStage} and run on a new thread.
 * The task recognises the cropped selection, or the whole displayed image when
 * {@code cropImage()} returns {@code null}, and on success writes the text and
 * a summary into {@code ocrArea} and {@code resultLabel}.
 */
@FXML
public void startOCR() {
    checkLanguages();
    // Preconditions: an image must be shown and at least one language selected.
    if (imageView.getImage() == null
            || selectedLanguages == null || selectedLanguages.isEmpty()) {
        return;
    }
    synchronized (this) {
        // Only one task at a time: bail out while a previous one is still set.
        // NOTE(review): nothing in this method resets `task` to null; presumably
        // SingletonTask or openHandlingStage does — confirm.
        if (task != null) {
            return;
        }
        task = new SingletonTask<Void>() {

            // Recognised text; assigned in handle() and read in whenSucceeded().
            private String result;

            // Performs the recognition; returns false on cancellation, on a null
            // result, or on any exception (whose toString() is stored in `error`).
            @Override
            protected boolean handle() {
                try {
                    ITesseract instance = new Tesseract();
                    // Same engine settings as the batch controllers: fixed 96 DPI
                    // and debug_file pointed at /dev/null.
                    instance.setTessVariable("user_defined_dpi", "96");
                    instance.setTessVariable("debug_file", "/dev/null");
                    String path = AppVariables.getUserConfigValue("TessDataPath", null);
                    if (path != null) {
                        instance.setDatapath(path);
                    }
                    if (selectedLanguages != null) {
                        instance.setLanguage(selectedLanguages);
                    }

                    // Prefer the cropped selection; fall back to the full image.
                    Image selected = cropImage();
                    if (selected == null) {
                        selected = imageView.getImage();
                    }
                    BufferedImage bufferedImage = SwingFXUtils.fromFXImage(selected, null);
                    // Skip the OCR call if the task was cleared or cancelled meanwhile.
                    if (task == null || isCancelled()) {
                        return false;
                    }
                    result = instance.doOCR(bufferedImage);
                    return result != null;
                } catch (Exception e) {
                    error = e.toString();
                    return false;
                }
            }

            // Publishes the result to the UI; presumably invoked only after
            // handle() returned true, so `result` is non-null here — confirm
            // against SingletonTask.
            @Override
            protected void whenSucceeded() {
                // Empty text still counts as success, but the user gets a hint.
                if (result.length() == 0) {
                    popText(message("OCRMissComments"), 5000, "white", "1.1em", null);
                }
                ocrArea.setText(result);
                resultLabel.setText(MessageFormat.format(message("OCRresults"),
                        result.length(), DateTools.showTime(cost)));

                // Remember which page the shown OCR text belongs to.
                orcPage = currentPage;
            }

        };
        openHandlingStage(task, Modality.WINDOW_MODAL);
        // Daemon thread, so a running OCR does not keep the JVM alive.
        Thread thread = new Thread(task);
        thread.setDaemon(true);
        thread.start();
    }

}
 
Example #12
Source File: OCR.java    From justtestlah with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the OCR helper around the given Tesseract engine.
 *
 * <p>The constructor is annotated with {@code @Autowired}, so the engine is
 * expected to be supplied by the dependency-injection container.
 *
 * @param ocr the Tesseract engine stored in this instance's {@code ocr} field
 */
@Autowired
public OCR(Tesseract ocr) {
  this.ocr = ocr;
}
 
Example #13
Source File: VisualConfig.java    From justtestlah with Apache License 2.0 4 votes vote down vote up
/**
 * Exposes a {@code Tesseract} engine as a bean, with its data path taken from
 * {@code tesseractDataPath}.
 *
 * @return the configured engine
 */
@Bean
public Tesseract tesseract() {
  final Tesseract engine = new Tesseract();
  engine.setDatapath(tesseractDataPath);
  return engine;
}
 
Example #14
Source File: OCRTest.java    From justtestlah with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the text read from {@code helloworld.png}, served as the mocked
 * driver's screenshot, is "hello world" once trimmed.
 *
 * @throws TesseractException declared for the OCR call made by the target
 */
public void helloWorldTest() throws TesseractException {
  final TakesScreenshot screenshotSource = mock(TakesScreenshot.class);
  final Tesseract engine = new Tesseract();
  target = new OCR(engine).withDriver(screenshotSource);
  // Every FILE screenshot request is answered with the fixture image.
  when(screenshotSource.getScreenshotAs(OutputType.FILE)).thenReturn(getPath("helloworld.png"));

  final String recognised = target.getText().trim();
  assertThat(recognised).isEqualTo("hello world");
}
 
Example #15
Source File: TesseractOcr.java    From opentest with MIT License 4 votes vote down vote up
public TesseractOcr() {
        this.tess = new Tesseract();
        this.tess.setDatapath(System.getProperty("user.dir"));
//        this.tess.setOcrEngineMode(ITessAPI.TessOcrEngineMode.OEM_CUBE_ONLY);
    }