net.sourceforge.tess4j.Tesseract Java Examples
The following examples show how to use
net.sourceforge.tess4j.Tesseract.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ImageOCRBatchController.java From MyBox with Apache License 2.0 | 6 votes |
@Override public boolean makeActualParameters() { if (!super.makeActualParameters()) { return false; } try { OCRinstance = new Tesseract(); // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472 OCRinstance.setTessVariable("user_defined_dpi", "96"); OCRinstance.setTessVariable("debug_file", "/dev/null"); String path = AppVariables.getUserConfigValue("TessDataPath", null); if (path != null) { OCRinstance.setDatapath(path); } if (selectedLanguages != null) { OCRinstance.setLanguage(selectedLanguages); } textFiles = new ArrayList<>(); return true; } catch (Exception e) { logger.error(e.toString()); return false; } }
Example #2
Source File: Tess4jUtil.java From javautils with Apache License 2.0 | 6 votes |
/** * 从图片中提取文字,默认设置英文字库,使用classpath目录下的训练库 * @param path * @return */ public static String take(String path){ // JNA Interface Mapping ITesseract instance = new Tesseract(); // JNA Direct Mapping // ITesseract instance = new Tesseract1(); File imageFile = new File(path); //In case you don't have your own tessdata, let it also be extracted for you //这样就能使用classpath目录下的训练库了 File tessDataFolder = LoadLibs.extractTessResources("tessdata"); //Set the tessdata path instance.setDatapath(tessDataFolder.getAbsolutePath()); //英文库识别数字比较准确 instance.setLanguage(Constants.ENG); return getOCRText(instance, imageFile); }
Example #3
Source File: Tess4JExample.java From tutorials with MIT License | 6 votes |
/**
 * Runs OCR on a region of the sample image and prints the outcome.
 *
 * <p>The engine is set to the {@code "spa"} language, page segmentation mode 1,
 * OCR engine mode 1 and hOCR output. If recognition fails the stack trace is
 * printed and {@code null} is written to standard output.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String recognized = null;

    try {
        Tesseract engine = new Tesseract();
        engine.setLanguage("spa");
        engine.setPageSegMode(1);
        engine.setOcrEngineMode(1);
        engine.setHocr(true);
        engine.setDatapath("src/main/resources/tessdata");

        File source = new File("src/main/resources/images/baeldung.png");
        // Restrict recognition to a 1200x200 rectangle anchored at the origin.
        Rectangle region = new Rectangle(1200, 200);
        recognized = engine.doOCR(source, region);
    } catch (TesseractException e) {
        e.printStackTrace();
    }

    System.out.println(recognized);
}
Example #4
Source File: Tess4jV1.java From ocr-tess4j-rest with Apache License 2.0 | 6 votes |
@RequestMapping(value = "ocr/v1/convert", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) public Text convertImageToText(@RequestBody final Image image) throws Exception { File tmpFile = File.createTempFile("ocr_image", image.getExtension()); try { FileUtils.writeByteArrayToFile(tmpFile, Base64.decodeBase64(image.getImage())); Tesseract tesseract = new Tesseract(); // JNA Interface Mapping String imageText = tesseract.doOCR(tmpFile); LOGGER.debug("OCR Image Text = " + imageText); return new Text(imageText); } catch (Exception e) { LOGGER.error("Exception while converting/uploading image: ", e); throw new TesseractException(); } finally { tmpFile.delete(); } }
Example #5
Source File: Tess4jV1.java From ocr-tess4j-rest with Apache License 2.0 | 6 votes |
@RequestMapping(value = "ocr/v1/upload", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) public Status doOcr(@RequestBody Image image) throws Exception { try { //FileUtils.writeByteArrayToFile(tmpFile, Base64.decodeBase64(image.getImage())); ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decodeBase64(image.getImage())); Tesseract tesseract = new Tesseract(); // JNA Interface Mapping String imageText = tesseract.doOCR(ImageIO.read(bis)); image.setText(imageText); repository.save(image); LOGGER.debug("OCR Result = " + imageText); } catch (Exception e) { LOGGER.error("TessearctException while converting/uploading image: ", e); throw new TesseractException(); } return new Status("success"); }
Example #6
Source File: TessOcr.java From MillionHero with MIT License | 5 votes |
/**
 * Creates the OCR engine, selects the {@code "chi_sim"} language and points the
 * engine at the tessdata folder extracted by {@code LoadLibs}.
 */
TessOcr() {
    instance = new Tesseract();

    File extractedTessData = LoadLibs.extractTessResources("tessdata");
    instance.setLanguage("chi_sim");

    // Use the extracted folder as the tessdata path.
    String tessDataPath = extractedTessData.getAbsolutePath();
    instance.setDatapath(tessDataPath);
}
Example #7
Source File: PdfOcrBatchController.java From MyBox with Apache License 2.0 | 5 votes |
@Override public boolean makeActualParameters() { if (!super.makeActualParameters()) { return false; } separator = separatorInput.getText(); if (!separatorCheck.isSelected() || separator == null || separator.isEmpty()) { separator = null; } try { OCRinstance = new Tesseract(); // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472 if (convertRadio.isSelected()) { OCRinstance.setTessVariable("user_defined_dpi", dpi + ""); } else { OCRinstance.setTessVariable("user_defined_dpi", "96"); } OCRinstance.setTessVariable("debug_file", "/dev/null"); String path = AppVariables.getUserConfigValue("TessDataPath", null); if (path != null) { OCRinstance.setDatapath(path); } OCRinstance.setLanguage(selectedLanguages); return true; } catch (Exception e) { logger.error(e.toString()); return false; } }
Example #8
Source File: TessrJExample.java From Java-for-Data-Science with MIT License | 5 votes |
/**
 * Recognises the text in {@code OCRExample.png} with the {@code "eng"} language
 * and prints it; on failure the exception message goes to standard error.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    ITesseract engine = new Tesseract();
    engine.setLanguage("eng");

    try {
        File source = new File("OCRExample.png");
        System.out.println(engine.doOCR(source));
    } catch (TesseractException e) {
        System.err.println(e.getMessage());
    }
}
Example #9
Source File: Tess4jUtil.java From javautils with Apache License 2.0 | 5 votes |
/** * 从图片中提取文字 * @param path * @param dataPath * @param language * @return */ public static String take(String path, String dataPath, String language){ File imageFile = new File(path); ITesseract instance = new Tesseract(); instance.setDatapath(dataPath); //英文库识别数字比较准确 instance.setLanguage(language); return getOCRText(instance, imageFile); }
Example #10
Source File: ImageUtil.java From JewelCrawler with GNU General Public License v3.0 | 5 votes |
public static void main(String[] args){ try { boolean load = true; load = false; // BufferedImage image = ImageIO.read(new URL("http://www.miitbeian.gov.cn/captcha.jpg")) ; // if(load){ // ImageIO.write(image, "jpg", new File("E:/captcha.jpg") ); // }else{ // image = ImageIO.read(new File("D:\\爬虫测试\\yzm\\111.png")) ; // } BufferedImage image = ImageIO.read(new File("D:\\爬虫测试\\yzm\\11.jpg")) ; // image = ImageUtil.grayFilter(image); image = ImageUtil.binaryFilter(image); image = ImageUtil.lineFilter(image); // image = ImageUtil.lineFilter(image); // image = ImageUtil.line2Filter(image); // image = ImageUtil.point2Filter(image); // image = ImageUtil.lineFilter(image); image = ImageUtil.meanFilter(image); // image = ImageUtil.lineFilter(image); // image = ImageUtil.binaryFilter(image); File imageFile = new File("E:/captcha5.jpg"); // imageFile = new File("E:/test/test.jpg"); ImageIO.write(image, "jpg", imageFile); Tesseract tesseract = Tesseract.getInstance(); tesseract.setLanguage("eng"); String code = tesseract.doOCR(imageFile); System.out.println(code); } catch (Exception e) { e.printStackTrace(); } }
Example #11
Source File: PdfViewController.java From MyBox with Apache License 2.0 | 4 votes |
/**
 * Starts OCR of the current image (or of its cropped selection) on a background
 * daemon thread.
 *
 * <p>Nothing happens when there is no image, no language is selected, or a task
 * is already registered. On success the recognised text is shown in the OCR area
 * together with a summary label, and the current page is remembered as the
 * recognised page.
 */
@FXML
public void startOCR() {
    checkLanguages();
    boolean nothingToRecognize = imageView.getImage() == null
            || selectedLanguages == null
            || selectedLanguages.isEmpty();
    if (nothingToRecognize) {
        return;
    }

    synchronized (this) {
        // Only one task at a time.
        if (task != null) {
            return;
        }

        task = new SingletonTask<Void>() {

            private String recognized;

            @Override
            protected boolean handle() {
                try {
                    ITesseract engine = new Tesseract();
                    engine.setTessVariable("user_defined_dpi", "96");
                    engine.setTessVariable("debug_file", "/dev/null");

                    // Only override the data path when the user configured one.
                    String tessDataPath = AppVariables.getUserConfigValue("TessDataPath", null);
                    if (tessDataPath != null) {
                        engine.setDatapath(tessDataPath);
                    }
                    if (selectedLanguages != null) {
                        engine.setLanguage(selectedLanguages);
                    }

                    // Prefer the cropped selection; fall back to the whole image.
                    Image region = cropImage();
                    if (region == null) {
                        region = imageView.getImage();
                    }
                    BufferedImage bufferedImage = SwingFXUtils.fromFXImage(region, null);

                    // Skip the recognition step when the task was dropped or cancelled meanwhile.
                    if (task == null || isCancelled()) {
                        return false;
                    }

                    recognized = engine.doOCR(bufferedImage);
                    return recognized != null;
                } catch (Exception e) {
                    error = e.toString();
                    return false;
                }
            }

            @Override
            protected void whenSucceeded() {
                int recognizedLength = recognized.length();
                if (recognizedLength == 0) {
                    popText(message("OCRMissComments"), 5000, "white", "1.1em", null);
                }
                ocrArea.setText(recognized);

                String summary = MessageFormat.format(message("OCRresults"),
                        recognizedLength, DateTools.showTime(cost));
                resultLabel.setText(summary);
                orcPage = currentPage;
            }
        };

        openHandlingStage(task, Modality.WINDOW_MODAL);

        Thread worker = new Thread(task);
        worker.setDaemon(true);
        worker.start();
    }
}
Example #12
Source File: OCR.java From justtestlah with Apache License 2.0 | 4 votes |
/**
 * Creates the OCR helper around the given Tesseract engine.
 *
 * @param ocr engine stored for later use by this instance
 */
@Autowired
public OCR(Tesseract ocr) {
    this.ocr = ocr;
}
Example #13
Source File: VisualConfig.java From justtestlah with Apache License 2.0 | 4 votes |
/**
 * Provides the Tesseract engine bean, configured with {@code tesseractDataPath}
 * as its data path.
 *
 * @return a newly created, configured engine
 */
@Bean
public Tesseract tesseract() {
    Tesseract engine = new Tesseract();
    engine.setDatapath(tesseractDataPath);
    return engine;
}
Example #14
Source File: OCRTest.java From justtestlah with Apache License 2.0 | 4 votes |
/**
 * Checks that OCR of the {@code helloworld.png} screenshot yields "hello world"
 * once surrounding whitespace is trimmed.
 *
 * @throws TesseractException declared for the recognition step
 */
public void helloWorldTest() throws TesseractException {
    // The mocked driver serves the sample image as its screenshot.
    TakesScreenshot screenshotSource = mock(TakesScreenshot.class);
    target = new OCR(new Tesseract()).withDriver(screenshotSource);
    when(screenshotSource.getScreenshotAs(OutputType.FILE)).thenReturn(getPath("helloworld.png"));

    String recognized = target.getText().trim();

    assertThat(recognized).isEqualTo("hello world");
}
Example #15
Source File: TesseractOcr.java From opentest with MIT License | 4 votes |
/**
 * Creates the Tesseract engine and uses the JVM's {@code user.dir} system
 * property as its data path.
 */
public TesseractOcr() {
    String workingDirectory = System.getProperty("user.dir");

    this.tess = new Tesseract();
    this.tess.setDatapath(workingDirectory);
    // Disabled: this.tess.setOcrEngineMode(ITessAPI.TessOcrEngineMode.OEM_CUBE_ONLY);
}