Java Code Examples for net.sourceforge.tess4j.Tesseract

The following are the top-voted examples showing how to use net.sourceforge.tess4j.Tesseract. These examples are extracted from open source projects. You can vote up the examples you like, and your votes will be used in our system to surface better examples.
Example 1
Project: hadoop-video-ocr   File: HadoopOCR.java   Source Code and License 10 votes vote down vote up
/**
 * Downloads the video referenced by {@code url}, extracts and crops its frames, runs OCR on
 * every frame and emits each non-empty OCR result keyed by the original URL.
 *
 * <p>Frames whose OCR fails are skipped instead of aborting the whole record.
 *
 * @param key      input key supplied by the framework (not used here)
 * @param url      text holding the URL of the video to download
 * @param output   collector receiving one {@code (url, recognizedText)} pair per frame with text
 * @param reporter progress reporter supplied by the framework (not used here)
 * @throws IOException if the download directory ends up empty or an underlying step fails with I/O
 */
public void map(LongWritable key, Text url, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
    File videoDownloadDir = Files.createTempDir();
    VGet v = new VGet(new URL(url.toString()), videoDownloadDir);
    v.download();
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    File[] videoFiles = videoDownloadDir.listFiles();
    // listFiles() returns null on I/O error; an empty array means nothing was downloaded.
    if (videoFiles == null || videoFiles.length == 0) {
        throw new IOException("No video file was downloaded from " + url);
    }
    Arrays.sort(videoFiles);
    // NOTE(review): the meaning of the literal 70 is defined by VideoProcessing.parseVideo - confirm.
    File[] videoFramesFiles = VideoProcessing.parseVideo(videoFiles[0], 70);
    File[] processedVideoFrames = VideoProcessing.cutImages(videoFramesFiles);

    Tesseract instance = Tesseract.getInstance();
    instance.setDatapath("/usr/share/tesseract-ocr");
    instance.setTessVariable("LC_NUMERIC", "C");

    for (File image : processedVideoFrames) {
        String result;
        try {
            result = instance.doOCR(image);
        } catch (TesseractException e) {
            // A single unreadable frame must not fail the record; report it and move on.
            e.printStackTrace();
            continue;
        }
        if (result != null && !result.isEmpty()) {
            word.set(result);
            output.collect(url, word);
        }
    }
}
 
Example 2
Project: maker   File: Tess4Java.java   Source Code and License 7 votes vote down vote up
/**
 * Prints the temp-dir property and the TESSDATA_PREFIX environment variable, then loads a fixed
 * test image, converts it to grayscale, enlarges both dimensions five-fold and prints the text
 * recognized with the "chi_sim" language.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(final String[] args) {
  try {
    System.out.println(System.getProperty("java.io.tmpdir"));
    System.out.println(System.getenv("TESSDATA_PREFIX"));

    BufferedImage source = new Image("d:\\test\\pdf\\test10.png").getAsBufferedImage();
    BufferedImage gray = ImageHelper.convertImageToGrayscale(source);
    int targetWidth = gray.getWidth() * 5;
    int targetHeight = gray.getHeight() * 5;
    BufferedImage enlarged = ImageHelper.getScaledInstance(gray, targetWidth, targetHeight);

    Tesseract ocr = Tesseract.getInstance();
    ocr.setLanguage("chi_sim");
    System.out.println("instance done");
    System.out.println(ocr.doOCR(enlarged));
  } catch (Throwable failure) {
    // Rethrow anything, checked or not, without declaring it.
    throw Exceptions.sneakyThrow(failure);
  }
}
 
Example 3
Project: chart-recognition-library   File: OCRReader.java   Source Code and License 6 votes vote down vote up
/**
 * Zooms the text image to make it easier to read, then recognizes its text.
 *
 * <p>The image is enlarged via {@code image.scale(8)} and passed to Tesseract configured with
 * the "eng" language and the default OCR engine mode.
 *
 * @param image the image containing the text to recognize
 * @return the text returned by Tesseract for the scaled image
 * @throws IllegalStateException if recognition fails; the original exception is kept as the cause
 */
public static String recognizeText(Image image) {
	LibraryLoaderSingleton.getInstance();
	Image scaledImage = image.scale(8);
	Tesseract instance = Tesseract.getInstance(); // JNA Interface Mapping
	instance.setLanguage("eng");
	System.setProperty("jna.encoding", "UTF8");
	instance.setOcrEngineMode(TessAPI.TessOcrEngineMode.OEM_DEFAULT);
	try {
		return instance.doOCR(scaledImage.getInnerImage());
	} catch (TesseractException e) {
		throw new IllegalStateException(e);
	} catch (Exception ex) {
		// Keep the cause so the real failure is not lost behind the generic message.
		throw new IllegalStateException("An error during text recognition was encountered.", ex);
	}
}
 
Example 4
Project: chart-recognition-library   File: OCRReader.java   Source Code and License 6 votes vote down vote up
/**
 * Recognizes the text of an image after enlarging it via {@code image.scale(8)}.
 *
 * <p>Tesseract is configured with the "eng" language and the default OCR engine mode.
 *
 * @param image the image containing the text to recognize
 * @return the text returned by Tesseract for the scaled image
 * @throws IllegalStateException if recognition fails; the original exception is kept as the cause
 */
public static String recognizeYText(Image image) {
	LibraryLoaderSingleton.getInstance();
	Image scaledImage = image.scale(8);
	Tesseract instance = Tesseract.getInstance(); // JNA Interface Mapping
	instance.setLanguage("eng");
	System.setProperty("jna.encoding", "UTF8");
	instance.setOcrEngineMode(TessAPI.TessOcrEngineMode.OEM_DEFAULT);
	try {
		return instance.doOCR(scaledImage.getInnerImage());
	} catch (TesseractException e) {
		throw new IllegalStateException(e);
	} catch (Exception ex) {
		// Keep the cause so the real failure is not lost behind the generic message.
		throw new IllegalStateException("An error during text recognition was encountered.", ex);
	}
}
 
Example 5
Project: Machine-Learning-End-to-Endguide-for-Java-developers   File: TessrJExample.java   Source Code and License 5 votes vote down vote up
/**
 * Runs English OCR on {@code OCRExample.png} and prints the recognized text; on an OCR failure
 * the exception message is printed to standard error instead.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args) {
    ITesseract ocr = new Tesseract();
    ocr.setLanguage("eng");
    try {
        System.out.println(ocr.doOCR(new File("OCRExample.png")));
    } catch (TesseractException e) {
        System.err.println(e.getMessage());
    }
}
 
Example 6
Project: Java-for-Data-Science   File: TessrJExample.java   Source Code and License 5 votes vote down vote up
/**
 * Entry point: recognizes the text of {@code OCRExample.png} using the "eng" language and
 * writes it to standard output. An OCR failure is reported by message on standard error.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args) {
    ITesseract engine = new Tesseract();
    engine.setLanguage("eng");
    try {
        final String recognized = engine.doOCR(new File("OCRExample.png"));
        System.out.println(recognized);
    } catch (TesseractException ocrFailure) {
        System.err.println(ocrFailure.getMessage());
    }
}
 
Example 7
Project: JewelCrawler   File: ImageUtil.java   Source Code and License 5 votes vote down vote up
/**
 * Manual driver: reads a captcha image from a fixed path, applies the binary, line and mean
 * filters of {@code ImageUtil}, writes the filtered image to a fixed JPEG file and prints the
 * text Tesseract recognizes in that file using the "eng" language.
 *
 * <p>Any failure is printed as a stack trace; nothing is rethrown.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args){
	try {
		File sourceFile = new File("D:\\爬虫测试\\yzm\\11.jpg");
		BufferedImage image = ImageIO.read(sourceFile);
		if (image == null) {
			// ImageIO.read signals "no reader could decode this" with null, not an exception.
			throw new IllegalStateException("Cannot decode image: " + sourceFile);
		}

		image = ImageUtil.binaryFilter(image);
		image = ImageUtil.lineFilter(image);
		image = ImageUtil.meanFilter(image);

		File imageFile = new File("E:/captcha5.jpg");
		if (!ImageIO.write(image, "jpg", imageFile)) {
			// write() returns false when no suitable writer exists; without this check a stale
			// file from an earlier run could be OCR'd instead.
			throw new IllegalStateException("No image writer available to write: " + imageFile);
		}

		Tesseract tesseract = Tesseract.getInstance();
		tesseract.setLanguage("eng");
		String code = tesseract.doOCR(imageFile);

		System.out.println(code);
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 8
Project: OCR-libraries   File: TesseractDetection.java   Source Code and License 5 votes vote down vote up
/**
 * Recognizes the text of the image at {@code filePath} using the "hun" language.
 *
 * @param filePath path of the image file to read
 * @return the recognized text, or the literal {@code "ERROR"} when Tesseract fails
 */
@Override
public String detect(String filePath) {
    final File source = new File(filePath);
    final Tesseract ocr = new Tesseract();
    ocr.setLanguage("hun");

    String text;
    try {
        text = ocr.doOCR(source);
    } catch (TesseractException e) {
        text = "ERROR";
    }
    return text;
}
 
Example 9
Project: WordamentPlayer   File: BoardConstructor.java   Source Code and License 5 votes vote down vote up
/**
 * Converts the tile images of a board into a character grid by running OCR on each tile.
 *
 * <p>Each tile is converted to grayscale, recognized with Tesseract, and reduced to a single
 * character via {@code BoardConstructor.getLastAlpha}.
 *
 * @param tiles tile images; assumed to be laid out row by row with {@code Player.BOARD_WIDTH}
 *              tiles per row (TODO confirm against the caller)
 * @return a {@code BOARD_HEIGHT x BOARD_WIDTH} grid of recognized characters
 * @throws TesseractException if OCR fails on any tile
 */
public static char[][] getConvertedBoard(BufferedImage[] tiles) throws TesseractException {
	Tesseract reader = new Tesseract();
	char[][] ret = new char[Player.BOARD_HEIGHT][Player.BOARD_WIDTH];
	for (int j = 0; j < Player.BOARD_HEIGHT; j++) {
		for (int k = 0; k < Player.BOARD_WIDTH; k++) {
			// Row stride is the number of columns (BOARD_WIDTH); using BOARD_HEIGHT here only
			// worked because the board happened to be square.
			BufferedImage processedImage = ImageHelper.convertImageToGrayscale(tiles[j * Player.BOARD_WIDTH + k]);
			String convertedTile = reader.doOCR(processedImage);
			ret[j][k] = BoardConstructor.getLastAlpha(convertedTile);
		}
	}
	return ret;
}
 
Example 10
Project: project-bianca   File: TesseractOCR.java   Source Code and License 5 votes vote down vote up
/**
 * Runs Tesseract on the image wrapped by {@code image} and logs the recognized text.
 *
 * @param image wrapper whose {@code getImage()} result is passed to Tesseract
 * @return the recognized text, or {@code null} when Tesseract throws
 */
public String OCR(SerializableImage image) {
	String recognized = null;
	try {
		recognized = Tesseract.getInstance().doOCR(image.getImage());
		log.info("Read: " + recognized);
	} catch (TesseractException e) {
		e.printStackTrace();
	}
	return recognized;
}
 
Example 11
Project: tesseract-ocr-demo   File: App.java   Source Code and License 5 votes vote down vote up
/**
 * Recognizes the English text of an image file using the data path held in {@code tessdataPath}.
 *
 * @param file the image file to read
 * @return the recognized text, or an empty string when Tesseract fails (the failure message is
 *         printed to standard error)
 */
public static String ocr(File file) {
    Tesseract engine = Tesseract.getInstance();  // JNA Interface Mapping
    engine.setDatapath(tessdataPath);
    engine.setLanguage("eng");
    try {
        return engine.doOCR(file);
    } catch (TesseractException e) {
        System.err.println(e.getMessage());
        return "";
    }
}
 
Example 12
Project: myrobotlab   File: TesseractOcr.java   Source Code and License 5 votes vote down vote up
/**
 * Runs Tesseract on the given image and logs the recognized text.
 *
 * @param image the image to read
 * @return the recognized text, or {@code null} when Tesseract throws
 */
public String ocr(BufferedImage image) {
  String text = null;
  try {
    text = Tesseract.getInstance().doOCR(image);
    log.info("Read: " + text);
  } catch (TesseractException e) {
    e.printStackTrace();
  }
  return text;
}
 
Example 13
Project: chart-recognition-library   File: OCRReader.java   Source Code and License 5 votes vote down vote up
/**
 * Recognizes the text of an image after doubling its width and height and thresholding it.
 *
 * <p>Tesseract is switched to {@code OEM_TESSERACT_ONLY}; the image is resized with
 * {@code getScaledImage} and passed through {@code thresholdImage(img, 165)} before OCR.
 *
 * @param image the image containing the text to recognize
 * @return the text returned by Tesseract for the preprocessed image
 * @throws IllegalStateException if recognition fails; the original exception is kept as the cause
 */
public static String recognizeXText(Image image) {
	LibraryLoaderSingleton.getInstance();
	Tesseract instance = Tesseract.getInstance(); // JNA Interface Mapping
	instance.setOcrEngineMode(TessAPI.TessOcrEngineMode.OEM_TESSERACT_ONLY);
	BufferedImage img = getScaledImage(image.getInnerImage(), image.getInnerImage().getWidth()*2, image.getInnerImage().getHeight()*2);
	img = thresholdImage(img, 165);

	try {
		return instance.doOCR(img);
	} catch (TesseractException e) {
		throw new IllegalStateException(e);
	} catch (Exception ex) {
		// Keep the cause so the real failure is not lost behind the generic message.
		throw new IllegalStateException("An error during text recognition was encountered.", ex);
	}
}
 
Example 14
Project: HearthStats.net-Uploader   File: OcrBase.java   Source Code and License 5 votes vote down vote up
/**
 * Runs Tesseract over an image and returns the recognised text without surrounding whitespace.
 *
 * @param image     An image to be processed by OCR. Should be cropped and filtered to ensure the contrast is sufficient.
 * @param iteration Value handed to {@code getTesseractPageSegMode} to choose the page segmentation mode
 * @return The trimmed text that was recognised in the image
 * @throws OcrException wrapping any exception raised while configuring or running Tesseract
 */
protected String performOcr(BufferedImage image, int iteration) throws OcrException {
    try {
        Tesseract ocr = Tesseract.getInstance();
        ocr.setPageSegMode(getTesseractPageSegMode(iteration));
        return ocr.doOCR(image).trim();
    } catch (Exception cause) {
        throw new OcrException("Error performing OCR", cause);
    }
}
 
Example 15
Project: ocr-tess4j-rest   File: Tess4jV1.java   Source Code and License 5 votes vote down vote up
/**
 * Decodes the Base64 image payload into a temporary file, runs Tesseract on that file and logs
 * the recognized text at debug level. The temporary file is deleted in all cases.
 *
 * @param image request body carrying the Base64 image data and the file extension used as the
 *              temporary file's suffix
 * @return a {@code Status} of "success" when OCR completed
 * @throws Exception a {@code TesseractException} wrapping the original failure when decoding,
 *                   writing or OCR fails; an I/O error if the temporary file cannot be created
 */
@RequestMapping(value = "ocr/v0.9/upload", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Status doOcrFile(@RequestBody final Image image) throws Exception {
    File tmpFile = File.createTempFile("ocr_image", image.getExtension());
    try {
        FileUtils.writeByteArrayToFile(tmpFile, Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = Tesseract.getInstance(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(tmpFile);
        LOGGER.debug("OCR Image Text = " + imageText);
    } catch (Exception e) {
        LOGGER.error("Exception while converting/uploading image: ", e);
        // Propagate the cause so callers can see what actually failed.
        throw new TesseractException(e);
    } finally {
        tmpFile.delete();
    }
    return new Status("success");
}
 
Example 16
Project: ocr-tess4j-rest   File: Tess4jV1.java   Source Code and License 5 votes vote down vote up
/**
 * Decodes the Base64 image payload in memory, runs Tesseract on it, stores the recognized text
 * on the image, saves the image through {@code repository} and logs the result at debug level.
 *
 * @param image request body carrying the Base64 image data; its text is set to the OCR result
 * @return a {@code Status} of "success" when OCR and persistence completed
 * @throws Exception a {@code TesseractException} wrapping the original failure when decoding,
 *                   OCR or saving fails
 */
@RequestMapping(value = "ocr/v1/upload", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Status doOcr(@RequestBody Image image) throws Exception {
    try {
        ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = Tesseract.getInstance(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(ImageIO.read(bis));
        image.setText(imageText);
        repository.save(image);
        LOGGER.debug("OCR Result = " + imageText);
    } catch (Exception e) {
        LOGGER.error("TessearctException while converting/uploading image: ", e);
        // Propagate the cause so callers can see what actually failed.
        throw new TesseractException(e);
    }

    return new Status("success");
}
 
Example 17
Project: Java-Data-Science-Made-Easy   File: TessrJExample.java   Source Code and License 4 votes vote down vote up
/**
 * Demonstrates basic Tess4J usage: OCRs {@code OCRExample.png} with the "eng" language and
 * prints the text, or prints the failure message to standard error if Tesseract throws.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args) {
    ITesseract tesseract = new Tesseract();
    tesseract.setLanguage("eng");
    try {
        String text = tesseract.doOCR(new File("OCRExample.png"));
        System.out.println(text);
    } catch (TesseractException ex) {
        System.err.println(ex.getMessage());
    }
}
 
Example 18
Project: BiliLiveLib   File: OCRUtil.java   Source Code and License 4 votes vote down vote up
/**
 * Creates the utility with a fresh Tesseract engine configured for the "captcha" language and
 * the page segmentation mode given by {@code SINGLE_LINE_MODE}.
 */
public OCRUtil() {
    this.tesseract = new Tesseract();
    this.tesseract.setLanguage("captcha");
    this.tesseract.setPageSegMode(SINGLE_LINE_MODE);
}
 
Example 19
Project: WebcamParser   File: Reader.java   Source Code and License 4 votes vote down vote up
/**
 * Recognizes the text of an image in the configured {@code language}, strips every character
 * outside the allowed set (word characters, newlines, common punctuation, quotes, parentheses
 * and spaces), prints the cleaned text and hands it to {@code read}.
 *
 * @param image the image to recognize
 * @throws Exception if OCR or the subsequent {@code read} call fails
 */
public static void parse(BufferedImage image) throws Exception {
	Tesseract ocr = Tesseract.getInstance();
	ocr.setLanguage(language);

	String recognized = ocr.doOCR(image).trim();
	String cleaned = recognized.replaceAll("[^\\w\n.,;!?\'\":»«„”\\(\\) ]", "");

	System.out.println(cleaned);
	read(cleaned);
}
 
Example 20
Project: opentest   File: TesseractOcr.java   Source Code and License 3 votes vote down vote up
public TesseractOcr() {
        this.tess = new Tesseract();
        this.tess.setDatapath(System.getProperty("user.dir"));
//        this.tess.setOcrEngineMode(ITessAPI.TessOcrEngineMode.OEM_CUBE_ONLY);
    }
 
Example 21
Project: esct-messenger-bot   File: ImageParser.java   Source Code and License 3 votes vote down vote up
/**
 * Recognizes the English text of an image file with a new Tesseract engine whose data path is
 * set to the empty string.
 *
 * @param imageFile the image file to read
 * @return the text recognized by Tesseract
 * @throws TesseractException if OCR fails
 */
public static String fileToString(File imageFile) throws TesseractException {
    final ITesseract ocr = new Tesseract();
    ocr.setDatapath("");
    ocr.setLanguage("eng");
    final String text = ocr.doOCR(imageFile);
    return text;
}