Java Code Examples for org.apache.pdfbox.pdfparser.PDFParser#getDocument()

The following examples show how to use org.apache.pdfbox.pdfparser.PDFParser#getDocument(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PDFReader.java    From swcv with MIT License 6 votes vote down vote up
/**
 * Downloads the PDF at the given URL, parses it and stores the extracted
 * text in {@code text}.
 *
 * <p>The network stream and the parsed document are released on every path,
 * including when parsing or text extraction throws.
 *
 * @param url address of the PDF to fetch
 * @return {@code true} if the text was extracted and the document was closed
 *         without error; {@code false} if any of those steps threw (the stack
 *         trace is printed)
 */
private boolean getFile(String url)
{
    InputStream in = null;
    PDDocument pdoc = null;
    boolean success = false;
    try
    {
        URL u = new URL(url);
        URLConnection con = u.openConnection();
        in = con.getInputStream();
        PDFParser p = new PDFParser(in);
        p.parse();
        pdoc = new PDDocument(p.getDocument());
        PDFTextStripper pts = new PDFTextStripper();
        text = pts.getText(pdoc);
        success = true;
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        if (pdoc != null)
        {
            try
            {
                pdoc.close();
            }
            catch (Exception e)
            {
                // A failed close of the document is still reported as a failed call.
                e.printStackTrace();
                success = false;
            }
        }
        if (in != null)
        {
            try
            {
                in.close();
            }
            catch (Exception e)
            {
                // The stream was never closed before this fix; a failure here is
                // reported but does not change the result of the extraction.
                e.printStackTrace();
            }
        }
    }
    return success;
}
 
Example 2
Source File: ExtractTextTools.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the text of a PDF supplied as a byte array.
 *
 * @param bytes the raw PDF content
 * @return the text of pages 1 through the last page, or {@code null} if
 *         parsing, extraction or closing the documents throws (the exception
 *         is passed to {@code logger.error})
 */
public static String pdf(byte[] bytes) {
	try {
		PDFParser pdfParser = new PDFParser(new RandomAccessBuffer(bytes));
		pdfParser.parse();
		String extracted;
		try (COSDocument cosDocument = pdfParser.getDocument();
				PDDocument document = new PDDocument(cosDocument)) {
			PDFTextStripper textStripper = new PDFTextStripper();
			// Cover the whole document explicitly: first page to page count.
			textStripper.setStartPage(1);
			textStripper.setEndPage(document.getNumberOfPages());
			extracted = textStripper.getText(document);
		}
		// Reached only when both documents closed without throwing.
		return extracted;
	} catch (Exception failure) {
		logger.error(failure);
		return null;
	}
}
 
Example 3
Source File: ExtractTextHelper.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Reads an in-memory PDF and returns its text content.
 *
 * @param bytes the PDF file content
 * @return the text from the first page through the last page; {@code null}
 *         when any step (parse, extract, close) throws, in which case the
 *         exception is handed to {@code logger.error}
 */
public static String pdf(byte[] bytes) {
	try {
		PDFParser source = new PDFParser(new RandomAccessBuffer(bytes));
		source.parse();
		String content;
		try (COSDocument lowLevelDoc = source.getDocument();
				PDDocument highLevelDoc = new PDDocument(lowLevelDoc)) {
			int lastPage = highLevelDoc.getNumberOfPages();
			PDFTextStripper extractor = new PDFTextStripper();
			extractor.setStartPage(1);
			extractor.setEndPage(lastPage);
			content = extractor.getText(highLevelDoc);
		}
		// Only returned if the try-with-resources block closed cleanly.
		return content;
	} catch (Exception ex) {
		logger.error(ex);
		return null;
	}
}
 
Example 4
Source File: ExtractTextHelper.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts PDF bytes into plain text.
 *
 * @param bytes the bytes of the PDF to read
 * @return text of all pages (start page 1, end page = page count), or
 *         {@code null} if an exception occurs while parsing, extracting or
 *         closing; the exception is reported through {@code logger.error}
 */
public static String pdf(byte[] bytes) {
	String result = null;
	try {
		PDFParser byteParser = new PDFParser(new RandomAccessBuffer(bytes));
		byteParser.parse();
		String pageText;
		try (COSDocument parsed = byteParser.getDocument();
				PDDocument pages = new PDDocument(parsed)) {
			PDFTextStripper reader = new PDFTextStripper();
			reader.setStartPage(1);
			reader.setEndPage(pages.getNumberOfPages());
			pageText = reader.getText(pages);
		}
		// Published only after both documents closed without throwing,
		// so a close failure still yields null.
		result = pageText;
	} catch (Exception error) {
		logger.error(error);
	}
	return result;
}
 
Example 5
Source File: SignatureOptions.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Parses the given source and stores the resulting document in
 * {@code visualSignature}.
 *
 * @param rar the source to parse; it is also stored in {@code pdfSource}
 * @throws IOException declared by this method; presumably raised by the
 *         parser on unreadable or malformed input (confirm against PDFParser)
 */
private void initFromRandomAccessRead(RandomAccessRead rar) throws IOException
{
    // Store the source before parsing so the reference is kept even if
    // parse() throws.
    pdfSource = rar;

    PDFParser signatureParser = new PDFParser(rar);
    signatureParser.parse();

    visualSignature = signatureParser.getDocument();
}
 
Example 6
Source File: PDF2TextExample.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Extracts the text of the PDF at {@code filename} and writes it to
 * {@code src/output/pdf.txt}.
 *
 * <p>The random-access source, both document objects and the writer are
 * closed on every path, including when parsing or extraction throws.
 *
 * @param filename path of the PDF file to read
 * @throws IOException if the file cannot be opened, parsed or read, or if the
 *         output file cannot be created
 */
private static void generateTxtFromPDF(String filename) throws IOException {
	File f = new File(filename);
	String parsedText;

	// The source passed to the parser was never closed before; own it here so
	// the file handle is released even when parse() fails.
	try (RandomAccessFile source = new RandomAccessFile(f, "r")) {
		PDFParser parser = new PDFParser(source);
		parser.parse();

		// Resources close in reverse order: pdDoc first, then cosDoc.
		try (COSDocument cosDoc = parser.getDocument(); PDDocument pdDoc = new PDDocument(cosDoc)) {
			PDFTextStripper pdfStripper = new PDFTextStripper();
			parsedText = pdfStripper.getText(pdDoc);
		}
	}

	try (PrintWriter pw = new PrintWriter("src/output/pdf.txt")) {
		pw.print(parsedText);
	}
}