Java Code Examples for org.apache.pdfbox.pdfparser.PDFParser

The following are the top-voted examples showing how to use org.apache.pdfbox.pdfparser.PDFParser. These examples are extracted from open source projects. You can vote up the examples you like, and your votes will be used in our system to surface more good examples.
Example 1
Project: filterit   File: FileObject.java   Source Code and License 11 votes vote down vote up
/**
 * Extracts the text of the PDF referenced by {@code f} and appends it, line by line,
 * to {@code lines} (both are declared outside this method).
 *
 * <p>Any failure is reported to the user in a dialog and its stack trace is printed;
 * nothing is propagated to the caller.
 */
private void readPDFDocument() {
	// try-with-resources: the stream is released even when parsing or extraction fails.
	try (FileInputStream fs = new FileInputStream(f)) {
		PDFParser parser = new PDFParser(fs);
		parser.parse();
		PDDocument pdDoc = new PDDocument(parser.getDocument());
		try {
			String text = new PDFTextStripper().getText(pdDoc);
			for (String line : text.split(System.lineSeparator())) {
				lines.add(line);
			}
		} finally {
			// The original never closed the document, leaking its parse buffers.
			pdDoc.close();
		}
	} catch (Exception e) {
		JOptionPane.showMessageDialog(null, "Fehler in readPDFDocument",
				"Fehler", JOptionPane.ERROR_MESSAGE);
		e.printStackTrace();
	}
}
 
Example 2
Project: OSCAR-ConCert   File: SplitDocumentAction.java   Source Code and License 7 votes vote down vote up
/**
 * Rotates every page of the requested PDF document by 180 degrees, invalidates the
 * cached version of each page and saves the result over the original file.
 *
 * @param request supplies the {@code "document"} parameter used to look up the document
 * @return always {@code null}
 * @throws Exception if the file cannot be read, parsed or written
 */
public ActionForward rotate180(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDao.getDocument(request.getParameter("document"));

	String docdownload = oscar.OscarProperties.getInstance().getProperty("DOCUMENT_DIR");
	String pdfPath = docdownload + doc.getDocfilename();

	FileInputStream input = new FileInputStream(pdfPath);
	try {
		PDFParser parser = new PDFParser(input);
		parser.parse();
		PDDocument pdf = parser.getPDDocument();
		try {
			int x = 1;
			for (Object p : pdf.getDocumentCatalog().getAllPages()) {
				PDPage pg = (PDPage)p;
				// A page without an explicit rotation entry is treated as 0 degrees.
				Integer current = pg.getRotation();
				int r = (current != null ? current : 0);
				pg.setRotation((r+180)%360);

				ManageDocumentAction.deleteCacheVersion(doc, x);
				x++;
			}

			pdf.save(pdfPath);
		} finally {
			// Close the document even when rotating or saving fails.
			pdf.close();
		}
	} finally {
		input.close();
	}

	return null;
}
 
Example 3
Project: OSCAR-ConCert   File: SplitDocumentAction.java   Source Code and License 7 votes vote down vote up
/**
 * Rotates every page of the requested PDF document by 90 degrees, invalidates the
 * cached version of each page and saves the result over the original file.
 *
 * @param request supplies the {@code "document"} parameter used to look up the document
 * @return always {@code null}
 * @throws Exception if the file cannot be read, parsed or written
 */
public ActionForward rotate90(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDao.getDocument(request.getParameter("document"));

	String docdownload = oscar.OscarProperties.getInstance().getProperty("DOCUMENT_DIR");
	String pdfPath = docdownload + doc.getDocfilename();

	FileInputStream input = new FileInputStream(pdfPath);
	try {
		PDFParser parser = new PDFParser(input);
		parser.parse();
		PDDocument pdf = parser.getPDDocument();
		try {
			int x = 1;
			for (Object p : pdf.getDocumentCatalog().getAllPages()) {
				PDPage pg = (PDPage)p;
				// A page without an explicit rotation entry is treated as 0 degrees.
				Integer current = pg.getRotation();
				int r = (current != null ? current : 0);
				pg.setRotation((r+90)%360);

				ManageDocumentAction.deleteCacheVersion(doc, x);
				x++;
			}

			pdf.save(pdfPath);
		} finally {
			// Close the document even when rotating or saving fails.
			pdf.close();
		}
	} finally {
		input.close();
	}

	return null;
}
 
Example 4
Project: arks-api   File: PDFTextParser.java   Source Code and License 7 votes vote down vote up
/**
 * Extracts the text of a single page of a PDF file.
 *
 * @param fileName path of the PDF file to read
 * @param pageno   page to extract, passed unchanged to the stripper as both start and end page
 * @return the text the stripper produced for that page
 * @throws IOException if the file cannot be opened, parsed or read
 * @throws CryptographyException declared for callers; nothing in this method decrypts the document
 */
static String pdftoText(String fileName,int pageno) throws IOException, CryptographyException 
{
	FileInputStream in = new FileInputStream(new File(fileName));
	try {
		PDFParser parser = new PDFParser(in);
		parser.parse();
		PDDocument pdDoc = new PDDocument(parser.getDocument());
		try {
			PDFTextStripper pdfStripper = new PDFTextStripper();
			pdfStripper.setStartPage(pageno);
			pdfStripper.setEndPage(pageno);
			return pdfStripper.getText(pdDoc);
		} finally {
			// Previously skipped whenever extraction threw.
			pdDoc.close();
		}
	} finally {
		// The original never closed the file stream at all.
		in.close();
	}
}
 
Example 5
Project: trello-to-markdown   File: PdfTest.java   Source Code and License 7 votes vote down vote up
/**
 * Renders an HTML string containing a non-ASCII character and checks that the
 * character survives in the text extracted from the generated PDF.
 */
@Test
public void testPdfFromStringTo() throws Exception {

    // GIVEN a html template containing special characters that java stores in utf-16 internally
    Pdf pdf = new Pdf();
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Müller</h1></html>", PageType.htmlAsString);

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // THEN it is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDDocument document = new PDDocument(parser.getDocument());
    try {
        String pdfText = new PDFTextStripper().getText(document);

        Assert.assertThat("document should contain the creditorName", pdfText, containsString("Müller"));
    } finally {
        // Release the parsed document even when the assertion fails.
        document.close();
    }
}
 
Example 6
Project: trello-to-markdown   File: PdfTest.java   Source Code and License 7 votes vote down vote up
/**
 * Builds a four-page PDF (three HTML strings and one URL) and checks that the
 * text of the last page appears in the extracted text.
 */
@Test
public void testMultiplePages() throws Exception {
    // GIVEN
    Pdf pdf = new Pdf();
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Page 1</h1></html>", PageType.htmlAsString);
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Page 2</h1></html>", PageType.htmlAsString);
    pdf.addPage("http://www.google.com", PageType.url);
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Page 4</h1></html>", PageType.htmlAsString);

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // THEN it is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDDocument document = new PDDocument(parser.getDocument());
    try {
        String pdfText = new PDFTextStripper().getText(document);

        Assert.assertThat("document should contain the fourth page name", pdfText, containsString("Page 4"));
    } finally {
        // Release the parsed document even when the assertion fails.
        document.close();
    }
}
 
Example 7
Project: JavadocOfflineSearch   File: PdfAttempter.java   Source Code and License 7 votes vote down vote up
/**
 * Parses the PDF supplied on {@code is} and returns its extracted text.
 *
 * @param is    stream positioned at the start of a PDF
 * @param stats when {@code true}, the extracted text is also handed to {@code vc.addAll}
 * @return the extracted text
 * @throws IOException if parsing, extraction or closing fails
 */
public  String pdftoText(InputStream is, boolean stats) throws IOException {
    COSDocument lowLevelDoc = null;
    PDDocument highLevelDoc = null;
    try {
        final PDFParser pdfParser = new PDFParser(is);
        pdfParser.parse();
        lowLevelDoc = pdfParser.getDocument();

        final PDFTextStripper stripper = new PDFTextStripper();
        highLevelDoc = new PDDocument(lowLevelDoc);

        final String extracted = stripper.getText(highLevelDoc);
        if (stats) {
            vc.addAll(extracted);
        }
        return extracted;
    } finally {
        // Whatever was opened gets closed, in the order it was obtained.
        if (lowLevelDoc != null) {
            lowLevelDoc.close();
        }
        if (highLevelDoc != null) {
            highLevelDoc.close();
        }
    }
}
 
Example 8
Project: cms   File: PdfTest.java   Source Code and License 6 votes vote down vote up
/**
 * Renders an HTML string containing a non-ASCII character, saves it to a temporary
 * folder, and checks that the character survives in the text extracted from the PDF bytes.
 */
@Test
public void testPdfFromStringTo() throws Exception {

    // GIVEN an html template containing special characters that java stores in utf-16 internally
    Pdf pdf = pdfBuilder.build();
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Müller</h1></html>", PageType.htmlAsString);

    String tempFolder = temporaryFolder.newFolder().getPath();
    pdf.saveAs(tempFolder+"/output.pdf");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    RandomAccessBufferedFileInputStream source =
            new RandomAccessBufferedFileInputStream(new ByteArrayInputStream(pdfBytes));
    try {
        PDFParser parser = new PDFParser(source);

        // THEN it is a valid PDF (otherwise an IOException occurs)
        parser.parse();
        PDDocument document = new PDDocument(parser.getDocument());
        try {
            String pdfText = new PDFTextStripper().getText(document);

            assertThat("document should contain the creditorName", pdfText, containsString("Müller"));
        } finally {
            document.close();
        }
    } finally {
        // Close the random-access source explicitly; the original left it open.
        source.close();
    }
}
 
Example 9
Project: trello-to-markdown   File: XvfbTest.java   Source Code and License 6 votes vote down vote up
/**
 * Generates a PDF from a URL through an Xvfb-wrapped configuration and checks that
 * the extracted text contains the expected word.
 */
@Test
public void testPdfWithXvfb() throws Exception {

    // GIVEN
    XvfbConfig xc = new XvfbConfig();
    xc.addParams(new Param("--auto-servernum"), new Param("--server-num=1"));

    WrapperConfig wc = new WrapperConfig();
    wc.setXvfbConfig(xc);

    Pdf pdf = new Pdf(wc);
    pdf.addPage("http://www.google.com", PageType.url);

    pdf.saveAs("output.pdf");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // THEN it is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDDocument document = new PDDocument(parser.getDocument());
    try {
        String pdfText = new PDFTextStripper().getText(document);

        Assert.assertThat("document should be generated", pdfText, containsString("Google"));
    } finally {
        // Release the parsed document even when the assertion fails.
        document.close();
    }
}
 
Example 10
Project: MedicamentDB   File: InteractionService.java   Source Code and License 6 votes vote down vote up
/**
 * Runs a {@code ThesaurusPDFStripper} over {@code thesaurus.pdf} in the given directory,
 * starting at page 2, and returns the substances the stripper collected.
 *
 * @param dir directory containing {@code thesaurus.pdf}
 * @return the stripper's {@code substances} after the text pass
 * @throws IOException if the file cannot be read or parsed
 */
private List<SubstanceInteraction> readThesaurus(File dir) throws IOException {
    File file = new File(dir, "thesaurus.pdf");

    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            ThesaurusPDFStripper pdfStripper = new ThesaurusPDFStripper();
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());

            // The return value is ignored; the stripper fills 'substances' as a side effect.
            pdfStripper.getText(pdDoc);

            return pdfStripper.substances;
        } finally {
            // Previously skipped whenever stripping threw.
            cosDoc.close();
        }
    } finally {
        source.close();
    }
}
 
Example 11
Project: MedicamentDB   File: PDFTest.java   Source Code and License 6 votes vote down vote up
/**
 * Runs an {@code AdvancedPDFStripper} over {@code thesaurus.pdf} from the test
 * classpath root, starting at page 2, and prints the collected substances.
 */
@Test
public void parse() throws IOException {
    File file = new File(MedicamentTest.class.getClassLoader().getResource(".").getFile(), "thesaurus.pdf");

    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            AdvancedPDFStripper pdfStripper = new AdvancedPDFStripper();
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());

            // The return value is ignored; the stripper fills 'substances' as a side effect.
            pdfStripper.getText(pdDoc);

            System.out.println(pdfStripper.substances);
        } finally {
            // The original never closed the document or its source.
            cosDoc.close();
        }
    } finally {
        source.close();
    }
}
 
Example 12
Project: java-wkhtmltopdf-wrapper   File: PdfTest.java   Source Code and License 6 votes vote down vote up
/**
 * Renders an HTML string containing a non-ASCII character and checks that the
 * character survives in the text extracted from the generated PDF.
 */
@Test
public void testPdfFromStringTo() throws Exception {

    // GIVEN a html template containing special characters that java stores in utf-16 internally
    Pdf pdf = new Pdf();
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Müller</h1></html>");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // THEN it is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDDocument document = new PDDocument(parser.getDocument());
    try {
        String pdfText = new PDFTextStripper().getText(document);

        Assert.assertThat("document should contain the creditorName", pdfText, containsString("Müller"));
    } finally {
        // Release the parsed document even when the assertion fails.
        document.close();
    }
}
 
Example 13
Project: java-wkhtmltopdf-wrapper   File: PdfTest.java   Source Code and License 6 votes vote down vote up
/**
 * Builds a four-page PDF (three HTML strings and one URL) and checks that the
 * text of the last page appears in the extracted text.
 */
@Test
public void testMultiplePages() throws Exception {
    // GIVEN
    Pdf pdf = new Pdf();
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Page 1</h1></html>");
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Page 2</h1></html>");
    pdf.addPageFromUrl("http://www.google.com");
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Page 4</h1></html>");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // THEN it is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDDocument document = new PDDocument(parser.getDocument());
    try {
        String pdfText = new PDFTextStripper().getText(document);

        Assert.assertThat("document should contain the fourth page name", pdfText, containsString("Page 4"));
    } finally {
        // Release the parsed document even when the assertion fails.
        document.close();
    }
}
 
Example 14
Project: java-wkhtmltopdf-wrapper   File: XvfbTest.java   Source Code and License 6 votes vote down vote up
/**
 * Generates a PDF from a URL through an Xvfb-wrapped configuration and checks that
 * the extracted text contains the expected word.
 */
@Test
public void testPdfWithXvfb() throws Exception {

    // GIVEN
    XvfbConfig xc = new XvfbConfig();
    xc.addParams(new Param("--auto-servernum"), new Param("--server-num=1"));

    WrapperConfig wc = new WrapperConfig();
    wc.setXvfbConfig(xc);

    Pdf pdf = new Pdf(wc);
    pdf.addPage("http://www.google.com", PageType.url);

    pdf.saveAs("output.pdf");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // THEN it is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDDocument document = new PDDocument(parser.getDocument());
    try {
        String pdfText = new PDFTextStripper().getText(document);

        Assert.assertThat("document should be generated", pdfText, containsString("Google"));
    } finally {
        // Release the parsed document even when the assertion fails.
        document.close();
    }
}
 
Example 15
Project: swcv   File: PDFReader.java   Source Code and License 6 votes vote down vote up
/**
 * Downloads the PDF at {@code url}, extracts its text and stores it in {@code text}
 * (declared outside this method).
 *
 * @param url location of the PDF to fetch
 * @return {@code true} when the text was extracted; {@code false} on any failure
 *         (the stack trace is printed)
 */
private boolean getFile(String url)
{
    try
    {
        InputStream in = new URL(url).openConnection().getInputStream();
        try
        {
            PDFParser p = new PDFParser(in);
            p.parse();
            PDDocument pdoc = new PDDocument(p.getDocument());
            try
            {
                text = new PDFTextStripper().getText(pdoc);
            }
            finally
            {
                // Previously skipped whenever extraction threw.
                pdoc.close();
            }
        }
        finally
        {
            // The original never closed the network stream.
            in.close();
        }

        return true;
    }
    catch (Exception e)
    {
        e.printStackTrace();
        return false;
    }
}
 
Example 16
Project: shrink-pdf   File: ShrinkPDF.java   Source Code and License 6 votes vote down vote up
/**
 * Parses the PDF referenced by {@code input} and passes each page's resources to
 * {@code scanResources}.
 *
 * <p>Takes no parameters: it reads {@code input}, {@code compQual} and
 * {@code compQualDefault}, which are declared outside this method. A negative
 * {@code compQual} is replaced by {@code compQualDefault} before processing.
 *
 * @return the processed {@code PDDocument}; the caller is responsible for closing it
 * @throws FileNotFoundException if {@code input} cannot be opened
 * @throws IOException if parsing or resource scanning fails; the partially
 *                     processed document is closed before the exception propagates
 */
private PDDocument shrinkMe() 
        throws FileNotFoundException, IOException {
     if(compQual < 0)
         compQual = compQualDefault;
     final FileInputStream fis = new FileInputStream(input);
     PDDocument doc = null;
     boolean succeeded = false;
     try {
          final PDFParser parser = new PDFParser(fis);
          parser.parse();
          doc = parser.getPDDocument();
          List<?> pages = doc.getDocumentCatalog().getAllPages();
          for(Object p : pages) {
               if(!(p instanceof PDPage))
                    continue;
               PDPage page = (PDPage) p;
               scanResources(page.getResources(), doc);
          }
          succeeded = true;
          return doc;
     } finally {
          try {
               // Only a failed run closes the document; on success ownership passes to the caller.
               if(!succeeded && doc != null)
                    doc.close();
          } finally {
               // NOTE(review): assumes parse() has fully consumed the stream so the returned
               // document no longer reads from it — confirm against the PDFBox version in use.
               fis.close();
          }
     }
}
 
Example 17
Project: oscar-old   File: SplitDocumentAction.java   Source Code and License 6 votes vote down vote up
/**
 * Rotates every page of the requested PDF document by 90 degrees, invalidates the
 * cached version of each page and saves the result over the original file.
 *
 * @param request supplies the {@code "document"} parameter used to look up the document
 * @return always {@code null}
 * @throws Exception if the file cannot be read, parsed or written
 */
public ActionForward rotate90(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDAO.getDocument(request.getParameter("document"));
	String pdfPath = EDocUtil.getDocumentPath(doc.getDocfilename());

	FileInputStream input = new FileInputStream(pdfPath);
	try {
		PDFParser parser = new PDFParser(input);
		parser.parse();
		PDDocument pdf = parser.getPDDocument();
		try {
			int x = 1;
			for (Object p : pdf.getDocumentCatalog().getAllPages()) {
				PDPage pg = (PDPage)p;
				// A page without an explicit rotation entry is treated as 0 degrees.
				Integer current = pg.getRotation();
				int r = (current != null ? current : 0);
				pg.setRotation((r+90)%360);

				ManageDocumentAction.deleteCacheVersion(doc, x);
				x++;
			}

			pdf.save(pdfPath);
		} finally {
			// Close the document even when rotating or saving fails.
			pdf.close();
		}
	} finally {
		input.close();
	}

	return null;

}
 
Example 18
Project: flint   File: PDFBoxWrapper.java   Source Code and License 6 votes vote down vote up
/**
 * Loads a PDF and saves it again to a temporary file.
 *
 * <p>The temporary file is registered for deletion on JVM exit as soon as it is
 * created, so a failed save does not leave it behind.
 *
 * @param pFile PDF file to load
 * @return whether the file loads and saves successfully or not
 */
public static boolean loadSavePDF(File pFile) {
	boolean ret = false;

	try {

		// Note that this test passes files that fail to open in Acrobat
		// The files are saved with the same errors as the original
		// i.e. this is not an effective test for validity

		FileInputStream in = new FileInputStream(pFile);
		try {
			PDFParser parser = new PDFParser(in);
			parser.parse();
			try {
				File temp = File.createTempFile("flint-temp-"+pFile.getName()+"-", ".pdf");
				temp.deleteOnExit();
				parser.getPDDocument().save(temp);
			} finally {
				// Close the parsed document whether or not the save succeeded.
				parser.getDocument().close();
			}
		} finally {
			in.close();
		}
		ret = true;
	} catch (Exception e) {
		e.printStackTrace();
		// See comments in https://issues.apache.org/jira/browse/PDFBOX-1757
		// PDFBox state that these files have errors and their parser is correct
		// The only way to find out that the parser doesn't like it is to catch
		// a general Exception.
	}
	return ret;
}
 
Example 19
Project: DocBleach   File: PdfBleach.java   Source Code and License 5 votes vote down vote up
/**
 * Attempts to open {@code source} with the given password.
 *
 * @return the opened document, or {@code null} when the password is rejected
 * @throws IOException for any parsing failure other than an invalid password
 */
@SuppressFBWarnings(value = "EXS_EXCEPTION_SOFTENING_RETURN_FALSE", justification = "This method is an helper to check the password")
private PDDocument testPassword(ScratchFile inFile, RandomAccessRead source, String password) throws IOException {
    final PDFParser passwordProbe = new PDFParser(source, password, inFile);
    PDDocument opened = null;
    try {
        passwordProbe.parse();
        opened = passwordProbe.getPDDocument();
    } catch (InvalidPasswordException wrongPassword) {
        LOGGER.error("The tested password is invalid");
    } finally {
        // Always reset the source, whatever the outcome of this attempt.
        rewind(source);
    }
    return opened;
}
 
Example 20
Project: arks-api   File: PageByPageSearch.java   Source Code and License 5 votes vote down vote up
/**
 * Finds the pages of a PDF whose extracted text contains a keyword.
 *
 * @param path          path of the PDF file to search
 * @param searchKeyword text to look for (case-sensitive substring match)
 * @return one {@code Container} per matching page, holding the page text, the file
 *         path and the 1-based page number
 * @throws IOException if the file cannot be read or parsed
 */
public List<Container> findpages(String path, String searchKeyword) throws IOException 
{
	List<Container> list = new ArrayList<Container>();

	FileInputStream in = new FileInputStream(new File(path));
	try {
		PDFParser parser = new PDFParser(in);
		parser.parse();
		PDDocument doc = new PDDocument(parser.getDocument());
		try {
			PDFTextStripper reader = new PDFTextStripper();
			int pageCount = doc.getNumberOfPages();

			// PDFTextStripper page numbers are 1-based; the original started at 0,
			// which spent one extra iteration on a non-existent page.
			for (int pageNo = 1; pageNo <= pageCount; pageNo++)
			{
				reader.setStartPage(pageNo);
				reader.setEndPage(pageNo);

				// Extract once and reuse for both the match and the stored content.
				String pageText = reader.getText(doc);
				if (pageText.contains(searchKeyword))
				{
					Container container = new Container();
					container.setContent(pageText);
					container.setFilepath(path);
					container.setPageno(pageNo);
					list.add(container);
				}
			}
		} finally {
			doc.close();
		}
	} finally {
		in.close();
	}

	return list;

}
 
Example 21
Project: oscar-old   File: SplitDocumentAction.java   Source Code and License 5 votes vote down vote up
/**
 * Removes the first page of the requested PDF document, invalidates the cached
 * version of every page and saves the result over the original file.
 * Documents with fewer than two pages are left untouched.
 *
 * @param request supplies the {@code "document"} parameter used to look up the document
 * @return always {@code null}
 * @throws Exception if the file cannot be read, parsed or written
 */
public ActionForward removeFirstPage(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDAO.getDocument(request.getParameter("document"));

	String docdownload = EDocUtil.getDocumentPath(doc.getDocfilename());

	FileInputStream input = new FileInputStream(docdownload);
	try {
		PDFParser parser = new PDFParser(input);
		parser.parse();
		PDDocument pdf = parser.getPDDocument();
		try {
			// Documents must have at least 2 pages, for the first page to be removed.
			// The finally blocks release the document and stream on this early exit too.
			if (pdf.getNumberOfPages() <= 1) { return null; }

			// Cache versions are numbered from 1, one per page.
			int pageCount = pdf.getDocumentCatalog().getAllPages().size();
			for (int x = 1; x <= pageCount; x++) {
				ManageDocumentAction.deleteCacheVersion(doc, x);
			}

			pdf.removePage(0);

			EDocUtil.subtractOnePage(request.getParameter("document"));

			System.gc(); //avoid Win channel lock problem
			pdf.save(docdownload);
		} finally {
			pdf.close();
		}
	} finally {
		input.close();
	}

	return null;
}
 
Example 22
Project: brigen-base   File: PDFBoxDelegaterImpl.java   Source Code and License 5 votes vote down vote up
/**
 * Verifies that PDFBox's {@code PDFParser} class can be loaded.
 *
 * <p>The class is looked up by name. A {@code PDFParser.class} literal would itself
 * fail with {@code NoClassDefFoundError} when the library is absent, so the
 * {@code ClassNotFoundException} handler below would never run.
 *
 * @throws RuntimeException wrapping the {@code ClassNotFoundException} when PDFBox is missing
 */
private static void check() {
    try {
        Class.forName("org.apache.pdfbox.pdfparser.PDFParser");
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}
 
Example 23
Project: brigen-base   File: PDFBoxDelegaterImpl.java   Source Code and License 5 votes vote down vote up
@Override
public boolean isPdf(InputStream inputStream) {
    boolean ret = false;
    try (InputStream is = inputStream) {
        new PDFParser(is).parse();
        ret = true;
    } catch (IOException e) {
    }
    return ret;
}
 
Example 24
Project: arks-api   File: PageByPageSearch.java   Source Code and License 4 votes vote down vote up
/**
 * Searches a PDF page by page for pages whose text contains every keyword
 * (case-insensitive), and saves the accepted pages to a result PDF.
 *
 * <p>A matching page is only recorded once {@code falseCounter} (declared outside this
 * method) exceeds 1; every matching page increments it.
 *
 * @param path              path of the PDF file to search
 * @param searchKeywordList keywords that must all occur on a page
 * @param fileCounter       number appended to the result PDF file name
 * @return one result per recorded page, with its text, the file path and the 1-based page number
 * @throws IOException if the file cannot be read, parsed or the result cannot be saved
 */
public List<SearchResult> findpages(String path,
        List<String> searchKeywordList, int fileCounter) throws IOException {

    List<SearchResult> list = new ArrayList<SearchResult>();

    PDDocument finalDocument = new PDDocument();
    try {
        FileInputStream in = new FileInputStream(new File(path));
        try {
            PDFParser parser = new PDFParser(new RandomAccessBuffer(in));
            parser.parse();

            PDDocument doc = new PDDocument(parser.getDocument());
            try {
                PDFTextStripper reader = new PDFTextStripper();
                int pageCount = doc.getNumberOfPages();

                // PDFTextStripper counts pages from 1 while getPage() counts from 0. The
                // original used one index for both, so it copied the wrong page and ran
                // one index past the end of the document.
                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                    int pageNumber = pageIndex + 1;
                    reader.setStartPage(pageNumber);
                    reader.setEndPage(pageNumber);

                    // Extract once per page instead of once per keyword.
                    String pageText = reader.getText(doc);
                    String lowerCaseText = pageText.toLowerCase();

                    boolean hasKeywords = true;
                    for (String keyword : searchKeywordList) {
                        if (!lowerCaseText.contains(keyword.toLowerCase())) {
                            hasKeywords = false;
                            break;
                        }
                    }

                    if (hasKeywords) {

                        if (falseCounter > 1) {
                            SearchResult result = new PageResult();
                            result.setFileContent(pageText);
                            result.setFilePath(path);
                            result.setPageNumber(pageNumber);
                            list.add(result);
                            finalDocument.addPage(doc.getPage(pageIndex));
                        }

                        falseCounter++;
                    }
                }

                // Save while the source document is still open: the copied pages belong to it.
                finalDocument
                        .save(ConfigCBSI.getResultPdfPath() + fileCounter + ".pdf");
                logger.info("Result Saved");
            } finally {
                doc.close();
            }
        } finally {
            in.close();
        }
    } finally {
        finalDocument.close();
    }

    return list;

}
 
Example 25
Project: arks-api   File: WordCount.java   Source Code and License 4 votes vote down vote up
/**
 * Counts word occurrences in the text of a PDF file.
 *
 * <p>Punctuation and digits are removed and the text is lower-cased before counting.
 * Words are separated by any run of whitespace, so line breaks no longer glue two
 * words together and empty tokens are not counted.
 *
 * @param fileName path of the PDF file to read
 * @return word-to-count map; whatever was counted before an {@code IOException}
 *         occurred (possibly nothing), after the stack trace has been printed
 */
public static Map<String, Integer> buildWordMap(String fileName) 
{
    Map<String, Integer> wordMap = new HashMap<String,Integer>();
    try 
    {
        FileInputStream in = new FileInputStream(new File(fileName));
        try
        {
            PDFParser parser = new PDFParser(in);
            parser.parse();

            PDDocument doc = new PDDocument(parser.getDocument());
            try
            {
                PDFTextStripper reader = new PDFTextStripper();
                StringBuilder sb = new StringBuilder();
                int pageCount = doc.getNumberOfPages();

                // PDFTextStripper page numbers are 1-based; the original loop ran
                // 0..n-1 and therefore never read the last page.
                for (int pageNo = 1; pageNo <= pageCount; pageNo++)
                {
                    reader.setStartPage(pageNo);
                    reader.setEndPage(pageNo);
                    sb.append(reader.getText(doc));
                }

                String processedtext = sb.toString().replaceAll("\\p{Punct}|\\d", "").toLowerCase();
                for (String word : processedtext.split("\\s+"))
                {
                    if (word.isEmpty())
                    {
                        // A leading separator yields one empty token; skip it.
                        continue;
                    }
                    Integer count = wordMap.get(word);
                    wordMap.put(word, count == null ? 1 : count + 1);
                }
            }
            finally
            {
                doc.close();
            }
        }
        finally
        {
            in.close();
        }
    } 
    catch (IOException e)
    {
        e.printStackTrace();
    }
    return wordMap;
}
 
Example 26
Project: oscar-old   File: SplitDocumentAction.java   Source Code and License 4 votes vote down vote up
/**
 * Rotates the requested document by 180 degrees in place.
 *
 * <p>PDFs have every page rotated and their cached page versions invalidated. Images
 * with content type {@code image/jpg}, {@code image/png} or {@code image/gif} are
 * rotated and rewritten. Any other content type is left untouched.
 *
 * @param request supplies the {@code "document"} parameter used to look up the document
 * @return always {@code null}
 * @throws Exception if the file cannot be read, parsed or written
 */
public ActionForward rotate180(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDAO.getDocument(request.getParameter("document"));

	String docdownload = EDocUtil.getDocumentPath(doc.getDocfilename());

	if(doc.getContenttype().equals("application/pdf")) {
		FileInputStream input = null;
		PDDocument pdf = null;
		try {
			input = new FileInputStream(docdownload);
			PDFParser parser = new PDFParser(input);
			parser.parse();
			pdf = parser.getPDDocument();
			int x = 1;
			for (Object p : pdf.getDocumentCatalog().getAllPages()) {
				PDPage pg = (PDPage)p;
				// A page without an explicit rotation entry is treated as 0 degrees.
				Integer current = pg.getRotation();
				int r = (current != null ? current : 0);
				pg.setRotation((r+180)%360);

				ManageDocumentAction.deleteCacheVersion(doc, x);
				x++;
			}

			pdf.save(docdownload);
		} finally {
			try {
				if(pdf != null) pdf.close();
			} finally {
				// input is still null when opening the file failed; closing it
				// unguarded replaced the real error with a NullPointerException.
				if(input != null) input.close();
			}
		}

	} else if(doc.getContenttype().equals("image/jpg")
			|| doc.getContenttype().equals("image/png")
			|| doc.getContenttype().equals("image/gif")){
		// NOTE(review): "image/jpeg" is mapped to a suffix below but never reaches this
		// branch because the condition above only accepts "image/jpg" — confirm intent.
		String documentDir = EDocUtil.getDocumentDir(doc.getDocfilename());
		File file = new File(documentDir + doc.getDocfilename());
		BufferedImage image = ImageIO.read(file);
		if(image == null) return null;

		String suffix = null;
		String contentType = doc.getContenttype();
		if(contentType.equalsIgnoreCase("image/jpg") || contentType.equalsIgnoreCase("image/jpeg")) {
			suffix = "jpg";
		} else if(contentType.equalsIgnoreCase("image/png")) {
			suffix = "png";
		} else if(contentType.equalsIgnoreCase("image/gif")) {
			suffix = "gif";
		}

		// Mirroring on both axes and translating back into view is a 180-degree rotation.
		AffineTransform tx = AffineTransform.getScaleInstance(-1, -1);
		tx.translate(-image.getWidth(null), -image.getHeight(null));
		AffineTransformOp op = new AffineTransformOp(tx, AffineTransformOp.TYPE_NEAREST_NEIGHBOR);
		image = op.filter(image, null);
		ImageIO.write(image, suffix, file);
	}
	// Unknown content type: nothing to do.

	return null;
}
 
Example 27
Project: OSCAR-ConCert   File: SplitDocumentAction.java   Source Code and License 3 votes vote down vote up
/**
 * Removes the first page of the requested PDF document, invalidates the cached
 * version of every page and saves the result over the original file.
 * Documents with fewer than two pages are left untouched.
 *
 * @param request supplies the {@code "document"} parameter used to look up the document
 * @return always {@code null}
 * @throws Exception if the file cannot be read, parsed or written
 */
public ActionForward removeFirstPage(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDao.getDocument(request.getParameter("document"));

	String docdownload = oscar.OscarProperties.getInstance().getProperty("DOCUMENT_DIR");
	String pdfPath = docdownload + doc.getDocfilename();

	FileInputStream input = new FileInputStream(pdfPath);
	try {
		PDFParser parser = new PDFParser(input);
		parser.parse();
		PDDocument pdf = parser.getPDDocument();
		try {
			// Documents must have at least 2 pages, for the first page to be removed.
			// The finally blocks release the document and stream on this early exit too.
			if (pdf.getNumberOfPages() <= 1) { return null; }

			// Cache versions are numbered from 1, one per page.
			int pageCount = pdf.getDocumentCatalog().getAllPages().size();
			for (int x = 1; x <= pageCount; x++) {
				ManageDocumentAction.deleteCacheVersion(doc, x);
			}

			pdf.removePage(0);

			EDocUtil.subtractOnePage(request.getParameter("document"));

			pdf.save(pdfPath);
		} finally {
			pdf.close();
		}
	} finally {
		input.close();
	}

	return null;
}
 
Example 28
Project: arks-api   File: PageByPageSearch.java   Source Code and License 3 votes vote down vote up
/**
 * Searches a PDF page by page for pages whose text contains every keyword
 * (case-insensitive), and saves the matching pages to a result PDF.
 *
 * <p>Updates {@code falseCounter} and {@code validResult}, which are declared outside
 * this method: every matching page increments {@code falseCounter}; {@code validResult}
 * is set while there are pages to save and reset to {@code false} after saving.
 *
 * @param path              path of the PDF file to search
 * @param searchKeywordList keywords that must all occur on a page
 * @param fileCounter       number appended to the result PDF file name
 * @return one result per matching page, with its text, the file path and the 1-based page number
 * @throws IOException if the file cannot be read, parsed or the result cannot be saved
 */
public List<SearchResult> findpages(String path,
		List<String> searchKeywordList, int fileCounter) throws IOException {

	List<SearchResult> list = new ArrayList<SearchResult>();
	List<PDPage> pageList = new ArrayList<PDPage>();

	PDDocument finalDocument = new PDDocument();
	try {
		FileInputStream in = new FileInputStream(new File(path));
		try {
			PDFParser parser = new PDFParser(new RandomAccessBuffer(in));
			parser.parse();

			PDDocument doc = new PDDocument(parser.getDocument());
			try {
				PDFTextStripper reader = new PDFTextStripper();
				int pageCount = doc.getNumberOfPages();

				// PDFTextStripper counts pages from 1 while getPage() counts from 0. The
				// original used one index for both, so the last page was never searched
				// and the page after each match was copied instead of the match itself.
				for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
					int pageNumber = pageIndex + 1;
					reader.setStartPage(pageNumber);
					reader.setEndPage(pageNumber);

					// Extract once per page instead of once per keyword.
					String pageText = reader.getText(doc);
					String lowerCaseText = pageText.toLowerCase();

					boolean hasKeywords = true;
					for (String keyword : searchKeywordList) {
						if (!lowerCaseText.contains(keyword.toLowerCase())) {
							hasKeywords = false;
							break;
						}
					}

					if (hasKeywords) {
						SearchResult result = new PageResult();
						result.setFileContent(pageText);
						result.setFilePath(path);
						result.setPageNumber(pageNumber);
						list.add(result);
						pageList.add(doc.getPage(pageIndex));

						falseCounter++;
					}
				}

				for (PDPage page : pageList) {
					finalDocument.addPage(page);
					validResult = true;
				}

				// Save while the source document is still open: the copied pages belong to it.
				if (validResult) {
					finalDocument.save(ConfigCBSI.getResultPdfPath() + fileCounter
							+ ".pdf");
					logger.info("Result Saved");
					validResult = false;
				}
			} finally {
				doc.close();
			}
		} finally {
			in.close();
		}
	} finally {
		// Previously only closed when something had been saved.
		finalDocument.close();
	}

	return list;

}
 
Example 29
Project: MedicamentDB   File: InteractionService.java   Source Code and License 3 votes vote down vote up
/**
 * Runs an {@code InteractionPDFStripper} over {@code interactions.pdf} in the given
 * directory, starting at page 2, then links each interaction to its counterpart family.
 *
 * <p>For every interaction, the normalized second family name is looked up among the
 * parsed families. When found, the interaction's second id is set to that family's
 * first id. When missing, a warning is logged and a placeholder interaction is created
 * under that family name, mirroring the original interaction's data. Placeholder
 * families are merged into the returned map at the end.
 *
 * @param dir directory containing {@code interactions.pdf}
 * @return the stripper's interaction map, extended with the placeholder families
 * @throws IOException if the file cannot be read or parsed
 */
private Map<String, List<Interaction>> readInteractions(File dir) throws IOException {
    File file = new File(dir, "interactions.pdf");

    InteractionPDFStripper pdfStripper = new InteractionPDFStripper();

    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);

        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());

            // The return value is ignored; the stripper fills 'interactions' as a side effect.
            pdfStripper.getText(pdDoc);
        } finally {
            // Previously skipped whenever stripping threw.
            cosDoc.close();
        }
    } finally {
        source.close();
    }

    Map<String, List<Interaction>> interactions = pdfStripper.interactions;
    Map<String, List<Interaction>> newInteractions = new HashMap<>();

    for (List<Interaction> tmp : interactions.values()) {
        for (Interaction interaction : tmp) {
            String famille2 = normalize(interaction.getFamille2(), true);

            // special cases
            switch (famille2) {
                case "medicaments hyponatremiants":
                    famille2 = "hyponatremiants";
                    break;
            }

            List<Interaction> interactions2 = interactions.get(famille2);
            if (interactions2 == null) {
                LOG.warn("interaction " + interaction.getFamille2() + " inconnu");

                // Placeholders are collected separately so 'interactions' is not
                // modified while it is being iterated.
                List<Interaction> placeholders = newInteractions.get(famille2);
                Interaction newInteraction;
                if (placeholders == null) {
                    placeholders = new ArrayList<>();
                    newInteractions.put(famille2, placeholders);

                    newInteraction = pdfStripper.createEmptyInteraction(-1, interaction.getFamille2());
                } else {
                    // Reuse the id already given to the first placeholder of this family.
                    newInteraction = pdfStripper.createEmptyInteraction(Integer.parseInt(placeholders.get(0).getId1()), interaction.getFamille2());
                }

                newInteraction.setId2(interaction.getId1());
                newInteraction.setFamille2(interaction.getFamille1());
                newInteraction.setDescription(interaction.getDescription());
                newInteraction.setConseil(interaction.getConseil());

                placeholders.add(newInteraction);

                interaction.setId2(newInteraction.getId1());
            } else {
                interaction.setId2(interactions2.get(0).getId1());
            }
        }
    }

    interactions.putAll(newInteractions);

    return interactions;

}