package mkl.testarea.pdfbox2.extract;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * @author mkl
 */
public class ExtractColorText {
    final static File RESULT_FOLDER = new File("target/test-outputs", "extract");

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        RESULT_FOLDER.mkdirs();
    }

    /**
     * <a href="https://stackoverflow.com/questions/59031734/get-text-color-in-pdfbox">
     * Get text color in PDFBox
     * </a>
     * <p>
     * This test has already been executed for the original color text stripper class from my answer to
     * <a href="https://stackoverflow.com/questions/21430341/identifying-the-text-based-on-the-output-in-pdf-using-pdfbox">
     * Identifying the text based on the output in PDF using PDFBOX
     * </a>
     * </p>
     * 
     * @throws IOException
     */
    @Test
    public void testExtractFromFurzoSample() throws IOException {
        try (   InputStream resource = getClass().getResourceAsStream("furzo Sample.pdf");
                PDDocument document = Loader.loadPDF(resource) ) {
            PDFTextStripper stripper = new ColorTextStripper();
            String text = stripper.getText(document);

            Files.write(new File(RESULT_FOLDER, "furzo Sample.txt").toPath(), text.getBytes("UTF-8"));

            System.out.println("/// furzo Sample.pdf ///");
            System.out.println("Stripped text with color:");
            System.out.println(">>>");
            System.out.println(text);
            System.out.println("<<<");
        }
    }
}