org.apache.tika.io.IOUtils Java Examples

The following examples show how to use org.apache.tika.io.IOUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TikaTest.java    From tika-server with Apache License 2.0 7 votes vote down vote up
/**
 * Copies the full content of the given stream into the {@code bytes}
 * collection and then resets the stream to where it was before the copy.
 *
 * @param filename  name of the resource (not used by this handler)
 * @param mediaType media type of the resource (not used by this handler)
 * @param stream    content to capture; wrapped in a {@link TikaInputStream}
 *                  when it does not support mark/reset itself
 */
@Override
public void handle(String filename, MediaType mediaType,
                   InputStream stream) {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    if (!stream.markSupported()) {
        stream = TikaInputStream.get(stream);
    }
    // The whole stream is read below, so the read limit has to cover all of
    // it: with a limit of 0 the mark may be dropped after the first byte and
    // the later reset() would fail.
    stream.mark(Integer.MAX_VALUE);
    try {
        IOUtils.copy(stream, os);
        bytes.add(os.toByteArray());
        stream.reset();
    } catch (IOException ignored) {
        // Deliberately best effort: this method cannot propagate a checked
        // exception, and a failed copy simply records nothing.
    }
}
 
Example #2
Source File: UserWrapper.java    From wandora with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Downloads the picture of the wrapped user and stores it on the given user
 * topic as a data URL, typed "Profile Picture".
 * I/O and topic map failures are logged and otherwise ignored.
 *
 * @param tm        topic map used to look up or create the type and language topics
 * @param userTopic topic that receives the picture data
 */
private void addPicture(TopicMap tm, Topic userTopic) {
    try {
        URL imageUrl = new URL(AbstractFBGraphExtractor.URL_ROOT + this.user.getId() + "/picture");
        // A single connection provides both the content type and the body,
        // instead of opening the URL twice.
        java.net.URLConnection connection = imageUrl.openConnection();
        String contentType = connection.getContentType();
        byte[] data;
        // try-with-resources: the stream was previously never closed.
        try (java.io.InputStream in = connection.getInputStream()) {
            data = IOUtils.toByteArray(in);
        }
        DataURL u = new DataURL(contentType, data);

        Topic picType = getOrCreateType(tm, "Profile Picture");
        Topic langTopic = getOrCreateTopic(tm, XTMPSI.LANG_INDEPENDENT);
        userTopic.setData(picType, langTopic, u.toExternalForm());
    } catch (IOException | TopicMapException e) {
        UserWrapper.logger.log(e);
    }
}
 
Example #3
Source File: RepositoryRefactor.java    From urule with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the files, among those reported by {@code getFiles(rootNode, path)},
 * whose content contains both {@code path} and {@code searchText}.
 *
 * @param rootNode   node handed to {@code getFiles}
 * @param path       path handed to {@code getFiles}; also searched for in each file
 * @param searchText additional text every returned file must contain
 * @return paths of the matching files, in the order they were examined
 * @throws RuleException if a file's content cannot be read or its stream cannot be closed
 * @throws Exception     anything raised while listing or opening the files
 */
public List<String> getReferenceFiles(Node rootNode,String path,String searchText) throws Exception{
	List<String> referenceFiles=new ArrayList<String>();
	List<String> files=getFiles(rootNode, path);
	for(String nodePath:files){
		InputStream inputStream=repositoryService.readFile(nodePath,null);
		String content;
		// try-with-resources closes the stream even when reading fails;
		// previously a read error left it open.
		try (InputStream in=inputStream) {
			content = IOUtils.toString(in);
		} catch (IOException e) {
			throw new RuleException(e);
		}
		if(content.contains(path) && content.contains(searchText)){
			referenceFiles.add(nodePath);
		}
	}
	return referenceFiles;
}
 
Example #4
Source File: PDFGenerationItemProcessor.java    From CogStack-Pipeline with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a thread that reads the given stream to its end as UTF-8 text and
 * writes the collected text to the debug log. The stream is closed once
 * reading has finished or failed.
 *
 * @param stream stream to drain and log
 */
private void logStream(final InputStream stream) {
    new Thread() {
        @Override
        public void run() {
            StringBuilder out = new StringBuilder();
            char[] buffer = new char[1024];
            // Closing the reader also closes the wrapped stream.
            try (Reader reader = new InputStreamReader(stream, IOUtils.UTF_8)) {
                int n;
                while ((n = reader.read(buffer)) != -1) {
                    out.append(buffer, 0, n);
                }
            } catch (Exception e) {
                // Pass the throwable along so the stack trace is not lost.
                LOG.error(e.getMessage(), e);
            }
            // Whatever was read before a failure is still logged.
            LOG.debug(out.toString());
        }
    }.start();
}
 
Example #5
Source File: ThumbnailGenerationItemProcessor.java    From CogStack-Pipeline with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a thread that reads the given stream to its end as UTF-8 text and
 * writes the collected text to the debug log. The stream is closed once
 * reading has finished or failed.
 *
 * @param stream stream to drain and log
 */
private void logStream(final InputStream stream) {
    new Thread() {
        @Override
        public void run() {
            StringBuilder out = new StringBuilder();
            char[] buffer = new char[1024];
            // Closing the reader also closes the wrapped stream.
            try (Reader reader = new InputStreamReader(stream, IOUtils.UTF_8)) {
                int n;
                while ((n = reader.read(buffer)) != -1) {
                    out.append(buffer, 0, n);
                }
            } catch (Exception e) {
                // Pass the throwable along so the stack trace is not lost.
                LOG.error(e.getMessage(), e);
            }
            // Whatever was read before a failure is still logged.
            LOG.debug(out.toString());
        }
    }.start();
}
 
Example #6
Source File: PDFPreprocessorParser.java    From CogStack-Pipeline with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a thread that reads the contents of the standard output or error
 * stream of the given process to not block the process. The stream is
 * closed once fully processed, and the text that was read is written to the
 * debug log.
 *
 * @param logType not used by this implementation
 * @param stream  stream to drain
 * @param file    not used by this implementation
 */
private void logStream(final String logType, final InputStream stream, final File file) {
    new Thread() {
        @Override
        public void run() {
            Reader reader = new InputStreamReader(stream);
            StringBuilder out = new StringBuilder();
            char[] buffer = new char[1024];
            try {
                int n;
                while ((n = reader.read(buffer)) != -1) {
                    out.append(buffer, 0, n);
                }
            } catch (IOException e) {
                // Draining stays best effort, but a read failure is no longer
                // swallowed without a trace.
                LogFactory.getLog(PDFPreprocessorParser.class)
                        .warn("Failed to read process stream", e);
            } finally {
                IOUtils.closeQuietly(stream);
            }

            // Whatever was read before a failure is still logged.
            String msg = out.toString();
            LogFactory.getLog(PDFPreprocessorParser.class).debug(msg);
        }
    }.start();
}
 
Example #7
Source File: XMLContentExporterTest.java    From syncope with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the exporter for "Master" and verifies that the output is not blank
 * and that its {@code <Realm} rows are the four expected ones, in order.
 * Also checks for SYNCOPE-1307.
 *
 * @throws Exception exception thrown when dealing with IO.
 */
@Test
public void issueSYNCOPE1128() throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    exporter.export("Master", out, null, null, null);

    String xml = out.toString(Charset.defaultCharset());
    assertTrue(StringUtils.isNotBlank(xml));

    // Split the export into lines, then keep only the realm rows.
    List<String> lines = IOUtils.readLines(IOUtils.toInputStream(xml), StandardCharsets.UTF_8.name());
    List<String> realms = lines.stream().
            filter(line -> line.trim().startsWith("<Realm")).
            collect(Collectors.toList());

    assertEquals(4, realms.size());
    assertTrue(realms.get(0).contains("name=\"/\""));
    assertTrue(realms.get(1).contains("name=\"odd\""));
    assertTrue(realms.get(2).contains("name=\"even\""));
    assertTrue(realms.get(3).contains("name=\"two\""));
}
 
Example #8
Source File: TestPackageInstall.java    From jackrabbit-filevault with Apache License 2.0 6 votes vote down vote up
/**
 * Installs a package containing a binary property and checks that the
 * property is of type BINARY and holds the expected 8192 characters.
 */
@Test
public void testBinaryProperties() throws RepositoryException, IOException, PackageException {
    JcrPackage pack = packMgr.upload(getStream("/test-packages/tmp_binary.zip"), false);
    assertNotNull(pack);
    pack.install(getDefaultOptions());

    Property p = admin.getProperty("/tmp/binary/test/jcr:data");
    assertEquals(PropertyType.BINARY, p.getType());

    // Expected content: the 16-character pattern repeated up to 8192 characters.
    StringBuilder buffer = new StringBuilder(8192);
    while (buffer.length() < 8192) {
        buffer.append("0123456789abcdef");
    }

    String result;
    // try-with-resources: the binary stream was previously never closed.
    try (java.io.InputStream in = p.getBinary().getStream()) {
        result = IOUtils.toString(in);
    }

    assertEquals(buffer.toString(), result);
}
 
Example #9
Source File: CSVDetector.java    From data-prep with Apache License 2.0 6 votes vote down vote up
/**
 * Reads an input stream and checks if it has a CSV format.
 *
 * Text detection is first tried on the stream as given. Only when that
 * yields nothing is the head of the stream (at most
 * {@code FormatUtils.META_TAG_BUFFER_SIZE} bytes) buffered, converted to a
 * string and submitted to text detection again with fresh metadata.
 *
 * The general contract of a detector is to not close the specified stream before returning. It is the
 * responsibility of the caller to close it. The detector leverages the mark/reset feature of the specified
 * {@link TikaInputStream} so that the bytes read here are available again afterwards.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream
 * @return either null or a CSV format
 * @throws IOException if an I/O error is raised while reading or resetting the stream
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {
    final Format direct = detectText(metadata, inputStream);
    if (direct != null) {
        return direct;
    }

    inputStream.mark(FormatUtils.META_TAG_BUFFER_SIZE);
    final byte[] buffer = new byte[FormatUtils.META_TAG_BUFFER_SIZE];
    int filled = 0;

    // Keep reading until the buffer is full or the stream is exhausted.
    int count = inputStream.read(buffer);
    while (count != -1 && filled < buffer.length) {
        filled += count;
        count = inputStream.read(buffer, filled, buffer.length - filled);
    }

    inputStream.reset();
    final String head = FormatUtils.readFromBuffer(buffer, 0, filled);

    try (InputStream stream = TikaInputStream.get(IOUtils.toInputStream(head))) {
        return detectText(new Metadata(), stream);
    }
}
 
Example #10
Source File: DynRealmITCase.java    From syncope with Apache License 2.0 6 votes vote down vote up
/**
 * Queries the local Elasticsearch {@code master_user} index for the document
 * whose {@code _id} matches the given key and returns the {@code dynRealms}
 * array of the first hit.
 *
 * @param userKey key of the user to look up
 * @return the {@code dynRealms} node of the first hit's {@code _source}
 * @throws Exception if the HTTP client cannot be started or stopped, the
 *                   request fails or the response cannot be parsed
 */
private static ArrayNode fetchDynRealmsFromElasticsearch(final String userKey) throws Exception {
    String body =
        '{'
            + "    \"query\": {"
            + "        \"match\": {\"_id\": \"" + userKey + "\"}"
            + "    }"
            + '}';

    HttpClient httpClient = new HttpClient();
    httpClient.start();
    try {
        ContentResponse response = httpClient.newRequest("http://localhost:9200/master_user/_search").
                method(HttpMethod.GET).
                header(HttpHeader.CONTENT_TYPE, MediaType.APPLICATION_JSON).
                content(new InputStreamContentProvider(IOUtils.toInputStream(body))).
                send();
        assertEquals(HttpStatus.OK_200, response.getStatus());

        return (ArrayNode) OBJECT_MAPPER.readTree(response.getContent()).
                get("hits").get("hits").get(0).get("_source").get("dynRealms");
    } finally {
        // The client was previously started but never stopped; stop it on
        // every exit path, including failed assertions.
        httpClient.stop();
    }
}
 
Example #11
Source File: ResultsUtils.java    From allure-java with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a class-path resource and decodes it as UTF-8 text.
 *
 * @param classLoader  class loader used to locate the resource
 * @param resourceName name of the resource to read
 * @return the resource content, or an empty {@link Optional} when the
 *         resource does not exist or reading it fails (the failure is
 *         logged as a warning)
 */
private static Optional<String> readResource(final ClassLoader classLoader, final String resourceName) {
    try (InputStream resource = classLoader.getResourceAsStream(resourceName)) {
        if (resource == null) {
            return Optional.empty();
        }
        final String text = new String(IOUtils.toByteArray(resource), StandardCharsets.UTF_8);
        return Optional.of(text);
    } catch (IOException e) {
        LOGGER.warn("Unable to process description resource file", e);
        return Optional.empty();
    }
}
 
Example #12
Source File: SAML2SPMetadataTest.java    From syncope with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link SAML2SPMetadata} entity for the given owner, fills it with
 * the content of the class-path resource {@code sp-metadata.xml}, saves it and
 * checks that it received a key and can be found by owner.
 *
 * @param owner owner to set on the new entity
 * @return the entity that was passed to the DAO
 * @throws Exception if the metadata resource cannot be read
 */
private SAML2SPMetadata create(final String owner) throws Exception {
    SAML2SPMetadata saml2SPMetadata = entityFactory.newEntity(SAML2SPMetadata.class);
    saml2SPMetadata.setOwner(owner);

    String metadata;
    // try-with-resources: the resource stream was previously never closed.
    try (java.io.InputStream in = new ClassPathResource("sp-metadata.xml").getInputStream()) {
        metadata = IOUtils.toString(in);
    }
    saml2SPMetadata.setMetadata(metadata);
    saml2SPMetadataDAO.save(saml2SPMetadata);

    assertNotNull(saml2SPMetadata);
    assertNotNull(saml2SPMetadata.getKey());
    assertNotNull(saml2SPMetadataDAO.findByOwner(saml2SPMetadata.getOwner()));
    return saml2SPMetadata;
}
 
Example #13
Source File: TestPackageInstall.java    From jackrabbit-filevault with Apache License 2.0 5 votes vote down vote up
/**
 * Installs a package with no properties.
 * The package is first copied from the test resources into a temporary file
 * and uploaded from there.
 */
@Test
public void testNoProperties() throws RepositoryException, IOException, PackageException {
    // The suffix is appended verbatim, so it needs its own dot.
    File tmpFile = File.createTempFile("vlttest", ".zip");
    // Close both streams before uploading, so the file is completely written
    // and no handles are leaked.
    try (java.io.InputStream in = getStream("/test-packages/tmp_no_properties.zip");
         java.io.OutputStream out = FileUtils.openOutputStream(tmpFile)) {
        IOUtils.copy(in, out);
    }
    JcrPackage pack = packMgr.upload(tmpFile, true, true, "testpackage", false);
    assertNotNull(pack);

    pack.install(getDefaultOptions());
}
 
Example #14
Source File: TestPackageInstall.java    From jackrabbit-filevault with Apache License 2.0 5 votes vote down vote up
/**
 * Installs a package with a binary property twice to check that the second
 * installation doesn't report an update.
 * TODO: this is not implemented yet. see JCRVLT-110
 */
@Test
@Ignore
public void testBinaryPropertyTwice() throws RepositoryException, IOException, PackageException {
    JcrPackage pack = packMgr.upload(getStream("/test-packages/tmp_binary.zip"), false);
    assertNotNull(pack);
    pack.install(getDefaultOptions());

    Property p = admin.getProperty("/tmp/binary/test/jcr:data");
    assertEquals(PropertyType.BINARY, p.getType());

    // Expected content: the 16-character pattern repeated up to 8192 characters.
    StringBuilder buffer = new StringBuilder(8192);
    while (buffer.length() < 8192) {
        buffer.append("0123456789abcdef");
    }

    String result;
    // try-with-resources: the binary stream was previously never closed.
    try (java.io.InputStream in = p.getBinary().getStream()) {
        result = IOUtils.toString(in);
    }

    assertEquals(buffer.toString(), result);

    // install again to check if binary data is not updated
    ImportOptions opts = getDefaultOptions();
    TrackingListener listener = new TrackingListener(opts.getListener());
    opts.setListener(listener);

    pack.install(opts);

    //TODO: assertEquals("-", listener.getActions().get("/tmp/binary/test"));
    assertEquals("U", listener.getActions().get("/tmp/binary/test"));
}
 
Example #15
Source File: HtmlDetector.java    From data-prep with Apache License 2.0 5 votes vote down vote up
/**
 * Reads an input stream and checks if it has a HTML format.
 *
 * The head of the stream (at most {@code FormatUtils.META_TAG_BUFFER_SIZE}
 * bytes) is buffered, converted to a string and handed to the HTML encoding
 * detector. A format is reported only when a charset is detected.
 *
 * The general contract of a detector is to not close the specified stream before returning. It is the
 * responsibility of the caller to close it. The detector leverages the mark/reset feature of the specified
 * {@link TikaInputStream} so that the bytes read here are available again afterwards.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream, may be null
 * @return either null or an HTML format
 * @throws IOException if an I/O error is raised while reading or resetting the stream
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {
    if (inputStream == null) {
        return null;
    }

    inputStream.mark(FormatUtils.META_TAG_BUFFER_SIZE);
    final byte[] buffer = new byte[FormatUtils.META_TAG_BUFFER_SIZE];
    int filled = 0;

    // Keep reading until the buffer is full or the stream is exhausted.
    int count = inputStream.read(buffer);
    while (count != -1 && filled < buffer.length) {
        filled += count;
        count = inputStream.read(buffer, filled, buffer.length - filled);
    }

    inputStream.reset();
    final String head = FormatUtils.readFromBuffer(buffer, 0, filled);

    try (InputStream stream = TikaInputStream.get(IOUtils.toInputStream(head))) {
        final Charset charset = htmlEncodingDetector.detect(stream, metadata);
        return charset == null ? null : new Format(htmlFormatFamily, charset.name());
    }
}
 
Example #16
Source File: HelmITSupport.java    From nexus-repository-helm with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Asserts that the text read from the given stream contains the expected content.
 *
 * @param is              stream to read the text from
 * @param expectedContent text that must occur in the stream's content
 * @throws Exception if the stream cannot be read
 */
protected void checkYamlIncludesContent(InputStream is, String expectedContent) throws Exception {
  assertThat(IOUtils.toString(is), containsString(expectedContent));
}
 
Example #17
Source File: XMLContentExporter.java    From syncope with Apache License 2.0 4 votes vote down vote up
/**
 * Reads one column of the current row and renders it as a string.
 * <ul>
 * <li>binary columns and BLOBs become a hexadecimal string;</li>
 * <li>bit and boolean columns become "1" or "0";</li>
 * <li>date, time and timestamp columns are formatted through {@code FormatUtils};</li>
 * <li>every other type is read with {@link ResultSet#getString(String)}.</li>
 * </ul>
 *
 * @param rs         result set positioned on the row to read
 * @param columnName name of the column to read
 * @param columnType {@link Types} constant describing the column; must not be null
 * @return the rendered value, or null when the column value is null or the
 *         binary content could not be read (the error is logged)
 * @throws SQLException if the column cannot be accessed
 */
private static String getValues(final ResultSet rs, final String columnName, final Integer columnType)
        throws SQLException {

    String res = null;

    try {
        switch (columnType) {
            case Types.BINARY:
            case Types.VARBINARY:
            case Types.LONGVARBINARY:
                // try-with-resources tolerates a null resource and closes the
                // stream, which was previously left open.
                try (InputStream is = rs.getBinaryStream(columnName)) {
                    if (is != null) {
                        res = DatatypeConverter.printHexBinary(IOUtils.toByteArray(is));
                    }
                }
                break;

            case Types.BLOB:
                final Blob blob = rs.getBlob(columnName);
                if (blob != null) {
                    // Same here: close the BLOB stream once it has been read.
                    try (InputStream is = blob.getBinaryStream()) {
                        res = DatatypeConverter.printHexBinary(IOUtils.toByteArray(is));
                    }
                }
                break;

            case Types.BIT:
            case Types.BOOLEAN:
                if (rs.getBoolean(columnName)) {
                    res = "1";
                } else {
                    res = "0";
                }
                break;

            case Types.DATE:
            case Types.TIME:
            case Types.TIMESTAMP:
                final Timestamp timestamp = rs.getTimestamp(columnName);
                if (timestamp != null) {
                    res = FormatUtils.format(new Date(timestamp.getTime()));
                }
                break;

            default:
                res = rs.getString(columnName);
        }
    } catch (IOException e) {
        LOG.error("Error retrieving hexadecimal string", e);
    }

    return res;
}
 
Example #18
Source File: TestSubPackages.java    From jackrabbit-filevault with Apache License 2.0 4 votes vote down vote up
/**
 * Installs a package with sub-packages non-recursively, re-assembles the
 * package opened under {@code PACKAGE_ID_SUB_TEST} and verifies both its
 * workspace filter and the list of entries in the resulting zip.
 */
@Test
public void testRoundTrip() throws IOException, RepositoryException, PackageException {
    JcrPackage pack = packMgr.upload(getStream("/test-packages/subtest.zip"), false);
    assertNotNull(pack);

    // install
    ImportOptions opts = getDefaultOptions();
    opts.setNonRecursive(true);
    pack.install(opts);

    // create new package
    JcrPackage pkg = packMgr.open(PACKAGE_ID_SUB_TEST);
    packMgr.assemble(pkg, new DefaultProgressListener());

    try (ZipInputStream zip = new ZipInputStream(pkg.getData().getBinary().getStream())) {
        // Collect every entry name; grab the filter definition on the way.
        List<String> names = new ArrayList<>();
        String filterXml = "";
        for (ZipEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
            String entryName = entry.getName();
            names.add(entryName);
            if ("META-INF/vault/filter.xml".equals(entryName)) {
                filterXml = IOUtils.toString(zip, "utf-8");
            }
        }
        Collections.sort(names);

        StringBuilder listing = new StringBuilder();
        for (String name : names) {
            // exclude some of the entries that depend on the repository setup
            boolean setupDependent = "jcr_root/etc/.content.xml".equals(name)
                    || "jcr_root/etc/packages/my_packages/.content.xml".equals(name)
                    || "jcr_root/etc/packages/.content.xml".equals(name);
            if (!setupDependent) {
                listing.append(name).append("\n");
            }
        }

        assertEquals("Filter must be correct",
                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<workspaceFilter version=\"1.0\">\n" +
                "    <filter root=\"/etc/packages/my_packages/sub_a.zip\"/>\n" +
                "    <filter root=\"/etc/packages/my_packages/sub_b.zip\"/>\n" +
                "</workspaceFilter>\n", filterXml);

        assertEquals("Package must contain proper entries.",
                "META-INF/\n" +
                "META-INF/MANIFEST.MF\n" +
                "META-INF/vault/\n" +
                "META-INF/vault/config.xml\n" +
                "META-INF/vault/definition/\n" +
                "META-INF/vault/definition/.content.xml\n" +
                "META-INF/vault/filter.xml\n" +
                "META-INF/vault/nodetypes.cnd\n" +
                "META-INF/vault/properties.xml\n" +
                "jcr_root/.content.xml\n" +
                "jcr_root/etc/\n" +
                "jcr_root/etc/packages/\n" +
                "jcr_root/etc/packages/my_packages/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/.content.xml\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/_vlt_definition/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/_vlt_definition/.content.xml\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/.content.xml\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/_vlt_definition/\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/_vlt_definition/.content.xml\n", listing.toString());
    }
}
 
Example #19
Source File: RITSupport.java    From nexus-repository-r with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Decompresses the given stream as GZIP and asserts that the resulting text
 * satisfies the supplied matcher. The decompressing stream is closed afterwards.
 *
 * @param expectedContent matcher the decompressed text must satisfy
 * @param is              GZIP-compressed input
 * @throws Exception if the stream cannot be decompressed or read
 */
protected void verifyTextGzipContent(Matcher<String> expectedContent, InputStream is) throws Exception {
  final CompressorStreamFactory factory = new CompressorStreamFactory();
  try (InputStream decompressed = factory.createCompressorInputStream(GZIP, is)) {
    assertThat(IOUtils.toString(decompressed), expectedContent);
  }
}
 
Example #20
Source File: TestPackageInstall.java    From jackrabbit-filevault with Apache License 2.0 3 votes vote down vote up
/**
 * Checks that installing a package with a non-child filter doesn't remove
 * the root: a node added below {@code /etc} before the installation must
 * still exist afterwards.
 *
 * <pre>{@code
 *   <workspaceFilter version="1.0">
 *   <filter root="/etc">
 *     <include pattern="/etc"/>
 *     <include pattern="/etc/clientlibs"/>
 *     <include pattern="/etc/clientlibs/granite"/>
 *     <include pattern="/etc/clientlibs/granite/test(/.*)?"/>
 *   </filter>
 *  </workspaceFilter>
 * }</pre>
 */
@Test
public void testNoChildFilter() throws RepositoryException, IOException, PackageException {
    // The suffix is appended verbatim, so it needs its own dot.
    File tmpFile = File.createTempFile("vlttest", ".zip");
    // Close both streams before uploading, so the file is completely written
    // and no handles are leaked.
    try (java.io.InputStream in = getStream("/test-packages/test-package-with-etc.zip");
         java.io.OutputStream out = FileUtils.openOutputStream(tmpFile)) {
        IOUtils.copy(in, out);
    }
    JcrPackage pack = packMgr.upload(tmpFile, true, true, "test-package-with-etc", false);
    assertNodeExists("/etc");
    admin.getNode("/etc").addNode("foo", NodeType.NT_FOLDER);
    admin.save();
    pack.install(getDefaultOptions());
    assertNodeExists("/etc/foo");
}