Java Code Examples for org.apache.tika.io.IOUtils

The following examples show how to use org.apache.tika.io.IOUtils. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: tika-server   Source File: TikaTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies the complete content of the given stream into the {@code bytes}
 * collection and then tries to rewind the stream.
 *
 * @param filename  name of the resource (not used by this method)
 * @param mediaType media type of the resource (not used by this method)
 * @param stream    content to capture; wrapped in a {@link TikaInputStream}
 *                  when it does not support mark/reset itself
 */
@Override
public void handle(String filename, MediaType mediaType,
                   InputStream stream) {
    // Streams that cannot rewind are wrapped so that mark/reset is available below.
    InputStream source = stream.markSupported() ? stream : TikaInputStream.get(stream);
    ByteArrayOutputStream captured = new ByteArrayOutputStream();
    // NOTE(review): the read limit passed here is 0; whether reset() still works
    // after the whole stream has been read depends on the stream implementation.
    source.mark(0);
    try {
        IOUtils.copy(source, captured);
        bytes.add(captured.toByteArray());
        source.reset();
    } catch (IOException ignored) {
        // Best effort: failures while copying or rewinding are deliberately dropped.
    }
}
 
Example 2
Source Project: urule   Source File: RepositoryRefactor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the paths of all files reported by {@code getFiles(rootNode, path)}
 * whose content contains both {@code path} and {@code searchText}.
 *
 * @param rootNode   node handed to {@code getFiles} to enumerate candidate files
 * @param path       path handed to {@code getFiles}; also searched for in each file's content
 * @param searchText additional text that must occur in a file's content
 * @return the node paths of the matching files, in enumeration order
 * @throws RuleException if reading or closing a file's content fails with an {@link IOException}
 * @throws Exception     anything else raised while enumerating or opening files
 */
public List<String> getReferenceFiles(Node rootNode,String path,String searchText) throws Exception{
	List<String> referenceFiles=new ArrayList<String>();
	List<String> files=getFiles(rootNode, path);
	for(String nodePath:files){
		InputStream inputStream=repositoryService.readFile(nodePath,null);
		// try-with-resources closes the stream even when reading the content fails;
		// previously close() was only reached on the success path.
		try (InputStream in=inputStream) {
			String content = IOUtils.toString(in);
			boolean containPath=content.contains(path);
			boolean containText=content.contains(searchText);
			if(containPath && containText){
				referenceFiles.add(nodePath);
			}
		} catch (IOException e) {
			throw new RuleException(e);
		}
	}
	return referenceFiles;
}
 
Example 3
/**
 * Starts a thread that reads the given stream to its end as UTF-8 text and
 * writes the collected text to the debug log. The stream is closed once it
 * has been read.
 *
 * @param stream the stream to drain and log
 */
private void logStream(final InputStream stream) {
    new Thread() {
        @Override
        public void run() {
            Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
            StringBuilder out = new StringBuilder();
            char[] buffer = new char[1024];
            try {
                for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
                    out.append(buffer, 0, n);
                }
            } catch (Exception e) {
                // Hand the exception itself to the logger so the stack trace is kept.
                LOG.error(e.getMessage(), e);
            } finally {
                IOUtils.closeQuietly(stream);
                IOUtils.closeQuietly(reader);
            }
            // Whatever was read before a failure is still logged.
            LOG.debug(out.toString());
        }
    }.start();
}
 
Example 4
/**
 * Starts a thread that reads the given stream to its end as UTF-8 text and
 * writes the collected text to the debug log. The stream is closed once it
 * has been read.
 *
 * @param stream the stream to drain and log
 */
private void logStream(final InputStream stream) {
    new Thread() {
        @Override
        public void run() {
            Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
            StringBuilder out = new StringBuilder();
            char[] buffer = new char[1024];
            try {
                for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
                    out.append(buffer, 0, n);
                }
            } catch (Exception e) {
                // Hand the exception itself to the logger so the stack trace is kept.
                LOG.error(e.getMessage(), e);
            } finally {
                IOUtils.closeQuietly(stream);
                IOUtils.closeQuietly(reader);
            }
            // Whatever was read before a failure is still logged.
            LOG.debug(out.toString());
        }
    }.start();
}
 
Example 5
Source Project: CogStack-Pipeline   Source File: PDFPreprocessorParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Spawns a thread that drains the given stream (for example the standard
 * output or error stream of a process) so that the producer is not blocked,
 * and writes the collected text to the debug log of
 * {@code PDFPreprocessorParser}. The stream is closed once it has been read.
 *
 * @param logType not used by this method
 * @param stream  the stream to drain; decoded with the platform default charset
 * @param file    not used by this method
 */
private void logStream(final String logType, final InputStream stream, final File file) {
    new Thread() {
        public void run() {
            final StringBuilder collected = new StringBuilder();
            final char[] chunk = new char[1024];
            final Reader in = new InputStreamReader(stream);
            try {
                int count = in.read(chunk);
                while (count != -1) {
                    collected.append(chunk, 0, count);
                    count = in.read(chunk);
                }
            } catch (IOException ignored) {
                // Read failures are dropped; the text collected so far is still logged.
            } finally {
                IOUtils.closeQuietly(stream);
            }

            LogFactory.getLog(PDFPreprocessorParser.class).debug(collected.toString());
        }
    }.start();
}
 
Example 6
Source Project: jackrabbit-filevault   Source File: TestPackageInstall.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Installs a package containing a binary property and verifies that the
 * property holds the expected 8192 characters of repeated hex digits.
 */
@Test
public void testBinaryProperties() throws RepositoryException, IOException, PackageException {
    JcrPackage pack = packMgr.upload(getStream("/test-packages/tmp_binary.zip"), false);
    assertNotNull(pack);
    pack.install(getDefaultOptions());

    Property p = admin.getProperty("/tmp/binary/test/jcr:data");
    assertEquals(PropertyType.BINARY, p.getType());

    // Expected content: the 16-character pattern repeated until 8192 characters are reached.
    StringBuilder buffer = new StringBuilder(8192);
    while (buffer.length() < 8192) {
        buffer.append("0123456789abcdef");
    }

    // Close the binary's stream once it has been read instead of leaking it.
    String result;
    try (java.io.InputStream in = p.getBinary().getStream()) {
        result = IOUtils.toString(in);
    }

    assertEquals(buffer.toString(), result);
}
 
Example 7
Source Project: data-prep   Source File: CSVDetector.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads an input stream and checks whether it has a CSV format.
 *
 * The general contract of a detector is to not close the specified stream before returning; closing it is the
 * responsibility of the caller. The detector relies on the mark/reset feature of the specified
 * {@link TikaInputStream} so that the stream keeps returning the same bytes afterwards.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream
 * @return either null or a CSV format
 * @throws IOException if reading from or resetting the stream fails
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {

    Format detected = detectText(metadata, inputStream);
    if (detected != null) {
        return detected;
    }

    // Second attempt: read up to META_TAG_BUFFER_SIZE bytes, rewind, and run the
    // text detection again on just that head with fresh metadata.
    inputStream.mark(FormatUtils.META_TAG_BUFFER_SIZE);
    byte[] buffer = new byte[FormatUtils.META_TAG_BUFFER_SIZE];
    int filled = 0;
    int lastRead = inputStream.read(buffer);
    while (lastRead != -1 && filled < buffer.length) {
        filled += lastRead;
        lastRead = inputStream.read(buffer, filled, buffer.length - filled);
    }
    inputStream.reset();

    String head = FormatUtils.readFromBuffer(buffer, 0, filled);
    try (InputStream stream = TikaInputStream.get(IOUtils.toInputStream(head))) {
        return detectText(new Metadata(), stream);
    }
}
 
Example 8
Source Project: syncope   Source File: DynRealmITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sends a {@code match} query on {@code _id} to the {@code master_user} index
 * at {@code http://localhost:9200}, asserts that the response status is 200 and
 * returns the {@code dynRealms} node of the first hit's {@code _source}.
 *
 * @param userKey value to match against {@code _id}; inserted into the JSON query as is
 * @return the {@code dynRealms} node of the first hit, cast to {@link ArrayNode}
 * @throws Exception if the HTTP client cannot be started or stopped, the request fails,
 *                   or the response cannot be parsed
 */
private static ArrayNode fetchDynRealmsFromElasticsearch(final String userKey) throws Exception {
    String body =
        '{'
            + "    \"query\": {"
            + "        \"match\": {\"_id\": \"" + userKey + "\"}"
            + "    }"
            + '}';

    HttpClient httpClient = new HttpClient();
    httpClient.start();
    try {
        ContentResponse response = httpClient.newRequest("http://localhost:9200/master_user/_search").
                method(HttpMethod.GET).
                header(HttpHeader.CONTENT_TYPE, MediaType.APPLICATION_JSON).
                content(new InputStreamContentProvider(IOUtils.toInputStream(body))).
                send();
        assertEquals(HttpStatus.OK_200, response.getStatus());

        return (ArrayNode) OBJECT_MAPPER.readTree(response.getContent()).
                get("hits").get("hits").get(0).get("_source").get("dynRealms");
    } finally {
        // Stop the client on every path so it does not stay started after each call.
        httpClient.stop();
    }
}
 
Example 9
Source Project: syncope   Source File: XMLContentExporterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Exports the "Master" domain and verifies that the exported content lists
 * the four expected realms in order. Also checks for SYNCOPE-1307.
 *
 * @throws Exception exception thrown when dealing with IO.
 */
@Test
public void issueSYNCOPE1128() throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    exporter.export("Master", out, null, null, null);

    String exported = out.toString(Charset.defaultCharset());
    assertTrue(StringUtils.isNotBlank(exported));

    // Keep only the rows that open a <Realm element.
    List<String> lines = IOUtils.readLines(IOUtils.toInputStream(exported), StandardCharsets.UTF_8.name());
    List<String> realms = lines.stream().
            filter(row -> row.trim().startsWith("<Realm")).
            collect(Collectors.toList());

    assertEquals(4, realms.size());
    assertTrue(realms.get(0).contains("name=\"/\""));
    assertTrue(realms.get(1).contains("name=\"odd\""));
    assertTrue(realms.get(2).contains("name=\"even\""));
    assertTrue(realms.get(3).contains("name=\"two\""));
}
 
Example 10
Source Project: wandora   Source File: UserWrapper.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Downloads the user's picture from {@code URL_ROOT + <user id> + "/picture"}
 * and stores it as a data URL occurrence of type "Profile Picture" on the
 * given topic. I/O and topic map failures are logged and otherwise ignored.
 *
 * @param tm        topic map used to look up or create the type and language topics
 * @param userTopic topic that receives the picture data
 */
private void addPicture(TopicMap tm, Topic userTopic) {
    try {
        URL imageUrl = new URL(AbstractFBGraphExtractor.URL_ROOT + this.user.getId() + "/picture");
        // Use one connection for both the content type and the body; the previous
        // code opened a second connection via openStream() and never closed it.
        java.net.URLConnection connection = imageUrl.openConnection();
        String contentType = connection.getContentType();
        byte[] data;
        try (java.io.InputStream in = connection.getInputStream()) {
            data = IOUtils.toByteArray(in);
        }
        DataURL u = new DataURL(contentType, data);

        Topic picType = getOrCreateType(tm, "Profile Picture");
        Topic langTopic = getOrCreateTopic(tm, XTMPSI.LANG_INDEPENDENT);
        userTopic.setData(picType, langTopic, u.toExternalForm());
    } catch (IOException | TopicMapException e) {
        UserWrapper.logger.log(e);
    }
}
 
Example 11
Source Project: allure-java   Source File: ResultsUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads a class path resource as UTF-8 text.
 *
 * @param classLoader  class loader used to look up the resource
 * @param resourceName name of the resource to read
 * @return the resource content, or an empty {@link Optional} when the resource
 *         does not exist or reading it fails (the failure is logged as a warning)
 */
private static Optional<String> readResource(final ClassLoader classLoader, final String resourceName) {
    try (InputStream resource = classLoader.getResourceAsStream(resourceName)) {
        if (resource != null) {
            final String text = new String(IOUtils.toByteArray(resource), StandardCharsets.UTF_8);
            return Optional.of(text);
        }
    } catch (IOException e) {
        LOGGER.warn("Unable to process description resource file", e);
    }
    return Optional.empty();
}
 
Example 12
Source Project: jackrabbit-filevault   Source File: TestPackageInstall.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Installs a package with no properties.
 */
@Test
public void testNoProperties() throws RepositoryException, IOException, PackageException {
    File tmpFile = File.createTempFile("vlttest", "zip");
    // Close both ends of the copy so the temp file is completely written and no
    // file handle stays open before the package is uploaded.
    try (java.io.InputStream in = getStream("/test-packages/tmp_no_properties.zip");
         java.io.OutputStream out = FileUtils.openOutputStream(tmpFile)) {
        IOUtils.copy(in, out);
    }
    JcrPackage pack = packMgr.upload(tmpFile, true, true, "testpackage", false);
    assertNotNull(pack);

    pack.install(getDefaultOptions());
}
 
Example 13
Source Project: jackrabbit-filevault   Source File: TestPackageInstall.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Installs a package with a binary property twice to check that the second
 * install does not report an update.
 * TODO: this is not implemented yet. see JCRVLT-110
 */
@Test
@Ignore
public void testBinaryPropertyTwice() throws RepositoryException, IOException, PackageException {
    JcrPackage pack = packMgr.upload(getStream("/test-packages/tmp_binary.zip"), false);
    assertNotNull(pack);
    pack.install(getDefaultOptions());

    Property p = admin.getProperty("/tmp/binary/test/jcr:data");
    assertEquals(PropertyType.BINARY, p.getType());

    // Expected content: the 16-character pattern repeated until 8192 characters are reached.
    StringBuilder buffer = new StringBuilder(8192);
    while (buffer.length() < 8192) {
        buffer.append("0123456789abcdef");
    }

    // Close the binary's stream once it has been read instead of leaking it.
    String result;
    try (java.io.InputStream in = p.getBinary().getStream()) {
        result = IOUtils.toString(in);
    }

    assertEquals(buffer.toString(), result);

    // install again to check if binary data is not updated
    ImportOptions opts = getDefaultOptions();
    TrackingListener listener = new TrackingListener(opts.getListener());
    opts.setListener(listener);

    pack.install(opts);

    //TODO: assertEquals("-", listener.getActions().get("/tmp/binary/test"));
    assertEquals("U", listener.getActions().get("/tmp/binary/test"));
}
 
Example 14
Source Project: data-prep   Source File: HtmlDetector.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads an input stream and checks whether it has an HTML format.
 *
 * The general contract of a detector is to not close the specified stream before returning; closing it is the
 * responsibility of the caller. The detector relies on the mark/reset feature of the specified
 * {@link TikaInputStream} so that the stream keeps returning the same bytes afterwards.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream
 * @return either null or an HTML format
 * @throws IOException if reading from or resetting the stream fails
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {
    if (inputStream == null) {
        return null;
    }

    // Read up to META_TAG_BUFFER_SIZE bytes, then rewind so the caller sees the same bytes again.
    inputStream.mark(FormatUtils.META_TAG_BUFFER_SIZE);
    byte[] buffer = new byte[FormatUtils.META_TAG_BUFFER_SIZE];
    int filled = 0;
    int lastRead = inputStream.read(buffer);
    while (lastRead != -1 && filled < buffer.length) {
        filled += lastRead;
        lastRead = inputStream.read(buffer, filled, buffer.length - filled);
    }
    inputStream.reset();

    String head = FormatUtils.readFromBuffer(buffer, 0, filled);
    try (InputStream stream = TikaInputStream.get(IOUtils.toInputStream(head))) {
        // A detected charset is what marks the content as HTML.
        Charset charset = htmlEncodingDetector.detect(stream, metadata);
        return charset == null ? null : new Format(htmlFormatFamily, charset.name());
    }
}
 
Example 15
Source Project: syncope   Source File: SAML2SPMetadataTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates and saves a {@link SAML2SPMetadata} entity for the given owner,
 * using the content of the class path resource {@code sp-metadata.xml} as
 * metadata, and asserts that it can be found again by owner.
 *
 * @param owner owner to set on the new entity
 * @return the saved entity
 * @throws Exception if the metadata resource cannot be read
 */
private SAML2SPMetadata create(final String owner) throws Exception {
    SAML2SPMetadata saml2SPMetadata = entityFactory.newEntity(SAML2SPMetadata.class);
    saml2SPMetadata.setOwner(owner);

    // Close the resource stream once it has been read instead of leaking it.
    String metadata;
    try (java.io.InputStream in = new ClassPathResource("sp-metadata.xml").getInputStream()) {
        metadata = IOUtils.toString(in);
    }
    saml2SPMetadata.setMetadata(metadata);
    saml2SPMetadataDAO.save(saml2SPMetadata);

    assertNotNull(saml2SPMetadata);
    assertNotNull(saml2SPMetadata.getKey());
    assertNotNull(saml2SPMetadataDAO.findByOwner(saml2SPMetadata.getOwner()));
    return saml2SPMetadata;
}
 
Example 16
/**
 * Reads the whole stream as text and asserts that it contains the expected content.
 *
 * @param is              stream to read; not closed by this method
 * @param expectedContent text that must occur in the stream's content
 */
protected void checkYamlIncludesContent(InputStream is, String expectedContent) throws Exception {
  assertThat(IOUtils.toString(is), containsString(expectedContent));
}
 
Example 17
Source Project: nexus-repository-r   Source File: RITSupport.java    License: Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Decompresses the given GZIP stream, reads it as text and checks the text
 * against the given matcher.
 *
 * @param expectedContent matcher applied to the decompressed text
 * @param is              GZIP-compressed input
 */
protected void verifyTextGzipContent(Matcher<String> expectedContent, InputStream is) throws Exception {
  CompressorStreamFactory factory = new CompressorStreamFactory();
  try (InputStream decompressed = factory.createCompressorInputStream(GZIP, is)) {
    assertThat(IOUtils.toString(decompressed), expectedContent);
  }
}
 
Example 18
Source Project: jackrabbit-filevault   Source File: TestSubPackages.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks that installing a package with sub-packages and re-assembling it on an
 * alternative path yields the same package again: the filter definition and the
 * sorted list of zip entries must match the expected values.
 */
@Test
public void testRoundTrip() throws IOException, RepositoryException, PackageException {
    JcrPackage pack = packMgr.upload(getStream("/test-packages/subtest.zip"), false);
    assertNotNull(pack);

    // install
    ImportOptions opts = getDefaultOptions();
    opts.setNonRecursive(true);
    pack.install(opts);

    // create new package
    JcrPackage pkg = packMgr.open(PACKAGE_ID_SUB_TEST);
    packMgr.assemble(pkg, new DefaultProgressListener());

    try (ZipInputStream in = new ZipInputStream(pkg.getData().getBinary().getStream())) {
        // Collect every entry name and capture the content of the filter definition.
        List<String> names = new ArrayList<>();
        String filter = "";
        for (ZipEntry entry = in.getNextEntry(); entry != null; entry = in.getNextEntry()) {
            String entryName = entry.getName();
            names.add(entryName);
            if ("META-INF/vault/filter.xml".equals(entryName)) {
                filter = IOUtils.toString(in, "utf-8");
            }
        }
        Collections.sort(names);

        StringBuilder listing = new StringBuilder();
        for (String name: names) {
            // exclude some of the entries that depend on the repository setup
            boolean dependsOnSetup = "jcr_root/etc/.content.xml".equals(name)
                    || "jcr_root/etc/packages/my_packages/.content.xml".equals(name)
                    || "jcr_root/etc/packages/.content.xml".equals(name);
            if (!dependsOnSetup) {
                listing.append(name).append("\n");
            }
        }

        assertEquals("Filter must be correct",
                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<workspaceFilter version=\"1.0\">\n" +
                "    <filter root=\"/etc/packages/my_packages/sub_a.zip\"/>\n" +
                "    <filter root=\"/etc/packages/my_packages/sub_b.zip\"/>\n" +
                "</workspaceFilter>\n", filter);

        assertEquals("Package must contain proper entries.",
                "META-INF/\n" +
                "META-INF/MANIFEST.MF\n" +
                "META-INF/vault/\n" +
                "META-INF/vault/config.xml\n" +
                "META-INF/vault/definition/\n" +
                "META-INF/vault/definition/.content.xml\n" +
                "META-INF/vault/filter.xml\n" +
                "META-INF/vault/nodetypes.cnd\n" +
                "META-INF/vault/properties.xml\n" +
                "jcr_root/.content.xml\n" +
                "jcr_root/etc/\n" +
                "jcr_root/etc/packages/\n" +
                "jcr_root/etc/packages/my_packages/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/.content.xml\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/_vlt_definition/\n" +
                "jcr_root/etc/packages/my_packages/sub_a.zip.dir/_jcr_content/_vlt_definition/.content.xml\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/.content.xml\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/_vlt_definition/\n" +
                "jcr_root/etc/packages/my_packages/sub_b.zip.dir/_jcr_content/_vlt_definition/.content.xml\n", listing.toString());
    }
}
 
Example 19
Source Project: syncope   Source File: XMLContentExporter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Renders the value of one result set column as a string, choosing the
 * conversion from the given JDBC type code.
 *
 * <ul>
 * <li>binary types and BLOBs are rendered as a hexadecimal string;</li>
 * <li>BIT and BOOLEAN are rendered as {@code "1"} or {@code "0"};</li>
 * <li>DATE, TIME and TIMESTAMP are read as a timestamp and passed to {@code FormatUtils.format};</li>
 * <li>every other type is read with {@link ResultSet#getString(String)}.</li>
 * </ul>
 *
 * @param rs         result set positioned on the row to read
 * @param columnName name of the column to read
 * @param columnType JDBC type code of the column, one of the {@link Types} constants
 * @return the rendered value; {@code null} when the column value is null or when
 *         reading binary content fails with an {@link IOException} (which is logged)
 * @throws SQLException if the column cannot be read
 */
private static String getValues(final ResultSet rs, final String columnName, final Integer columnType)
        throws SQLException {

    String res = null;

    try {
        switch (columnType) {
            case Types.BINARY:
            case Types.VARBINARY:
            case Types.LONGVARBINARY:
                // try-with-resources accepts a null stream and closes a non-null one,
                // which the previous code never did.
                try (InputStream is = rs.getBinaryStream(columnName)) {
                    if (is != null) {
                        res = DatatypeConverter.printHexBinary(IOUtils.toByteArray(is));
                    }
                }
                break;

            case Types.BLOB:
                final Blob blob = rs.getBlob(columnName);
                if (blob != null) {
                    // Close the BLOB's stream after it has been converted.
                    try (InputStream blobStream = blob.getBinaryStream()) {
                        res = DatatypeConverter.printHexBinary(IOUtils.toByteArray(blobStream));
                    }
                }
                break;

            case Types.BIT:
            case Types.BOOLEAN:
                if (rs.getBoolean(columnName)) {
                    res = "1";
                } else {
                    res = "0";
                }
                break;

            case Types.DATE:
            case Types.TIME:
            case Types.TIMESTAMP:
                final Timestamp timestamp = rs.getTimestamp(columnName);
                if (timestamp != null) {
                    res = FormatUtils.format(new Date(timestamp.getTime()));
                }
                break;

            default:
                res = rs.getString(columnName);
        }
    } catch (IOException e) {
        LOG.error("Error retrieving hexadecimal string", e);
    }

    return res;
}
 
Example 20
Source Project: jackrabbit-filevault   Source File: TestPackageInstall.java    License: Apache License 2.0 3 votes vote down vote up
/**
 * Checks that installing a package whose filter has no child patterns does not
 * remove other content below the filter root.
 *
 * <pre>{@code
 *   <workspaceFilter version="1.0">
 *   <filter root="/etc">
 *     <include pattern="/etc"/>
 *     <include pattern="/etc/clientlibs"/>
 *     <include pattern="/etc/clientlibs/granite"/>
 *     <include pattern="/etc/clientlibs/granite/test(/.*)?"/>
 *   </filter>
 *  </workspaceFilter>
 * }</pre>
 */
@Test
public void testNoChildFilter() throws RepositoryException, IOException, PackageException {
    File tmpFile = File.createTempFile("vlttest", "zip");
    // Close both ends of the copy so the temp file is completely written and no
    // file handle stays open before the package is uploaded.
    try (java.io.InputStream in = getStream("/test-packages/test-package-with-etc.zip");
         java.io.OutputStream out = FileUtils.openOutputStream(tmpFile)) {
        IOUtils.copy(in, out);
    }
    JcrPackage pack = packMgr.upload(tmpFile, true, true, "test-package-with-etc", false);
    assertNodeExists("/etc");
    // Add a node the package does not contain; it must still exist after the install.
    admin.getNode("/etc").addNode("foo", NodeType.NT_FOLDER);
    admin.save();
    pack.install(getDefaultOptions());
    assertNodeExists("/etc/foo");
}