Java Code Examples for org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream

The following examples show how to use org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Decompresses a single (non-concatenated) BZip2 stream held in memory.
 *
 * <p>Decoding is best effort: if anything goes wrong the failure is printed and whatever bytes
 * were decoded before the failure are returned.
 *
 * @param bdata the BZip2-compressed bytes
 * @return the decompressed bytes; truncated or empty if decoding failed
 */
byte[] uncompress(byte[] bdata) {
  // The 20x expansion guess is only a capacity hint. Compute it in long arithmetic and clamp it:
  // 20 * bdata.length overflows int for inputs above roughly 107 MB, and a negative size makes
  // the ByteArrayOutputStream constructor throw IllegalArgumentException. The stream still grows
  // on demand beyond the clamped hint.
  final int maxInitialCapacity = 1 << 26;
  int initialCapacity = (int) Math.min(20L * bdata.length, (long) maxInitialCapacity);
  ByteArrayOutputStream out = new ByteArrayOutputStream(initialCapacity);
  ByteArrayInputStream in = new ByteArrayInputStream(bdata);
  try (BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(in, false)) {
    int bytesRead;
    byte[] decoded = new byte[524288];
    while ((bytesRead = bzIn.read(decoded)) != -1) {
      out.write(decoded, 0, bytesRead);
    }
  } catch (Exception e) {
    // Deliberately best effort: report the problem and fall through to return what was decoded.
    e.printStackTrace();
  }

  return out.toByteArray();
}
 
Example 2
Source Project: kafka-connect-fs   Source File: TextFileReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the given stream in a character reader using this object's charset, inserting a BZip2 or
 * GZIP decompressor first when the configured compression asks for one.
 *
 * @param inputStream the raw (possibly compressed) byte stream
 * @return a reader over the decoded characters
 * @throws IOException if a decompressor cannot be created on the stream
 */
private Reader getFileReader(InputStream inputStream) throws IOException {
    final InputStream decoded;
    switch (this.compression) {
        case GZIP:
            decoded = new GzipCompressorInputStream(inputStream,
                    this.compression.isConcatenated());
            break;
        case BZIP2:
            decoded = new BZip2CompressorInputStream(inputStream,
                    this.compression.isConcatenated());
            break;
        default:
            // Any other setting: read the bytes as they are.
            decoded = inputStream;
            break;
    }
    return new InputStreamReader(decoded, this.charset);
}
 
Example 3
Source Project: compress   Source File: Bzip2Compress.java    License: MIT License 6 votes vote down vote up
/**
 * Decompresses a BZip2-compressed byte array.
 *
 * @param data the BZip2-compressed bytes
 * @return the decompressed bytes
 * @throws IOException if {@code data} cannot be decoded as BZip2; a failure is reported to the
 *     caller instead of silently returning truncated output
 */
@Override
public byte[] uncompress(byte[] data) throws IOException {
	ByteArrayOutputStream out = new ByteArrayOutputStream();

	// try-with-resources closes the decompressor on every path, so no "resource" warning
	// has to be suppressed.
	try (BZip2CompressorInputStream bzIn =
			new BZip2CompressorInputStream(new ByteArrayInputStream(data))) {
		byte[] buffer = new byte[2048];
		int n;
		while ((n = bzIn.read(buffer)) >= 0) {
			out.write(buffer, 0, n);
		}
	}

	return out.toByteArray();
}
 
Example 4
Source Project: spring-boot-doma2-sample   Source File: CompressUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decompresses the given BZip2-compressed byte array and returns the result.
 *
 * <p>An {@link IOException} raised while decoding is logged and rethrown wrapped in a
 * {@link RuntimeException}.
 *
 * @param input the BZip2-compressed bytes
 * @return the decompressed bytes
 */
public static byte[] decompress(byte[] input) {
    try (val source = new ByteArrayInputStream(input);
            val decompressor = new BZip2CompressorInputStream(source);
            val sink = new ByteArrayOutputStream()) {
        IOUtils.copy(decompressor, sink);
        return sink.toByteArray();
    } catch (IOException e) {
        log.error("failed to decode.", e);
        throw new RuntimeException(e);
    }
}
 
Example 5
Source Project: hadoop   Source File: TestLineRecordReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file into memory, optionally BZip2-decompressing it, decodes the bytes as
 * UTF-8 and splits the text on {@code "\n"}.
 *
 * @param testFileUrl location of the file to read
 * @param bzip whether the file content must be BZip2-decompressed
 * @return the text split on newlines
 * @throws IOException if the file cannot be opened, read or decompressed
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources releases the file handle even when decompression fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may legally return fewer bytes than are available, and returns -1 for an
    // empty stream; loop until the buffer is full or the stream ends.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  // A completely filled buffer means the file may hold more data than was read.
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example 6
Source Project: hadoop   Source File: TestLineRecordReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file into memory, optionally BZip2-decompressing it, decodes the bytes as
 * UTF-8 and splits the text on {@code "\n"}.
 *
 * @param testFileUrl location of the file to read
 * @param bzip whether the file content must be BZip2-decompressed
 * @return the text split on newlines
 * @throws IOException if the file cannot be opened, read or decompressed
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources releases the file handle even when decompression fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may legally return fewer bytes than are available, and returns -1 for an
    // empty stream; loop until the buffer is full or the stream ends.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  // A completely filled buffer means the file may hold more data than was read.
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example 7
Source Project: big-c   Source File: TestLineRecordReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file into memory, optionally BZip2-decompressing it, decodes the bytes as
 * UTF-8 and splits the text on {@code "\n"}.
 *
 * @param testFileUrl location of the file to read
 * @param bzip whether the file content must be BZip2-decompressed
 * @return the text split on newlines
 * @throws IOException if the file cannot be opened, read or decompressed
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources releases the file handle even when decompression fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may legally return fewer bytes than are available, and returns -1 for an
    // empty stream; loop until the buffer is full or the stream ends.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  // A completely filled buffer means the file may hold more data than was read.
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example 8
Source Project: big-c   Source File: TestLineRecordReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file into memory, optionally BZip2-decompressing it, decodes the bytes as
 * UTF-8 and splits the text on {@code "\n"}.
 *
 * @param testFileUrl location of the file to read
 * @param bzip whether the file content must be BZip2-decompressed
 * @return the text split on newlines
 * @throws IOException if the file cannot be opened, read or decompressed
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources releases the file handle even when decompression fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may legally return fewer bytes than are available, and returns -1 for an
    // empty stream; loop until the buffer is full or the stream ends.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  // A completely filled buffer means the file may hold more data than was read.
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example 9
Source Project: runelite   Source File: BZip2.java    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Prepends {@code BZIP_HEADER} to the compressed payload and decompresses the combined bytes.
 *
 * @param bytes buffer whose first {@code len} bytes are the compressed payload
 * @param len number of payload bytes to take from {@code bytes}
 * @return the decompressed bytes
 * @throws IOException if the data cannot be decompressed
 */
public static byte[] decompress(byte[] bytes, int len) throws IOException
{
	final int headerLen = BZIP_HEADER.length;

	// Build one contiguous buffer: header first, payload right behind it.
	byte[] withHeader = new byte[len + headerLen];
	System.arraycopy(BZIP_HEADER, 0, withHeader, 0, headerLen);
	System.arraycopy(bytes, 0, withHeader, headerLen, len);

	ByteArrayOutputStream decoded = new ByteArrayOutputStream();
	try (InputStream bzIn = new BZip2CompressorInputStream(new ByteArrayInputStream(withHeader)))
	{
		IOUtils.copy(bzIn, decoded);
	}
	return decoded.toByteArray();
}
 
Example 10
Source Project: beam   Source File: AvroSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Exposes a byte array as an {@link InputStream} that yields decompressed bytes, picking the
 * decompressor from the given codec name.
 *
 * <p>The recognised names are the {@link DataFileConstants} codec constants handled below:
 *
 * <ul>
 *   <li>{@code NULL_CODEC}: the bytes are returned as they are
 *   <li>{@code BZIP2_CODEC}: BZip2
 *   <li>{@code XZ_CODEC}: xz
 *   <li>{@code DEFLATE_CODEC}: raw deflate
 *   <li>{@code SNAPPY_CODEC}: Snappy
 * </ul>
 *
 * @param data the possibly compressed bytes
 * @param codec the codec name
 * @return a stream over the decoded bytes
 * @throws IOException if a decompressor cannot be created
 * @throws IllegalArgumentException if the codec name is not one of the above
 */
private static InputStream decodeAsInputStream(byte[] data, String codec) throws IOException {
  ByteArrayInputStream raw = new ByteArrayInputStream(data);
  switch (codec) {
    case DataFileConstants.NULL_CODEC:
      return raw;
    case DataFileConstants.BZIP2_CODEC:
      return new BZip2CompressorInputStream(raw);
    case DataFileConstants.XZ_CODEC:
      return new XZCompressorInputStream(raw);
    case DataFileConstants.DEFLATE_CODEC:
      // nowrap == true: Do not expect ZLIB header or checksum, as Avro does not write them.
      return new InflaterInputStream(raw, new Inflater(true));
    case DataFileConstants.SNAPPY_CODEC:
      return new SnappyCompressorInputStream(raw, 1 << 16 /* Avro uses 64KB blocks */);
    default:
      throw new IllegalArgumentException("Unsupported codec: " + codec);
  }
}
 
Example 11
/**
 * Streams every BZip2-compressed download found for the given URL and feeds the decompressed
 * content to the in-memory indexing routine.
 *
 * <p>A failure on one download is logged and does not stop the remaining downloads.
 *
 * @param index the indexer to populate
 * @param url the URL whose downloads (matching {@code DOWNLOAD_SUFFIX}) are processed
 * @throws IOException declared by the signature; the download list lookup happens outside the
 *     per-download error handling
 */
public static void indexStreamMem(Indexer index, String url) throws IOException {
	Set<String> downloads = getDownloadsOfUrl(url, DOWNLOAD_SUFFIX);
	SameAsCollectorStreamMem sink = new SameAsCollectorStreamMem();
	for (String download : downloads) {
		URL streamUrl = new URL(download);

		try (InputStream fi = streamUrl.openStream();
				InputStream bi = new BufferedInputStream(fi);
				InputStream bzip2is = new BZip2CompressorInputStream(bi)) {
			LOGGER.info("Searching in {} ...", download);
			indexStreamMem(index, bzip2is, sink);
			LOGGER.info("...finished");
		} catch (Exception e) {
			// Report through the logger already used above (instead of stderr) and name the
			// download that failed; processing continues with the next one.
			LOGGER.error("Failed to index " + download, e);
		}
	}
}
 
Example 12
/**
 * File constructor with batchsize
 * @param path file to read from
 * @param batchsize the size of a batch
 */
public SinglestreamXmlDumpParser(File path, int batchsize) {
    this.pageInput = path;
    this.batchsize = batchsize;
    try {
        if(path.getAbsolutePath().toLowerCase().endsWith(".bz2")) {
            this.input = new BZip2CompressorInputStream(new FileInputStream(path), true);
        }
        else
        {
            this.input = new FileInputStream(path);
        }
    } catch (IOException e) {
        throw new IOError(e);
    }

    parser = new XmlDumpParser(input);

}
 
Example 13
/**
 * Decompresses the buffer of the next block as BZip2, reads it line by line as UTF-8, appends a
 * closing {@code </mediawiki>} tag and parses the resulting text as the header.
 *
 * @return the parsed header
 * @throws IOException if the block cannot be decompressed or read
 */
private Header readHeader() throws IOException {
    BufferedReader lines = new BufferedReader(
            new InputStreamReader(
                    new BZip2CompressorInputStream(new ByteArrayInputStream(next().buffer)),
                    "UTF-8"));

    StringBuilder xml = new StringBuilder();
    for (String line = lines.readLine(); line != null; line = lines.readLine()) {
        xml.append(line).append("\n");
    }
    xml.append("</mediawiki>");
    lines.close();

    try {
        return parseHeader(xml.toString());
    } catch (XMLStreamException e) {
        // Surface XML problems as an unchecked IOError.
        throw new IOError(e);
    }
}
 
Example 14
Source Project: Wikidata-Toolkit   Source File: DirectoryManagerImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the given stream in the decompressor that matches the compression type.
 *
 * @param inputStream
 *            the input stream with the (possibly compressed) data
 * @param compressionType
 *            the kind of compression
 * @return the stream itself for {@code NONE}, otherwise a stream yielding
 *         decompressed data
 * @throws IOException
 *             if the decompression stream cannot be created
 * @throws IllegalArgumentException
 *             if the compression type is not handled here
 */
protected InputStream getCompressorInputStream(InputStream inputStream,
		CompressionType compressionType) throws IOException {
	switch (compressionType) {
	case BZ2: {
		// The BZip2 decoder is fed through a buffer.
		BufferedInputStream buffered = new BufferedInputStream(inputStream);
		return new BZip2CompressorInputStream(buffered);
	}
	case GZIP:
		return new GZIPInputStream(inputStream);
	case NONE:
		return inputStream;
	default:
		throw new IllegalArgumentException("Unsupported compression type: "
				+ compressionType);
	}
}
 
Example 15
/**
 * Exports a small table with BZ2 compression and checks that exactly one {@code .csv.bz2} file
 * is produced whose decompressed content matches the expected CSV rows.
 */
@Test
public void export_compressed_bz2() throws Exception {

    new TableCreator(methodWatcher.getOrCreateConnection())
            .withCreate("create table export_compressed_bz2(a smallint,b double, c time,d varchar(20))")
            .withInsert("insert into export_compressed_bz2 values(?,?,?,?)")
            .withRows(getTestRows()).create();

    String exportSQL = buildExportSQL("select * from export_compressed_bz2 order by a asc", "BZ2");

    exportAndAssertExportResults(exportSQL, 6);
    File[] files = temporaryFolder.listFiles(new PatternFilenameFilter(".*csv.bz2"));
    assertEquals(1, files.length);

    String expected = "" +
            "25,3.14159,14:31:20,varchar1\n" +
            "26,3.14159,14:31:20,varchar1\n" +
            "27,3.14159,14:31:20,varchar1 space\n" +
            "28,3.14159,14:31:20,\"varchar1 , comma\"\n" +
            "29,3.14159,14:31:20,\"varchar1 \"\" quote\"\n" +
            "30,3.14159,14:31:20,varchar1\n";

    // Close both the file handle and the decompressor once the content has been read; the
    // original left them open.
    try (FileInputStream raw = new FileInputStream(files[0]);
            BZip2CompressorInputStream exported = new BZip2CompressorInputStream(raw)) {
        assertEquals(expected, IOUtils.toString(exported));
    }
}
 
Example 16
Source Project: myrrix-recommender   Source File: IOUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens an {@link InputStream} to the file. If it appears to be compressed, because its file name ends in
 * ".gz", ".zip", ".deflate", ".bz2" or ".bzip2", then it will be decompressed accordingly.
 *
 * <p>If the decompressing wrapper cannot be created, the underlying file stream is closed before
 * the exception is rethrown, so no file handle is leaked.
 *
 * @param file file, possibly compressed, to open
 * @return {@link InputStream} on uncompressed contents
 * @throws IOException if the stream can't be opened or is invalid or can't be read
 */
public static InputStream openMaybeDecompressing(File file) throws IOException {
  String name = file.getName();
  InputStream in = new FileInputStream(file);
  try {
    if (name.endsWith(".gz")) {
      return new GZIPInputStream(in);
    }
    if (name.endsWith(".zip")) {
      return new ZipInputStream(in);
    }
    if (name.endsWith(".deflate")) {
      return new InflaterInputStream(in);
    }
    if (name.endsWith(".bz2") || name.endsWith(".bzip2")) {
      return new BZip2CompressorInputStream(in);
    }
    return in;
  } catch (IOException | RuntimeException e) {
    // A wrapper constructor failed (for example on a bad header): release the file handle that
    // would otherwise be unreachable for the caller.
    try {
      in.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
 
Example 17
Source Project: feast   Source File: BZip2Decompressor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decompresses BZip2 bytes and hands the decompressed stream to the configured converter.
 *
 * @param compressed the BZip2-compressed bytes
 * @return whatever the converter produces from the decompressed stream
 * @throws IOException if decompression or conversion fails
 */
@Override
public T decompress(byte[] compressed) throws IOException {
  try (ByteArrayInputStream source = new ByteArrayInputStream(compressed);
      BZip2CompressorInputStream decompressed = new BZip2CompressorInputStream(source)) {
    T converted = inputStreamConverter.readStream(decompressed);
    return converted;
  }
}
 
Example 18
Source Project: feast   Source File: BZip2CompressorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Compresses a short string with {@code BZip2Compressor} and checks that the result can be read
 * back with {@code BZip2CompressorInputStream}.
 */
@Test
public void shouldHavBZip2CompatibleOutput() throws IOException {
  String origString = "somestring";
  BZip2Compressor<String> compressor = new BZip2Compressor<>(String::getBytes);
  byte[] compressed = compressor.compress(origString);

  try (ByteArrayInputStream source = new ByteArrayInputStream(compressed);
      BZip2CompressorInputStream decompressed = new BZip2CompressorInputStream(source);
      BufferedReader lines = new BufferedReader(new InputStreamReader(decompressed))) {
    String firstLine = lines.readLine();
    Assert.assertEquals(origString, firstLine);
  }
}
 
Example 19
Source Project: steady   Source File: ArchivePrinter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the input file as a BZip2 stream, decompresses it, and writes the decompressed data
 * re-compressed with BZip2CompressorOutputStream to the output file.
 *
 * <p>Per the original author's note: if a special input file with repeating inputs is provided,
 * the vulnerability in BZip2CompressorOutputStream will result in endless writing and consuming
 * lots of resources. Please refer to
 * https://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2012-2098
 *
 * @param _in path of the BZip2-compressed input file
 * @param _out path of the BZip2 file to write
 * @throws Exception if a file cannot be opened or the data cannot be read or written
 */
public static void compressExploitability(Path _in, Path _out) throws Exception {
	// try-with-resources closes every stream on all paths. Resources are closed in reverse
	// declaration order, so the compressor output is closed before the file streams below it.
	try (FileInputStream fin = new FileInputStream(_in.toString());
			BufferedInputStream in = new BufferedInputStream(fin);
			FileOutputStream fout = new FileOutputStream(_out.toString());
			BZip2CompressorOutputStream out = new BZip2CompressorOutputStream(fout);
			BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(in)) {
		final byte[] buffer = new byte[1024*10];
		int n;
		while (-1 != (n = bzIn.read(buffer))) {
			out.write(buffer, 0, n);
		}
	}
}
 
Example 20
Source Project: steady   Source File: ArchivePrinter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the input file as a BZip2 stream, decompresses it, and writes the decompressed data
 * re-compressed with BZip2CompressorOutputStream to the output file.
 *
 * <p>Per the original author's note: if a special input file with repeating inputs is provided,
 * the vulnerability in BZip2CompressorOutputStream will result in endless writing and consuming
 * lots of resources. Please refer to
 * https://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2012-2098
 *
 * @param _in path of the BZip2-compressed input file
 * @param _out path of the BZip2 file to write
 * @throws Exception if a file cannot be opened or the data cannot be read or written
 */
public static void compressExploitability(Path _in, Path _out) throws Exception {
	// try-with-resources closes every stream on all paths. Resources are closed in reverse
	// declaration order, so the compressor output is closed before the file streams below it.
	try (FileInputStream fin = new FileInputStream(_in.toString());
			BufferedInputStream in = new BufferedInputStream(fin);
			FileOutputStream fout = new FileOutputStream(_out.toString());
			BZip2CompressorOutputStream out = new BZip2CompressorOutputStream(fout);
			BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(in)) {
		final byte[] buffer = new byte[1024*10];
		int n;
		while (-1 != (n = bzIn.read(buffer))) {
			out.write(buffer, 0, n);
		}
	}
}
 
Example 21
Source Project: JQF   Source File: DecompressTest.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Fuzz entry point: opens the input as a BZip2 stream and attempts one read of up to 1024 bytes.
 * An {@link IOException} from opening or reading is ignored.
 */
@Fuzz
public void bzip2(InputStream in){
    final byte[] sink = new byte[1024];
    try {
        BZip2CompressorInputStream decoder = new BZip2CompressorInputStream(in);
        decoder.read(sink, 0, sink.length);
    } catch (IOException e){
        // Ignore
    }
}
 
Example 22
Source Project: rscplus   Source File: BZip2.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Decompresses BZip2 data from a slice of {@code data} into a new array of
 * {@code uncompressedLength} bytes.
 *
 * <p>NOTE(review): {@code offset} is not used; the slice always starts at index 2 and spans
 * {@code length + 4} bytes. Confirm against the callers that this is intended.
 *
 * @param data buffer containing the compressed bytes
 * @param offset not used by this method
 * @param length determines the slice size ({@code length + 4} bytes are exposed to the decoder)
 * @param uncompressedLength size of the returned array
 * @return the output array, or {@code null} if any exception occurs while decoding
 */
public static byte[] decompress(byte data[], int offset, int length, int uncompressedLength) {
  byte[] result = new byte[uncompressedLength];
  try {
    ByteArrayInputStream slice = new ByteArrayInputStream(data, 2, length + 4);
    BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(slice);
    bzIn.read(result);
    bzIn.close();
    return result;
  } catch (Exception e) {
    return null;
  }
}
 
Example 23
Source Project: beam   Source File: FileBasedSinkTest.java    License: Apache License 2.0 5 votes vote down vote up
/** {@link Compression#BZIP2} correctly writes BZip2 data. */
@Test
public void testCompressionBZIP2() throws FileNotFoundException, IOException {
  final File file = writeValuesWithCompression(Compression.BZIP2, "abc", "123");

  // Read the written file back through the Apache Commons BZip2 decoder as UTF-8 text.
  BufferedReader decompressed =
      new BufferedReader(
          new InputStreamReader(
              new BZip2CompressorInputStream(new FileInputStream(file)), StandardCharsets.UTF_8));

  assertReadValues(decompressed, "abc", "123");
}
 
Example 24
Source Project: phoenicis   Source File: Tar.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Extracts a {@code .tar.bz2} file into the output directory. The raw file stream is wrapped in
 * a counting stream that is passed to the extraction routine together with the file size and
 * the callback.
 *
 * @param inputFile the archive to extract
 * @param outputDir the directory to extract into
 * @param stateCallback callback passed through to the extraction routine
 * @return whatever the extraction routine returns
 * @throws ArchiveException if an {@link IOException} occurs
 */
List<File> uncompressTarBz2File(File inputFile, File outputDir, Consumer<ProgressEntity> stateCallback) {
    try (CountingInputStream counter = new CountingInputStream(new FileInputStream(inputFile));
            InputStream bzIn = new BZip2CompressorInputStream(counter)) {
        return uncompress(bzIn, counter, outputDir, FileUtils.sizeOf(inputFile), stateCallback);
    } catch (IOException e) {
        throw new ArchiveException(TAR_ERROR_MESSAGE, e);
    }
}
 
Example 25
Source Project: phoenicis   Source File: Tar.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Bunzip2 a file.
 *
 * @param inputFile
 *            source file
 * @param outputFile
 *            destination file
 * @return the destination file
 * @throws ArchiveException
 *             if any error occurs
 */
public File bunzip2(final File inputFile, final File outputFile) {
    // The messages previously said "Ungzipping"/"gunzip", which is misleading when diagnosing
    // bzip2 failures.
    LOGGER.info(
            String.format("Bunzipping %s to %s.", inputFile.getAbsolutePath(), outputFile.getAbsolutePath()));
    // The file stream is its own resource so it is closed even if the BZip2 constructor
    // rejects the input.
    try (FileInputStream fileIn = new FileInputStream(inputFile);
            BZip2CompressorInputStream in = new BZip2CompressorInputStream(fileIn);
            FileOutputStream out = new FileOutputStream(outputFile)) {
        IOUtils.copy(in, out);
        return outputFile;
    } catch (IOException e) {
        throw new ArchiveException("Unable to bunzip2 file", e);
    }
}
 
Example 26
Source Project: p4ic4idea   Source File: P4ExtFileUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Copies a resource to {@code outputFile}, creating the parent directory when it is missing.
 *
 * <p>When {@code uncompress} is set and the resource location ends in a recognised archive
 * suffix ({@code .tar.bz2}, {@code .tar.xz}, {@code .tar.gz}, {@code .tgz}, {@code .tar},
 * {@code .zip}), the resource is passed to the archive extraction routine; in every other case
 * it is passed to the plain file extraction routine. An already existing output file is not
 * rejected here.
 *
 * @throws IOException if the parent directory cannot be created, or as raised by the helpers
 */
public static void extractResource(@Nullable ClassLoader cl, @Nullable Object parentObject,
        @Nonnull String resourceLocation, @Nonnull File outputFile, boolean uncompress)
        throws IOException {
    final File parentDir = outputFile.getParentFile();
    if (parentDir != null && !parentDir.exists() && !parentDir.mkdirs()) {
        throw new IOException("Could not create directory " + parentDir);
    }

    final InputStream source = new BufferedInputStream(getStream(cl, parentObject, resourceLocation));
    if (!uncompress) {
        extractFile(source, outputFile);
        return;
    }

    // Pick the archive reader from the resource name; unknown suffixes are copied verbatim.
    if (resourceLocation.endsWith(".tar.bz2")) {
        extractArchive(new TarArchiveInputStream(new BZip2CompressorInputStream(source)), outputFile);
    } else if (resourceLocation.endsWith(".tar.xz")) {
        extractArchive(new TarArchiveInputStream(new XZCompressorInputStream(source)), outputFile);
    } else if (resourceLocation.endsWith(".tar.gz") || resourceLocation.endsWith(".tgz")) {
        extractArchive(new TarArchiveInputStream(new GzipCompressorInputStream(source)), outputFile);
    } else if (resourceLocation.endsWith(".tar")) {
        extractArchive(new TarArchiveInputStream(source), outputFile);
    } else if (resourceLocation.endsWith(".zip")) {
        extractArchive(new ZipArchiveInputStream(source), outputFile);
    } else {
        extractFile(source, outputFile);
    }
}
 
Example 27
/**
 * Downloads every BZip2-compressed file found for the given URL, feeds the decompressed content
 * to a file-backed collector, then sorts the collected file and indexes the sorted result.
 *
 * <p>A failure on one download is logged and does not stop the remaining downloads. Each
 * downloaded temporary file is deleted after it has been processed.
 *
 * @param index the indexer to populate
 * @param url the URL whose downloads (matching {@code DOWNLOAD_SUFFIX}) are processed
 * @throws IOException declared by the signature; raised outside the per-download error handling
 * @throws GerbilException declared by the signature
 */
public static void indexStream(Indexer index, String url) throws IOException, GerbilException {
	Set<String> downloads = getDownloadsOfUrl(url, DOWNLOAD_SUFFIX);
	String fileName = UUID.randomUUID().toString();
	SameAsCollectorStreamFile sink = new SameAsCollectorStreamFile(fileName);
	for (String download : downloads) {
		File current = null;
		try {
			LOGGER.info("Searching in {} ...", download);
			current = downloadUrl(new URL(download));
			try (InputStream fi = Files.newInputStream(current.toPath());
					InputStream bi = new BufferedInputStream(fi);
					InputStream bzip2is = new BZip2CompressorInputStream(bi)) {
				indexStream(index, bzip2is, sink);
				LOGGER.info("...finished");

			}
		} catch (Exception e) {
			// Report through the logger already used above (instead of stderr) and name the
			// download that failed; processing continues with the next one.
			LOGGER.error("Failed to index " + download, e);
		} finally {
			// Make a leftover temporary download visible instead of ignoring the result.
			if (current != null && !current.delete()) {
				LOGGER.warn("Could not delete temporary file {}", current);
			}
		}
	}
	sink.close();
	// sort that file
	File sorted = new File(sink.getFile().getName() + "_sorted");
	ExternalSort.mergeSortedFiles(ExternalSort.sortInBatch(sink.getFile()), sorted);
	// then index that file
	indexSortedFile(index, sorted.getAbsolutePath());
	sink.getFile().delete();
}
 
Example 28
public IndexReader(File indexFile, File pageFile, int bufferAhead) {
    try {
        this.pageFileSize = pageFile.length();
        this.buffer = new ArrayDeque<Block>();
        this.bufferAhead = bufferAhead;
        this.indexReader =
                new BufferedReader(
                    new InputStreamReader(
                        new BZip2CompressorInputStream(
                                new BufferedInputStream(
                                        new FileInputStream(indexFile)))));
    } catch (IOException e) {
        throw new IOError(e);
    }
}
 
Example 29
Source Project: Wikidata-Toolkit   Source File: MockDirectoryManager.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the mock file with the given name and, for GZIP or BZ2, wraps it in the matching
 * decompressor; any other compression type gets the mock file's stream unchanged.
 *
 * @param fileName name of the mock file
 * @param compressionType the kind of compression to undo
 * @return a stream over the (decompressed) mock file content
 * @throws IOException if the mock file or the decompressor cannot be opened
 */
@Override
public InputStream getInputStreamForFile(String fileName,
		CompressionType compressionType) throws IOException {
	InputStream mockStream = getInputStreamForMockFile(fileName);
	if (compressionType == CompressionType.GZIP) {
		return new GZIPInputStream(mockStream);
	}
	if (compressionType == CompressionType.BZ2) {
		return new BZip2CompressorInputStream(mockStream);
	}
	return mockStream;
}
 
Example 30
/**
 * Decompresses a BZip2 file into the target directory. The output file is named after the
 * input file's base name as computed by {@code FilenameUtils.getBaseName}.
 *
 * @param file the BZip2-compressed file
 * @param toDirectory the directory to write the decompressed file into
 * @throws IOException if reading, decompressing or writing fails
 */
private void extractBz2File(Path file, Path toDirectory) throws IOException {
    Path target = toDirectory.resolve(FilenameUtils.getBaseName(file.toString()));
    try (FileInputStream raw = new FileInputStream(file.toFile());
            BufferedInputStream buffered = new BufferedInputStream(raw);
            BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(buffered)) {
        Files.copy(bzIn, target);
    }
}