org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream Java Examples

The following examples show how to use org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestGribCompressByBit.java    From netcdf-java with BSD 3-Clause "New" or "Revised" License 7 votes vote down vote up
/**
 * Decompresses a bzip2-encoded byte array.
 *
 * <p>Best-effort: if decoding fails part-way, the stack trace is printed and whatever was
 * decoded before the failure is returned.
 *
 * @param bdata bzip2-compressed bytes
 * @return the decompressed bytes (possibly truncated or empty if decoding failed)
 */
byte[] uncompress(byte[] bdata) {
  // The capacity is only a hint. Compute it in long so a large input cannot overflow int
  // (which could yield a negative capacity and an IllegalArgumentException); when the
  // estimate does not fit, fall back to the input size and let the buffer grow on demand.
  long estimate = 20L * bdata.length;
  int sizeHint = estimate <= Integer.MAX_VALUE - 8 ? (int) estimate : bdata.length;
  ByteArrayOutputStream out = new ByteArrayOutputStream(sizeHint);

  // Second constructor argument false: stop after the first bzip2 stream.
  try (BZip2CompressorInputStream bzIn =
      new BZip2CompressorInputStream(new ByteArrayInputStream(bdata), false)) {
    byte[] decoded = new byte[524288];
    int bytesRead;
    while ((bytesRead = bzIn.read(decoded)) != -1) {
      out.write(decoded, 0, bytesRead);
    }
  } catch (Exception e) {
    // Deliberately broad: callers get the partial result instead of an exception.
    e.printStackTrace();
  }

  return out.toByteArray();
}
 
Example #2
Source File: IOUtils.java    From myrrix-recommender with Apache License 2.0 6 votes vote down vote up
/**
 * Opens an {@link InputStream} to the file. If it appears to be compressed, because its file name
 * ends in ".gz", ".zip", ".deflate", ".bz2" or ".bzip2", then it will be decompressed accordingly.
 * For ".zip" the returned stream is a {@link ZipInputStream}.
 *
 * <p>If wrapping the file in a decompressing stream fails (for example because the header is
 * invalid), the underlying file stream is closed before the exception is propagated.
 *
 * @param file file, possibly compressed, to open
 * @return {@link InputStream} on uncompressed contents
 * @throws IOException if the stream can't be opened or is invalid or can't be read
 */
public static InputStream openMaybeDecompressing(File file) throws IOException {
  String name = file.getName();
  InputStream in = new FileInputStream(file);
  try {
    if (name.endsWith(".gz")) {
      return new GZIPInputStream(in);
    }
    if (name.endsWith(".zip")) {
      return new ZipInputStream(in);
    }
    if (name.endsWith(".deflate")) {
      return new InflaterInputStream(in);
    }
    if (name.endsWith(".bz2") || name.endsWith(".bzip2")) {
      return new BZip2CompressorInputStream(in);
    }
    return in;
  } catch (IOException | RuntimeException e) {
    // Some decompressor constructors read the stream header eagerly; do not leak the file handle.
    try {
      in.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
 
Example #3
Source File: AvroSource.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps a byte array in an InputStream that yields the decompressed bytes for the given codec.
 *
 * <p>The accepted codec names are the ones Avro's {@link CodecFactory} understands, as defined in
 * {@link DataFileConstants}:
 *
 * <ul>
 *   <li>"snappy" : Google's Snappy compression
 *   <li>"deflate" : deflate compression
 *   <li>"bzip2" : Bzip2 compression
 *   <li>"xz" : xz compression
 *   <li>"null" (the string, not the value): Uncompressed data
 * </ul>
 *
 * @throws IllegalArgumentException if the codec name is not one of the above
 */
private static InputStream decodeAsInputStream(byte[] data, String codec) throws IOException {
  ByteArrayInputStream raw = new ByteArrayInputStream(data);
  if (codec.equals(DataFileConstants.SNAPPY_CODEC)) {
    return new SnappyCompressorInputStream(raw, 1 << 16 /* Avro uses 64KB blocks */);
  }
  if (codec.equals(DataFileConstants.DEFLATE_CODEC)) {
    // nowrap == true: Avro writes neither a ZLIB header nor a checksum, so expect none.
    return new InflaterInputStream(raw, new Inflater(true));
  }
  if (codec.equals(DataFileConstants.XZ_CODEC)) {
    return new XZCompressorInputStream(raw);
  }
  if (codec.equals(DataFileConstants.BZIP2_CODEC)) {
    return new BZip2CompressorInputStream(raw);
  }
  if (codec.equals(DataFileConstants.NULL_CODEC)) {
    return raw;
  }
  throw new IllegalArgumentException("Unsupported codec: " + codec);
}
 
Example #4
Source File: BZip2.java    From runelite with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Decompresses bzip2 data that is stored without its header.
 *
 * @param bytes the header-less compressed payload
 * @param len   number of bytes of {@code bytes} to use
 * @return the decompressed bytes
 * @throws IOException if the data cannot be decoded
 */
public static byte[] decompress(byte[] bytes, int len) throws IOException
{
	// Prepend BZIP_HEADER to the first len payload bytes before handing them to the decoder.
	int headerLen = BZIP_HEADER.length;
	byte[] framed = new byte[len + headerLen];
	System.arraycopy(BZIP_HEADER, 0, framed, 0, headerLen);
	System.arraycopy(bytes, 0, framed, headerLen, len);

	ByteArrayOutputStream sink = new ByteArrayOutputStream();
	try (InputStream decoder = new BZip2CompressorInputStream(new ByteArrayInputStream(framed)))
	{
		IOUtils.copy(decoder, sink);
	}
	return sink.toByteArray();
}
 
Example #5
Source File: TestLineRecordReader.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file (optionally bzip2-compressed) into memory and splits it into lines.
 *
 * @param testFileUrl location of the test file
 * @param bzip whether the file must be decompressed with bzip2 while reading
 * @return the file content, decoded as UTF-8 and split on '\n'
 * @throws IOException if the file cannot be opened or read
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources closes the streams even when reading fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may return fewer bytes than are available, so read until EOF or until
    // the buffer is full. An empty file leaves count at 0 instead of -1.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example #6
Source File: TestLineRecordReader.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file (optionally bzip2-compressed) into memory and splits it into lines.
 *
 * @param testFileUrl location of the test file
 * @param bzip whether the file must be decompressed with bzip2 while reading
 * @return the file content, decoded as UTF-8 and split on '\n'
 * @throws IOException if the file cannot be opened or read
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources closes the streams even when reading fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may return fewer bytes than are available, so read until EOF or until
    // the buffer is full. An empty file leaves count at 0 instead of -1.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example #7
Source File: InitialIndexTool.java    From gerbil with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Indexes every bzip2-compressed download found at the given URL, streaming each one directly
 * from the network without storing it on disk.
 *
 * <p>A failure while processing one download is logged and does not stop the remaining ones.
 *
 * @param index the indexer that receives the data
 * @param url   the location whose downloads (matching DOWNLOAD_SUFFIX) are processed
 * @throws IOException if the download list cannot be obtained or a download URL is malformed
 */
public static void indexStreamMem(Indexer index, String url) throws IOException {
	Set<String> downloads = getDownloadsOfUrl(url, DOWNLOAD_SUFFIX);
	SameAsCollectorStreamMem sink = new SameAsCollectorStreamMem();
	for (String download : downloads) {
		URL streamUrl = new URL(download);

		try (InputStream fi = streamUrl.openStream();
				InputStream bi = new BufferedInputStream(fi);
				InputStream bzip2is = new BZip2CompressorInputStream(bi)) {
			LOGGER.info("Searching in {} ...", download);
			indexStreamMem(index, bzip2is, sink);
			LOGGER.info("...finished");
		} catch (Exception e) {
			// Report through the logger (with the failing URL) instead of printing to stderr.
			LOGGER.error("Indexing of {} failed; continuing with the next download", download, e);
		}
	}
}
 
Example #8
Source File: TestLineRecordReader.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file (optionally bzip2-compressed) into memory and splits it into lines.
 *
 * @param testFileUrl location of the test file
 * @param bzip whether the file must be decompressed with bzip2 while reading
 * @return the file content, decoded as UTF-8 and split on '\n'
 * @throws IOException if the file cannot be opened or read
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources closes the streams even when reading fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may return fewer bytes than are available, so read until EOF or until
    // the buffer is full. An empty file leaves count at 0 instead of -1.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example #9
Source File: TestLineRecordReader.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a whole test file (optionally bzip2-compressed) into memory and splits it into lines.
 *
 * @param testFileUrl location of the test file
 * @param bzip whether the file must be decompressed with bzip2 while reading
 * @return the file content, decoded as UTF-8 and split on '\n'
 * @throws IOException if the file cannot be opened or read
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip)
    throws IOException {
  int MAX_DATA_SIZE = 1024 * 1024;
  byte[] data = new byte[MAX_DATA_SIZE];
  int count = 0;
  // try-with-resources closes the streams even when reading fails.
  try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
      java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
    // A single read() may return fewer bytes than are available, so read until EOF or until
    // the buffer is full. An empty file leaves count at 0 instead of -1.
    int n;
    while (count < data.length
        && (n = in.read(data, count, data.length - count)) != -1) {
      count += n;
    }
  }
  assertTrue("Test file data too big for buffer", count < data.length);
  return new String(data, 0, count, "UTF-8").split("\n");
}
 
Example #10
Source File: CompressUtils.java    From spring-boot-doma2-sample with Apache License 2.0 6 votes vote down vote up
/**
 * Decompresses the given byte array with BZip2 and returns the result.
 *
 * @param input the BZip2-compressed bytes
 * @return the decompressed bytes
 * @throws RuntimeException wrapping the {@link IOException} if decoding fails
 */
public static byte[] decompress(byte[] input) {
    try (ByteArrayInputStream source = new ByteArrayInputStream(input);
            BZip2CompressorInputStream decoder = new BZip2CompressorInputStream(source);
            ByteArrayOutputStream sink = new ByteArrayOutputStream()) {
        IOUtils.copy(decoder, sink);
        return sink.toByteArray();
    } catch (IOException e) {
        log.error("failed to decode.", e);
        throw new RuntimeException(e);
    }
}
 
Example #11
Source File: SinglestreamXmlDumpParser.java    From wikiforia with GNU General Public License v2.0 6 votes vote down vote up
/**
 * File constructor with batchsize.
 *
 * <p>Files whose absolute path ends in ".bz2" (case-insensitive) are read through a bzip2
 * decompressor; any other file is read as-is. An {@link IOException} while opening the file is
 * rethrown as an {@link IOError}.
 *
 * @param path file to read from
 * @param batchsize the size of a batch
 */
public SinglestreamXmlDumpParser(File path, int batchsize) {
    this.pageInput = path;
    this.batchsize = batchsize;
    try {
        FileInputStream fileIn = new FileInputStream(path);
        if(path.getAbsolutePath().toLowerCase().endsWith(".bz2")) {
            try {
                // Second argument true: keep decoding across concatenated bzip2 streams.
                this.input = new BZip2CompressorInputStream(fileIn, true);
            } catch (IOException | RuntimeException e) {
                // The decompressor reads the header eagerly; release the file handle on failure.
                try {
                    fileIn.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }
        else
        {
            this.input = fileIn;
        }
    } catch (IOException e) {
        throw new IOError(e);
    }

    parser = new XmlDumpParser(input);

}
 
Example #12
Source File: Bzip2Compress.java    From compress with MIT License 6 votes vote down vote up
/**
 * Decompresses bzip2-compressed data held in memory.
 *
 * @param data the bzip2-compressed bytes
 * @return the decompressed bytes
 * @throws IOException if {@code data} is not valid bzip2 or decoding fails; the failure is
 *         propagated instead of silently returning truncated output
 */
@Override
public byte[] uncompress(byte[] data) throws IOException {
	ByteArrayOutputStream out = new ByteArrayOutputStream();

	// try-with-resources closes the decoder on both success and failure.
	try (BZip2CompressorInputStream bzIn =
			new BZip2CompressorInputStream(new ByteArrayInputStream(data))) {
		byte[] buffer = new byte[2048];
		int n;
		while ((n = bzIn.read(buffer)) >= 0) {
			out.write(buffer, 0, n);
		}
	}

	return out.toByteArray();
}
 
Example #13
Source File: TextFileReader.java    From kafka-connect-fs with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a character reader over the given stream, inserting a decompressor first when this
 * reader's compression setting is BZIP2 or GZIP.
 *
 * @param inputStream the raw (possibly compressed) byte stream
 * @return a reader decoding the (decompressed) bytes with this reader's charset
 * @throws IOException if the decompressor cannot be created
 */
private Reader getFileReader(InputStream inputStream) throws IOException {
    final InputStream source;
    switch (this.compression) {
        case BZIP2:
            source = new BZip2CompressorInputStream(inputStream, this.compression.isConcatenated());
            break;
        case GZIP:
            source = new GzipCompressorInputStream(inputStream, this.compression.isConcatenated());
            break;
        default:
            // No decompression for any other setting.
            source = inputStream;
            break;
    }
    return new InputStreamReader(source, this.charset);
}
 
Example #14
Source File: MultistreamBzip2XmlDumpParser.java    From wikiforia with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Reads the next block, decompresses it as bzip2, and parses the resulting text as the dump
 * header.
 *
 * @return the parsed header
 * @throws IOException if the block cannot be decompressed or read
 * @throws IOError if the header text is not well-formed XML
 */
private Header readHeader() throws IOException {
    byte[] header = next().buffer;

    StringBuilder sb = new StringBuilder();
    // try-with-resources closes the reader even if decoding fails part-way.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            new BZip2CompressorInputStream(new ByteArrayInputStream(header)), "UTF-8"))) {
        String line;
        while( (line = reader.readLine()) != null)
        {
            sb.append(line).append("\n");
        }
    }

    // The closing root tag is appended here before the text is handed to the XML parser.
    sb.append("</mediawiki>");

    try {
        return parseHeader(sb.toString());
    } catch (XMLStreamException e) {
        throw new IOError(e);
    }
}
 
Example #15
Source File: DirectoryManagerImpl.java    From Wikidata-Toolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the given input stream in whatever decompressing stream the compression type calls
 * for.
 *
 * @param inputStream
 *            the input stream with the (possibly compressed) data
 * @param compressionType
 *            the kind of compression
 * @return an input stream with decompressed data; for {@code NONE} this is the given stream
 *         itself
 * @throws IOException
 *             if there was a problem creating the decompression streams
 * @throws IllegalArgumentException
 *             if the compression type is not supported
 */
protected InputStream getCompressorInputStream(InputStream inputStream,
		CompressionType compressionType) throws IOException {
	final InputStream decompressed;
	switch (compressionType) {
	case NONE:
		decompressed = inputStream;
		break;
	case GZIP:
		decompressed = new GZIPInputStream(inputStream);
		break;
	case BZ2:
		BufferedInputStream buffered = new BufferedInputStream(inputStream);
		decompressed = new BZip2CompressorInputStream(buffered);
		break;
	default:
		throw new IllegalArgumentException("Unsupported compression type: "
				+ compressionType);
	}
	return decompressed;
}
 
Example #16
Source File: ExportOperationIT.java    From spliceengine with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Exports a small table with BZ2 compression and checks that the single resulting
 * {@code .csv.bz2} file decompresses to the expected CSV text.
 */
@Test
public void export_compressed_bz2() throws Exception {

    new TableCreator(methodWatcher.getOrCreateConnection())
            .withCreate("create table export_compressed_bz2(a smallint,b double, c time,d varchar(20))")
            .withInsert("insert into export_compressed_bz2 values(?,?,?,?)")
            .withRows(getTestRows()).create();

    String exportSQL = buildExportSQL("select * from export_compressed_bz2 order by a asc", "BZ2");

    exportAndAssertExportResults(exportSQL, 6);
    File[] files = temporaryFolder.listFiles(new PatternFilenameFilter(".*csv.bz2"));
    assertEquals(1, files.length);

    // Close the file streams once the content has been read so the test does not leak a handle.
    String exported;
    try (FileInputStream fileIn = new FileInputStream(files[0]);
            BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(fileIn)) {
        exported = IOUtils.toString(bzIn);
    }

    assertEquals("" +
                    "25,3.14159,14:31:20,varchar1\n" +
                    "26,3.14159,14:31:20,varchar1\n" +
                    "27,3.14159,14:31:20,varchar1 space\n" +
                    "28,3.14159,14:31:20,\"varchar1 , comma\"\n" +
                    "29,3.14159,14:31:20,\"varchar1 \"\" quote\"\n" +
                    "30,3.14159,14:31:20,varchar1\n",
            exported);
}
 
Example #17
Source File: MultistreamBzip2XmlDumpParser.java    From wikiforia with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Opens a bzip2-compressed index file for line-wise reading.
 *
 * @param indexFile   the bzip2-compressed index file
 * @param pageFile    the page file; only its length is recorded here
 * @param bufferAhead stored as-is in the {@code bufferAhead} field
 * @throws IOError if the index file cannot be opened or is not valid bzip2
 */
public IndexReader(File indexFile, File pageFile, int bufferAhead) {
    try {
        this.pageFileSize = pageFile.length();
        this.buffer = new ArrayDeque<Block>();
        this.bufferAhead = bufferAhead;
        // Decode explicitly as UTF-8 rather than with the platform default charset, so the
        // index is read the same way on every machine.
        this.indexReader =
                new BufferedReader(
                    new InputStreamReader(
                        new BZip2CompressorInputStream(
                                new BufferedInputStream(
                                        new FileInputStream(indexFile))),
                        "UTF-8"));
    } catch (IOException e) {
        throw new IOError(e);
    }
}
 
Example #18
Source File: MockDirectoryManager.java    From Wikidata-Toolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the mock file and, for GZIP or BZ2, wraps it in the matching decompressing stream.
 * Any other compression type yields the mock file's stream unchanged.
 */
@Override
public InputStream getInputStreamForFile(String fileName,
		CompressionType compressionType) throws IOException {
	InputStream raw = getInputStreamForMockFile(fileName);
	if (compressionType == CompressionType.GZIP) {
		return new GZIPInputStream(raw);
	}
	if (compressionType == CompressionType.BZ2) {
		return new BZip2CompressorInputStream(raw);
	}
	return raw;
}
 
Example #19
Source File: FileExtractorImpl.java    From webdriverextensions-maven-plugin with Apache License 2.0 5 votes vote down vote up
/**
 * Decompresses a bzip2 file into the target directory. The output file is named after the
 * base name of the input as computed by {@code FilenameUtils.getBaseName}.
 *
 * @param file        the bzip2-compressed file
 * @param toDirectory directory that receives the decompressed file
 * @throws IOException if reading, decompressing or writing fails
 */
private void extractBz2File(Path file, Path toDirectory) throws IOException {
    Path target = toDirectory.resolve(FilenameUtils.getBaseName(file.toString()));
    try (FileInputStream fileIn = new FileInputStream(file.toFile());
            BufferedInputStream buffered = new BufferedInputStream(fileIn);
            BZip2CompressorInputStream bzip2 = new BZip2CompressorInputStream(buffered)) {
        Files.copy(bzip2, target);
    }
}
 
Example #20
Source File: FileExtractorImpl.java    From webdriverextensions-maven-plugin with Apache License 2.0 5 votes vote down vote up
/**
 * Unpacks a {@code .tar.bz2} archive into the given directory, creating the directory first
 * if necessary.
 *
 * @param file        the bzip2-compressed tar archive
 * @param toDirectory directory that receives the extracted entries
 * @throws IOException if the directory cannot be created or extraction fails
 */
private void extractTarBz2File(Path file, Path toDirectory) throws IOException {
    Files.createDirectories(toDirectory);
    try (FileInputStream fileIn = new FileInputStream(file.toFile());
            BufferedInputStream buffered = new BufferedInputStream(fileIn);
            BZip2CompressorInputStream bzip2 = new BZip2CompressorInputStream(buffered);
            TarArchiveInputStream tar = new TarArchiveInputStream(bzip2)) {
        extractTar(toDirectory, tar);
    }
}
 
Example #21
Source File: InitialIndexTool.java    From gerbil with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Indexes every bzip2-compressed download found at the given URL. Each download is first
 * stored in a temporary file, streamed into a file-backed sink, and deleted again; the sink
 * file is then sorted externally and the sorted file is indexed.
 *
 * <p>A failure while processing one download is logged and does not stop the remaining ones.
 *
 * @param index the indexer that receives the data
 * @param url   the location whose downloads (matching DOWNLOAD_SUFFIX) are processed
 * @throws IOException     if the download list, sorting or final indexing fails with an I/O error
 * @throws GerbilException declared by this method; the visible code does not show which call raises it
 */
public static void indexStream(Indexer index, String url) throws IOException, GerbilException {
	Set<String> downloads = getDownloadsOfUrl(url, DOWNLOAD_SUFFIX);
	String fileName = UUID.randomUUID().toString();
	SameAsCollectorStreamFile sink = new SameAsCollectorStreamFile(fileName);
	for (String download : downloads) {
		File current = null;
		try {
			LOGGER.info("Searching in {} ...", download);
			current = downloadUrl(new URL(download));
			try (InputStream fi = Files.newInputStream(current.toPath());
					InputStream bi = new BufferedInputStream(fi);
					InputStream bzip2is = new BZip2CompressorInputStream(bi)) {
				indexStream(index, bzip2is, sink);
				LOGGER.info("...finished");

			}
		} catch (Exception e) {
			// Report through the logger (with the failing URL) instead of printing to stderr.
			LOGGER.error("Indexing of {} failed; continuing with the next download", download, e);
		} finally {
			// The downloaded copy is only needed while it is being streamed into the sink.
			if (current != null) {
				current.delete();
			}
		}
	}
	sink.close();
	// sort that file
	File sorted = new File(sink.getFile().getName() + "_sorted");
	ExternalSort.mergeSortedFiles(ExternalSort.sortInBatch(sink.getFile()), sorted);
	// then index that file
	indexSortedFile(index, sorted.getAbsolutePath());
	sink.getFile().delete();
}
 
Example #22
Source File: TarBz2Function.java    From bazel with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the archive named by the descriptor as a buffered, bzip2-decompressing stream.
 *
 * @throws IOException if the archive cannot be opened or the bzip2 decoder cannot be created
 */
@Override
protected InputStream getDecompressorStream(DecompressorDescriptor descriptor)
    throws IOException {
  FileInputStream archive = new FileInputStream(descriptor.archivePath().getPathFile());
  BufferedInputStream buffered = new BufferedInputStream(archive, BUFFER_SIZE);
  return new BZip2CompressorInputStream(buffered);
}
 
Example #23
Source File: Tar.java    From writelatex-git-bridge with MIT License 5 votes vote down vote up
/**
 * Decompresses a bzip2-compressed tar stream and passes the result to {@code untar} together
 * with the parent directory.
 *
 * @param tarbz2    the bzip2-compressed tar stream; it is NOT closed by this method
 * @param parentDir the directory handed to {@code untar}
 * @throws IOException if decompression or unpacking fails
 */
public static void unzip(
        InputStream tarbz2,
        File parentDir
) throws IOException {
    /* The bzip2 wrapper is deliberately left unclosed: closing it would also
       close tarbz2, which belongs to the caller. */
    untar(new BZip2CompressorInputStream(tarbz2), parentDir);
}
 
Example #24
Source File: P4ExtFileUtils.java    From p4ic4idea with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a resource to {@code outputFile}, optionally unpacking it when it is a recognised
 * archive.
 *
 * <p>When {@code uncompress} is true and the resource location ends in {@code .tar.bz2},
 * {@code .tar.xz}, {@code .tar.gz}, {@code .tgz}, {@code .tar} or {@code .zip}, the resource
 * is handed to {@code extractArchive}; in every other case it is handed to
 * {@code extractFile}. Missing parent directories of {@code outputFile} are created first.
 *
 * @param cl               class loader passed to {@code getStream}; may be null
 * @param parentObject     object passed to {@code getStream}; may be null
 * @param resourceLocation location of the resource; its suffix selects the archive format
 * @param outputFile       destination handed to {@code extractArchive} / {@code extractFile}
 * @param uncompress       whether recognised archives should be unpacked
 * @throws IOException if the parent directory cannot be created or extraction fails
 */
public static void extractResource(@Nullable ClassLoader cl, @Nullable Object parentObject,
        @Nonnull String resourceLocation, @Nonnull File outputFile, boolean uncompress)
        throws IOException {
    // An existing outputFile is not rejected here.
    File parent = outputFile.getParentFile();
    if (parent != null && !parent.exists() && !parent.mkdirs()) {
        throw new IOException("Could not create directory " + parent);
    }

    InputStream source = new BufferedInputStream(getStream(cl, parentObject, resourceLocation));
    if (uncompress && resourceLocation.endsWith(".tar.bz2")) {
        extractArchive(new TarArchiveInputStream(new BZip2CompressorInputStream(source)), outputFile);
    } else if (uncompress && resourceLocation.endsWith(".tar.xz")) {
        extractArchive(new TarArchiveInputStream(new XZCompressorInputStream(source)), outputFile);
    } else if (uncompress
            && (resourceLocation.endsWith(".tar.gz") || resourceLocation.endsWith(".tgz"))) {
        extractArchive(new TarArchiveInputStream(new GzipCompressorInputStream(source)), outputFile);
    } else if (uncompress && resourceLocation.endsWith(".tar")) {
        extractArchive(new TarArchiveInputStream(source), outputFile);
    } else if (uncompress && resourceLocation.endsWith(".zip")) {
        extractArchive(new ZipArchiveInputStream(source), outputFile);
    } else {
        // Not an archive we unpack (or unpacking not requested): copy the resource verbatim.
        extractFile(source, outputFile);
    }
}
 
Example #25
Source File: WikipediaArticleReader.java    From json-wikipedia with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the stream in a buffered UTF-8 reader, decompressing it first when the file name ends
 * in ".gz" (gzip) or ".bz2" (bzip2).
 *
 * @param stream   the raw byte stream
 * @param filename name used only to detect the compression format from its suffix
 * @return a buffered reader over the (decompressed) content
 * @throws IOException if the decompressing stream cannot be created
 */
private static BufferedReader getPlainOrCompressedReader(InputStream stream, String filename)
    throws IOException {
  InputStream source = stream;
  if (filename.endsWith(".gz")) {
    source = new GZIPInputStream(stream);
  } else if (filename.endsWith(".bz2")) {
    source = new BZip2CompressorInputStream(stream);
  }
  // Decode explicitly as UTF-8 rather than with the platform default charset, so the text is
  // read the same way on every machine.
  return new BufferedReader(new InputStreamReader(source, "UTF-8"));
}
 
Example #26
Source File: FileBasedSinkTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Verifies that values written with {@link Compression#BZIP2} are valid BZip2 data. */
@Test
public void testCompressionBZIP2() throws FileNotFoundException, IOException {
  final File file = writeValuesWithCompression(Compression.BZIP2, "abc", "123");
  // Decode with the Apache Commons implementation (de facto standard) to check the output.
  BufferedReader decoded =
      new BufferedReader(
          new InputStreamReader(
              new BZip2CompressorInputStream(new FileInputStream(file)), StandardCharsets.UTF_8));
  assertReadValues(decoded, "abc", "123");
}
 
Example #27
Source File: Tar.java    From phoenicis with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Decompresses a bzip2 file.
 *
 * @param inputFile
 *            source file (bzip2-compressed)
 * @param outputFile
 *            destination file that receives the decompressed content
 * @return the destination file
 * @throws ArchiveException
 *             if any error occurs
 */
public File bunzip2(final File inputFile, final File outputFile) {
    LOGGER.info(
            String.format("Bunzipping %s to %s.", inputFile.getAbsolutePath(), outputFile.getAbsolutePath()));
    // The file stream is its own resource so it is closed even if the bzip2 header is invalid
    // and the decompressor constructor throws.
    try (FileInputStream fileIn = new FileInputStream(inputFile);
            BZip2CompressorInputStream in = new BZip2CompressorInputStream(fileIn);
            FileOutputStream out = new FileOutputStream(outputFile)) {
        IOUtils.copy(in, out);
        return outputFile;
    } catch (IOException e) {
        throw new ArchiveException("Unable to bunzip2 file", e);
    }
}
 
Example #28
Source File: Tar.java    From phoenicis with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Unpacks a {@code .tar.bz2} file into the output directory.
 *
 * <p>The raw file stream is wrapped in a counting stream, which is passed to
 * {@code uncompress} alongside the compressed file's size and the state callback.
 *
 * @param inputFile     the bzip2-compressed tar archive
 * @param outputDir     directory handed to {@code uncompress}
 * @param stateCallback callback handed to {@code uncompress}
 * @return the files returned by {@code uncompress}
 * @throws ArchiveException if an I/O error occurs
 */
List<File> uncompressTarBz2File(File inputFile, File outputDir, Consumer<ProgressEntity> stateCallback) {
    try (CountingInputStream rawCounter = new CountingInputStream(new FileInputStream(inputFile));
            InputStream bzip2Stream = new BZip2CompressorInputStream(rawCounter)) {
        return uncompress(bzip2Stream, rawCounter, outputDir, FileUtils.sizeOf(inputFile), stateCallback);
    } catch (IOException e) {
        throw new ArchiveException(TAR_ERROR_MESSAGE, e);
    }
}
 
Example #29
Source File: BZip2.java    From rscplus with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Decompresses bzip2 data into a buffer of a known size.
 *
 * @param data               array containing the compressed data
 * @param offset             currently unused, see the note in the body
 * @param length             used to size the region of {@code data} that is decoded
 * @param uncompressedLength size of the returned buffer
 * @return a buffer of {@code uncompressedLength} bytes holding the decoded data, or
 *         {@code null} if decoding fails for any reason
 */
public static byte[] decompress(byte data[], int offset, int length, int uncompressedLength) {
  byte uncompressedData[] = new byte[uncompressedLength];
  // NOTE(review): the region starts at the fixed index 2 and spans length + 4 bytes; the
  // offset parameter is ignored. Left unchanged because callers may rely on it; confirm intent.
  try (BZip2CompressorInputStream in =
      new BZip2CompressorInputStream(new ByteArrayInputStream(data, 2, length + 4))) {
    // read() is not required to fill the buffer in one call, so loop until it is full or
    // the stream ends.
    int filled = 0;
    while (filled < uncompressedLength) {
      int n = in.read(uncompressedData, filled, uncompressedLength - filled);
      if (n <= 0) {
        break;
      }
      filled += n;
    }
  } catch (Exception e) {
    // Any failure is reported to the caller as null.
    return null;
  }
  return uncompressedData;
}
 
Example #30
Source File: DecompressTest.java    From JQF with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Fuzz target: tries to decode up to 1024 bytes of the given input as bzip2. An
 * {@link IOException} is ignored.
 */
@Fuzz
public void bzip2(InputStream in){
    byte[] sink = new byte[1024];
    try {
        BZip2CompressorInputStream decoder = new BZip2CompressorInputStream(in);
        decoder.read(sink, 0, sink.length);
    } catch (IOException ignored){
        // Ignored on purpose.
    }

}