org.apache.hadoop.io.compress.CompressionCodecFactory Java Examples

The following examples show how to use org.apache.hadoop.io.compress.CompressionCodecFactory. Each example is taken from an open source project; the source file, project, and license are noted above each snippet.
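
All of the examples below follow the same basic pattern: build a CompressionCodecFactory from a Hadoop Configuration, ask it for the codec registered for a file's extension via getCodec(Path), and wrap the raw stream only when a codec was found. A minimal sketch of that read path (the helper name is illustrative, not from any of the projects below):

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Open a file, transparently decompressing it when its extension matches a codec.
static InputStream openPossiblyCompressed(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path); // null if no match
    FSDataInputStream raw = fs.open(path);
    return (codec == null) ? raw : codec.createInputStream(raw);
}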
Example #1
Source File: TestInsertQuery.java    From tajo with Apache License 2.0
@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  String tableName = IdentifierUtil.normalizeIdentifier("testInsertOverwriteWithCompression");
  ResultSet res = executeFile("testInsertOverwriteWithCompression_ddl.sql");
  res.close();

  CatalogService catalog = testingCluster.getMaster().getCatalog();
  assertTrue(catalog.existsTable(getCurrentDatabase(), tableName));

  res = executeQuery();
  res.close();
  TableDesc desc = catalog.getTableDesc(getCurrentDatabase(), tableName);
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(2, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());

  for (FileStatus file : fs.listStatus(new Path(desc.getUri()))) {
    CompressionCodec codec = factory.getCodec(file.getPath());
    assertTrue(codec instanceof DeflateCodec);
  }
  executeString("DROP TABLE " + tableName + " PURGE");
}
 
Example #2
Source File: WholeTextInputFormat.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (currentPath >= split.getNumPaths()) {
        return false;
    }

    Path path = split.getPath(currentPath);
    currentPath++;

    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(path);
    key = path.toString();
    FSDataInputStream fileIn = fs.open(path);

    value = codec != null ? codec.createInputStream(fileIn) : fileIn;
    return true;
}
 
Example #3
Source File: JsonObjectMapperParser.java    From RDFS with Apache License 2.0
/**
 * Constructor.
 * 
 * @param path 
 *          Path to the JSON data file, possibly compressed.
 * @param conf
 * @throws IOException
 */
public JsonObjectMapperParser(Path path, Class<? extends T> clazz,
    Configuration conf) throws IOException {
  mapper = new ObjectMapper();
  mapper.configure(
      DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
  this.clazz = clazz;
  FileSystem fs = path.getFileSystem(conf);
  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  InputStream input;
  if (codec == null) {
    input = fs.open(path);
    decompressor = null;
  } else {
    FSDataInputStream fsdis = fs.open(path);
    decompressor = CodecPool.getDecompressor(codec);
    input = codec.createInputStream(fsdis, decompressor);
  }
  jsonParser = mapper.getJsonFactory().createJsonParser(input);
}
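
Example #3 borrows a Decompressor from CodecPool, but the snippet ends before the decompressor is returned. A hedged sketch of the matching cleanup, assuming a close() method on the same class:

public void close() throws IOException {
  // Closing the parser also closes the underlying input stream.
  jsonParser.close();
  if (decompressor != null) {
    CodecPool.returnDecompressor(decompressor); // hand the pooled decompressor back
    decompressor = null;
  }
}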
 
Example #4
Source File: XmlInputFormat.java    From Hive-XML-SerDe with Apache License 2.0
public XmlRecordReader(FileSplit input, JobConf jobConf) throws IOException {
    Configuration conf = jobConf;
    this.startTag = conf.get(START_TAG_KEY).getBytes("utf-8");
    this.endTag = conf.get(END_TAG_KEY).getBytes("utf-8");
    FileSplit split = (FileSplit) input;

    Path file = split.getPath();
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodecs.getCodec(file);
    FileSystem fs = file.getFileSystem(conf);
    if (codec != null) {
        this.fsin = new DataInputStream(codec.createInputStream(fs.open(file)));
        // Only the first split reads data; all other splits are made invalid
        // to avoid reading duplicate data from compressed files.
        this.start = (split.getStart() == 0) ? 0 : Long.MAX_VALUE;
        this.end = Long.MAX_VALUE;
    } else {
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        FSDataInputStream fileIn = fs.open(file);
        fileIn.seek(this.start);
        this.fsin = fileIn;
    }
    this.recordStartPos = this.start;
    this.pos = this.start;
}
 
Example #5
Source File: Anonymizer.java    From hadoop with Apache License 2.0
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  Compressor compressor = null;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), compressor);
  } else {
    output = outFS.create(path);
  }

  JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                        JsonEncoding.UTF8);
  outGen.useDefaultPrettyPrinter();
  
  return outGen;
}
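
The Compressor borrowed from CodecPool above must also be handed back eventually. A minimal sketch of that cleanup, assuming the compressor is kept in a field rather than the local variable shown above:

// After the generator has been closed (which flushes the compressed stream):
outGen.close();
if (compressor != null) {
  CodecPool.returnCompressor(compressor); // hand the pooled compressor back
  compressor = null;
}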
 
Example #6
Source File: PossiblyDecompressedInputStream.java    From big-c with Apache License 2.0
public PossiblyDecompressedInputStream(Path inputPath, Configuration conf)
    throws IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(conf);
  CompressionCodec inputCodec = codecs.getCodec(inputPath);

  FileSystem ifs = inputPath.getFileSystem(conf);
  FSDataInputStream fileIn = ifs.open(inputPath);

  if (inputCodec == null) {
    decompressor = null;
    coreInputStream = fileIn;
  } else {
    decompressor = CodecPool.getDecompressor(inputCodec);
    coreInputStream = inputCodec.createInputStream(fileIn, decompressor);
  }
}
 
Example #7
Source File: PossiblyDecompressedInputStream.java    From hadoop with Apache License 2.0
public PossiblyDecompressedInputStream(Path inputPath, Configuration conf)
    throws IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(conf);
  CompressionCodec inputCodec = codecs.getCodec(inputPath);

  FileSystem ifs = inputPath.getFileSystem(conf);
  FSDataInputStream fileIn = ifs.open(inputPath);

  if (inputCodec == null) {
    decompressor = null;
    coreInputStream = fileIn;
  } else {
    decompressor = CodecPool.getDecompressor(inputCodec);
    coreInputStream = inputCodec.createInputStream(fileIn, decompressor);
  }
}
 
Example #8
Source File: Anonymizer.java    From big-c with Apache License 2.0
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  Compressor compressor = null;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), compressor);
  } else {
    output = outFS.create(path);
  }

  JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                        JsonEncoding.UTF8);
  outGen.useDefaultPrettyPrinter();
  
  return outGen;
}
 
Example #9
Source File: TestBootstrapStandbyWithBKJM.java    From hadoop with Apache License 2.0
@Before
public void setUp() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil
      .createJournalURI("/bootstrapStandby").toString());
  BKJMUtil.addJournalManagerDefinition(conf);
  conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true);
  conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY,
      SlowCodec.class.getCanonicalName());
  CompressionCodecFactory.setCodecClasses(conf,
      ImmutableList.<Class> of(SlowCodec.class));
  MiniDFSNNTopology topology = new MiniDFSNNTopology()
      .addNameservice(new MiniDFSNNTopology.NSConf("ns1").addNN(
          new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)).addNN(
          new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10002)));
  cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology)
      .numDataNodes(1).manageNameDfsSharedDirs(false).build();
  cluster.waitActive();
}
 
Example #10
Source File: TestStandbyCheckpoints.java    From hadoop with Apache License 2.0
protected Configuration setupCommonConfig() {
  tmpOivImgDir = Files.createTempDir();

  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
      tmpOivImgDir.getAbsolutePath());
  conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true);
  conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY,
      SlowCodec.class.getCanonicalName());
  CompressionCodecFactory.setCodecClasses(conf,
      ImmutableList.<Class>of(SlowCodec.class));
  return conf;
}
 
Example #11
Source File: TestInsertQuery.java    From tajo with Apache License 2.0
@Test
public final void testInsertOverwriteLocationWithCompression() throws Exception {
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    ResultSet res = executeQuery();
    res.close();
    FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
    Path path = new Path("/tajo-data/testInsertOverwriteLocationWithCompression");
    assertTrue(fs.exists(path));
    assertEquals(1, fs.listStatus(path).length);

    CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());
    for (FileStatus file : fs.listStatus(path)){
      CompressionCodec codec = factory.getCodec(file.getPath());
      assertTrue(codec instanceof DeflateCodec);
    }
  }
}
 
Example #12
Source File: GryoRecordReader.java    From tinkerpop with Apache License 2.0
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration configuration = context.getConfiguration();
    if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    this.gryoReader = GryoReader.build().mapper(
            GryoMapper.build().addRegistries(IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration))).create()).create();
    long start = split.getStart();
    final Path file = split.getPath();
    if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
        throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
    }
    // open the file and seek to the start of the split
    this.inputStream = file.getFileSystem(configuration).open(split.getPath());
    this.splitLength = split.getLength();
    if (this.splitLength > 0) this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
 
Example #13
Source File: Spark2BitcoinBlockCounterSparkMasterIntegrationTest.java    From hadoopcryptoledger with Apache License 2.0
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                    .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
 
Example #14
Source File: MapReduceBitcoinBlockIntegrationTest.java    From hadoopcryptoledger with Apache License 2.0
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                    .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
 
Example #15
Source File: CompressionFactoryITCase.java    From flink with Apache License 2.0
@Test
public void testWriteCompressedFile() throws Exception {
	final File folder = TEMPORARY_FOLDER.newFolder();
	final Path testPath = Path.fromLocalFile(folder);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<String> stream = env.addSource(
			new FiniteTestSource<>(testData),
			TypeInformation.of(String.class)
	);

	stream.map(str -> str).addSink(
			StreamingFileSink.forBulkFormat(
					testPath,
					CompressWriters.forExtractor(new DefaultExtractor<String>()).withHadoopCompression(TEST_CODEC_NAME)
			).build());

	env.execute();

	validateResults(folder, testData, new CompressionCodecFactory(configuration).getCodecByName(TEST_CODEC_NAME));
}
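
Besides extension-based lookup with getCodec(Path), the factory can resolve codecs directly: getCodecByClassName expects a fully qualified class name, while getCodecByName (used above) also accepts a case-insensitive alias. A small sketch, assuming the built-in codecs are on the classpath:

CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
CompressionCodec byAlias = factory.getCodecByName("gzip"); // alias lookup
CompressionCodec byClassName =
    factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec"); // exact class name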
 
Example #16
Source File: AbstractEthereumRecordReader.java    From hadoopcryptoledger with Apache License 2.0
/***
 * Initializes readers
 *
 * @param split Split to be used (assumed to be a file split)
 * @param context context of the job
 * @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
 * @throws java.lang.InterruptedException in case of thread interruption
 *
 */

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    // Initialize start and end of split
    start = fSplit.getStart();
    end = start + fSplit.getLength();
    final Path file = fSplit.getPath();
    codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    // open stream
    if (isCompressedInput()) { // decompress
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                    .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            ebr = new EthereumBlockReader(codec.createInputStream(fileIn, decompressor), this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
        filePosition = fileIn;
    }
}
 
Example #17
Source File: JSONFileRecordReader.java    From ojai with Apache License 2.0
@Override
public void initialize(InputSplit arg0, TaskAttemptContext taskContext)
    throws IOException, InterruptedException {

  documentStream = null;
  it = null;
  documentCount = 0;
  key = new LongWritable();
  document = null;
  currentPos = 0;

  /* get the split */
  FileSplit split = (FileSplit) arg0;

  /* get configuration object */
  Configuration job = taskContext.getConfiguration();

  /* initialize file /input stream */
  final Path path = split.getPath();
  FileSystem fs = path.getFileSystem(job);
  inputStream = fs.open(path);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(path);

  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
    inputStream = codec.createInputStream(inputStream, decompressor);
  }

  start = split.getStart();
  end = start + split.getLength();

  /* Initialize a stream reader so that it can read multiple documents from the file */

  documentStream = (JsonDocumentStream)Json.newDocumentStream(inputStream);
  it = documentStream.iterator();

}
 
Example #18
Source File: JsonInputFormat.java    From hiped2 with Apache License 2.0
@Override
protected boolean isSplitable(JobContext context, Path file) {
  CompressionCodec codec =
      new CompressionCodecFactory(HadoopCompat.getConfiguration(context))
          .getCodec(file);
  return codec == null;
}
 
Example #19
Source File: HdfsPartitioner.java    From sqoop-on-spark with Apache License 2.0
private boolean isSplitable(Configuration conf, Path file) {
  final CompressionCodec codec =
      new CompressionCodecFactory(conf).getCodec(file);

  // This method might be improved for SplittableCompression codec when we
  // drop support for Hadoop 1.0
  return null == codec;

}
 
Example #20
Source File: HadoopFileReader.java    From hadoopoffice with Apache License 2.0
/**
* Create a new HadoopFileReader
*
* @param conf filesystem configuration; can be taken from the current job in which the input format is used
*
*/

public HadoopFileReader(Configuration conf) throws IOException {
	this.conf = conf;
	this.compressionCodecs = new CompressionCodecFactory(conf);
	this.openDecompressors = new ArrayList<>();
	this.fs = FileSystem.get(this.conf);
}
 
Example #21
Source File: DefaultOutputter.java    From big-c with Apache License 2.0
@Override
public void init(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(fs.create(path), compressor);
  } else {
    output = fs.create(path);
  }
  writer = new JsonObjectMapperWriter<T>(output, 
      conf.getBoolean("rumen.output.pretty.print", true));
}
 
Example #22
Source File: TextInputFormat.java    From big-c with Apache License 2.0
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
    new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }
  return codec instanceof SplittableCompressionCodec;
}
 
Example #23
Source File: HiveColumnCardinalityUpdateJob.java    From Kylin with Apache License 2.0
private static List<String> readLines(Path location, Configuration conf) throws Exception {
    FileSystem fileSystem = FileSystem.get(location.toUri(), conf);
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    FileStatus[] items = fileSystem.listStatus(location);
    if (items == null)
        return new ArrayList<String>();
    List<String> results = new ArrayList<String>();
    for (FileStatus item : items) {

        // ignoring files like _SUCCESS
        if (item.getPath().getName().startsWith("_")) {
            continue;
        }

        CompressionCodec codec = factory.getCodec(item.getPath());
        InputStream stream = null;

        // check if we have a compression codec we need to use
        if (codec != null) {
            stream = codec.createInputStream(fileSystem.open(item.getPath()));
        } else {
            stream = fileSystem.open(item.getPath());
        }

        StringWriter writer = new StringWriter();
        IOUtils.copy(stream, writer, "UTF-8");
        String raw = writer.toString();
        for (String str : raw.split("\n")) {
            results.add(str);
        }
    }
    return results;
}
 
Example #24
Source File: AbstractHadoopProcessor.java    From nifi with Apache License 2.0
/**
 * Returns the configured CompressionCodec, or null if none is configured.
 *
 * @param context
 *            the ProcessContext
 * @param configuration
 *            the Hadoop Configuration
 * @return CompressionCodec or null
 */
protected org.apache.hadoop.io.compress.CompressionCodec getCompressionCodec(ProcessContext context, Configuration configuration) {
    org.apache.hadoop.io.compress.CompressionCodec codec = null;
    if (context.getProperty(COMPRESSION_CODEC).isSet()) {
        String compressionClassname = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).getValue()).toString();
        CompressionCodecFactory ccf = new CompressionCodecFactory(configuration);
        codec = ccf.getCodecByClassName(compressionClassname);
    }

    return codec;
}
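
A codec obtained this way is typically used to wrap the raw stream when writing to HDFS. A minimal sketch, under the assumption that codec may be null when no compression is configured (the output path is a placeholder):

Path out = new Path("/tmp/output"); // placeholder path
FileSystem fs = out.getFileSystem(configuration);
OutputStream raw = fs.create(out);
OutputStream stream = (codec != null) ? codec.createOutputStream(raw) : raw;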
 
Example #25
Source File: SequenceFileBolt.java    From jstorm with Apache License 2.0
@Override
public void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
    LOG.info("Preparing Sequence File Bolt...");
    if (this.format == null) throw new IllegalStateException("SequenceFormat must be specified.");

    this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
    this.codecFactory = new CompressionCodecFactory(hdfsConfig);
}
 
Example #26
Source File: HiveColumnCardinalityUpdateJob.java    From kylin with Apache License 2.0 5 votes vote down vote up
private static List<String> readLines(Path location, Configuration conf) throws Exception {
    FileSystem fileSystem = HadoopUtil.getWorkingFileSystem();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    FileStatus[] items = fileSystem.listStatus(location);
    if (items == null)
        return new ArrayList<String>();
    List<String> results = new ArrayList<String>();
    for (FileStatus item : items) {

        // ignoring files like _SUCCESS
        if (item.getPath().getName().startsWith("_")) {
            continue;
        }

        CompressionCodec codec = factory.getCodec(item.getPath());
        InputStream stream = null;

        // check if we have a compression codec we need to use
        if (codec != null) {
            stream = codec.createInputStream(fileSystem.open(item.getPath()));
        } else {
            stream = fileSystem.open(item.getPath());
        }

        StringWriter writer = new StringWriter();
        IOUtils.copy(stream, writer, "UTF-8");
        String raw = writer.toString();
        for (String str : StringUtil.split(raw, "\n")) {
            results.add(str);
        }
    }
    return results;
}
 
Example #27
Source File: FSImageCompression.java    From hadoop with Apache License 2.0
/**
 * Create a compression instance using the codec specified by
 * <code>codecClassName</code>
 */
static FSImageCompression createCompression(Configuration conf,
                                                    String codecClassName)
  throws IOException {

  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodecByClassName(codecClassName);
  if (codec == null) {
    throw new IOException("Not a supported codec: " + codecClassName);
  }

  return new FSImageCompression(codec);
}
 
Example #28
Source File: QseqInputFormat.java    From Hadoop-BAM with MIT License
public QseqRecordReader(Configuration conf, FileSplit split) throws IOException
{
	setConf(conf);
	file = split.getPath();
	start = split.getStart();
	end = start + split.getLength();

	FileSystem fs = file.getFileSystem(conf);
	FSDataInputStream fileIn = fs.open(file);

	CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
	CompressionCodec        codec        = codecFactory.getCodec(file);

	if (codec == null) // no codec.  Uncompressed file.
	{
		positionAtFirstRecord(fileIn);
		inputStream = fileIn;
	}
	else
	{ // compressed file
		if (start != 0)
			throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");

		inputStream = codec.createInputStream(fileIn);
		end = Long.MAX_VALUE; // read until the end of the file
	}

	lineReader = new LineReader(inputStream);
}
 
Example #29
Source File: TextInputFormat.java    From hadoop with Apache License 2.0
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
    new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }
  return codec instanceof SplittableCompressionCodec;
}
 
Example #30
Source File: DefaultOutputter.java    From hadoop with Apache License 2.0
@Override
public void init(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(fs.create(path), compressor);
  } else {
    output = fs.create(path);
  }
  writer = new JsonObjectMapperWriter<T>(output, 
      conf.getBoolean("rumen.output.pretty.print", true));
}