org.apache.hadoop.io.SequenceFile.Metadata Java Examples

The following examples show how to use org.apache.hadoop.io.SequenceFile.Metadata. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestSequenceFile.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code count} randomly generated key/value records to {@code file}
 * through a {@link SequenceFile.Writer} created with the given metadata.
 *
 * @param fs file system on which the file is deleted and re-created
 * @param count number of records to append
 * @param seed seed handed to the {@link RandomDatum.Generator}
 * @param file destination path; deleted before writing
 * @param compressionType compression type passed to the writer
 * @param codec compression codec passed to the writer
 * @param metadata metadata passed to the writer
 * @throws IOException if deleting, appending or closing fails
 */
private void writeMetadataTest(FileSystem fs, int count, int seed, Path file,
                                      CompressionType compressionType, CompressionCodec codec, SequenceFile.Metadata metadata)
  throws IOException {
  fs.delete(file, true);
  LOG.info("creating " + count + " records with metadata and with " + compressionType +
           " compression");
  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, conf, file,
                              RandomDatum.class, RandomDatum.class, compressionType, codec, null, metadata);
  try {
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; i++) {
      generator.next();
      RandomDatum key = generator.getKey();
      RandomDatum value = generator.getValue();

      writer.append(key, value);
    }
  } finally {
    // Close even when an append fails so the writer is never leaked.
    writer.close();
  }
}
 
Example #2
Source File: TestSequenceFile.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the {@code createParent} flag of {@code SequenceFile.createWriter}:
 * with the flag {@code false} the call is expected to fail with an
 * {@link IOException} because the parent directory is missing; with the flag
 * {@code true} it is expected to succeed.
 *
 * @throws IOException if the second creation (or cleanup) fails
 */
public void testRecursiveSeqFileCreate() throws IOException {
  FileSystem fs = FileSystem.getLocal(conf);
  Path name = new Path(new Path(System.getProperty("test.build.data","."),
      "recursiveCreateDir") , "file");
  // Remove leftovers from an earlier run; otherwise the parent directory
  // already exists and the "missing parent" expectation below cannot hold.
  fs.delete(name.getParent(), true);
  boolean createParent = false;

  try {
    SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
        RandomDatum.class, 512, (short) 1, 4096, createParent,
        CompressionType.NONE, null, new Metadata());
    fail("Expected an IOException due to missing parent");
  } catch (IOException ioe) {
    // Expected
  }

  createParent = true;
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
          RandomDatum.class, 512, (short) 1, 4096, createParent,
          CompressionType.NONE, null, new Metadata());
  // should succeed, fails if exception thrown
  // Release the writer instead of leaking its open stream.
  writer.close();
}
 
Example #3
Source File: TestSequenceFile.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Deletes {@code file}, then appends {@code count} generated records to a new
 * {@link SequenceFile.Writer} that was created with {@code metadata}.
 *
 * @param fs file system holding the file
 * @param count how many key/value pairs to append
 * @param seed seed for the {@link RandomDatum.Generator}
 * @param file path that is deleted and then written
 * @param compressionType compression type given to the writer
 * @param codec compression codec given to the writer
 * @param metadata metadata given to the writer
 * @throws IOException if the file cannot be deleted, written or closed
 */
private void writeMetadataTest(FileSystem fs, int count, int seed, Path file,
                                      CompressionType compressionType, CompressionCodec codec, SequenceFile.Metadata metadata)
  throws IOException {
  fs.delete(file, true);
  LOG.info("creating " + count + " records with metadata and with " + compressionType +
           " compression");
  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, conf, file,
                              RandomDatum.class, RandomDatum.class, compressionType, codec, null, metadata);
  try {
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; i++) {
      generator.next();
      writer.append(generator.getKey(), generator.getValue());
    }
  } finally {
    // Always release the writer, including when an append throws.
    writer.close();
  }
}
 
Example #4
Source File: TestSequenceFile.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code SequenceFile.createWriter} with {@code createParent} first
 * {@code false} (an {@link IOException} is expected because the parent
 * directory does not exist) and then {@code true} (creation is expected to
 * succeed).
 *
 * @throws IOException if the second creation (or cleanup) fails
 */
public void testRecursiveSeqFileCreate() throws IOException {
  FileSystem fs = FileSystem.getLocal(conf);
  Path name = new Path(new Path(System.getProperty("test.build.data","."),
      "recursiveCreateDir") , "file");
  // Start from a clean slate: a parent directory left behind by a previous
  // run would make the first createWriter call succeed unexpectedly.
  fs.delete(name.getParent(), true);
  boolean createParent = false;

  try {
    SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
        RandomDatum.class, 512, (short) 1, 4096, createParent,
        CompressionType.NONE, null, new Metadata());
    fail("Expected an IOException due to missing parent");
  } catch (IOException ioe) {
    // Expected
  }

  createParent = true;
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
          RandomDatum.class, 512, (short) 1, 4096, createParent,
          CompressionType.NONE, null, new Metadata());
  // should succeed, fails if exception thrown
  // Close the writer so its stream is not leaked.
  writer.close();
}
 
Example #5
Source File: RCFile.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the output stream, codec and metadata on this instance and reads the
 * boolean setting named by
 * {@code TajoConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER} (default {@code true})
 * into {@code useNewMagic}.
 */
void init(Configuration conf, FSDataOutputStream out,
          CompressionCodec codec, Metadata metadata) throws IOException {
  this.out = out;
  this.codec = codec;
  this.metadata = metadata;

  final String explicitHeaderKey = TajoConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER.varname;
  this.useNewMagic = conf.getBoolean(explicitHeaderKey, true);
}
 
Example #6
Source File: HiveRCOutputFormat.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@link RCFile.Writer} for this task's default work file.
 *
 * <p>If {@code COMPRESSION_CODEC_CONF} is set, output compression is switched
 * on in the job configuration with that codec. The file extension comes from
 * {@code EXTENSION_OVERRIDE_CONF}; the value "none" (any case) results in a
 * {@code null} extension being passed to {@code getDefaultWorkFile}.
 *
 * @param job task attempt context supplying the configuration
 * @param columnMetadata not read by this method; the writer is always created
 *        with {@code null} metadata
 * @return a writer on the task's work file
 * @throws IOException propagated from the calls made here
 */
protected RCFile.Writer createRCFileWriter(TaskAttemptContext job,
                                           Text columnMetadata)
                                           throws IOException {
  final Configuration conf = job.getConfiguration();

  // A configured codec override forces compressed output with that codec.
  final String overrideCodec = conf.get(COMPRESSION_CODEC_CONF);
  if (overrideCodec != null) {
    conf.setBoolean(MRConfiguration.OUTPUT_COMPRESS, true);
    conf.set(MRConfiguration.OUTPUT_COMPRESSION_CODEC, overrideCodec);
  }

  // Instantiate a codec only when output compression is enabled.
  final CompressionCodec codec = getCompressOutput(job)
      ? ReflectionUtils.newInstance(getOutputCompressorClass(job, GzipCodec.class), conf)
      : null;

  // Kept as a typed variable so the constructor call below stays unambiguous.
  final Metadata metadata = null;

  final String extension = conf.get(EXTENSION_OVERRIDE_CONF, DEFAULT_EXTENSION);
  final String suffix = extension.equalsIgnoreCase("none") ? null : extension;
  final Path file = getDefaultWorkFile(job, suffix);

  LOG.info("writing to rcfile " + file.toString());

  return new RCFile.Writer(file.getFileSystem(conf), conf, file, job, metadata, codec);
}
 
Example #7
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Records the stream, codec and metadata passed in, then sets
 * {@code useNewMagic} from the boolean configuration entry named by
 * {@code TajoConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER}, defaulting to
 * {@code true}.
 */
void init(Configuration conf, FSDataOutputStream out,
          CompressionCodec codec, Metadata metadata) throws IOException {
  this.out = out;
  this.codec = codec;
  this.metadata = metadata;

  final String headerFlagName = TajoConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER.varname;
  final boolean explicitHeader = conf.getBoolean(headerFlagName, true);
  this.useNewMagic = explicitHeader;
}
 
Example #8
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code Metadata} object from a flat list of alternating keys and
 * values, e.g. {@code createMetadata(key1, value1, key2, value2)}.
 *
 * @param values keys and values, interleaved; the count must be even
 * @return metadata holding every supplied pair
 * @throws IllegalArgumentException if an odd number of arguments is supplied
 */
public static Metadata createMetadata(Text... values) {
  final int supplied = values.length;
  if (supplied % 2 != 0) {
    throw new IllegalArgumentException("Must have a matched set of " +
        "key-value pairs. " + supplied +
        " strings supplied.");
  }

  final Metadata pairs = new Metadata();
  int keyIndex = 0;
  while (keyIndex < supplied) {
    // Each key is immediately followed by its value.
    pairs.set(values[keyIndex], values[keyIndex + 1]);
    keyIndex += 2;
  }
  return pairs;
}
 
Example #9
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the configuration, output stream, codec and metadata on this
 * instance and sets {@code useNewMagic} from the boolean configuration entry
 * named by {@code TajoConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER} (default
 * {@code true}).
 */
void init(Configuration conf, FSDataOutputStream out,
    CompressionCodec codec, Metadata metadata) throws IOException {
  this.conf = conf;
  this.out = out;
  this.codec = codec;
  this.metadata = metadata;

  final String explicitHeaderKey = TajoConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER.varname;
  this.useNewMagic = conf.getBoolean(explicitHeaderKey, true);
}
 
Example #10
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code Metadata} object out of alternating key/value arguments,
 * e.g. {@code createMetadata(key1, value1, key2, value2)}.
 *
 * @param values interleaved keys and values; must contain an even number of entries
 * @return the populated metadata
 * @throws IllegalArgumentException if the number of entries is odd
 */
public static Metadata createMetadata(Text... values) {
  final boolean unmatched = values.length % 2 != 0;
  if (unmatched) {
    final String problem = "Must have a matched set of " +
                           "key-value pairs. " + values.length+
                           " strings supplied.";
    throw new IllegalArgumentException(problem);
  }

  final Metadata metadata = new Metadata();
  for (int pair = 0; pair < values.length / 2; pair++) {
    final Text key = values[2 * pair];
    final Text value = values[2 * pair + 1];
    metadata.set(key, value);
  }
  return metadata;
}
 
Example #11
Source File: RCFile.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles a {@code Metadata} instance from arguments given as
 * key, value, key, value, ... e.g.
 * {@code createMetadata(key1, value1, key2, value2)}.
 *
 * @param values the keys and values in alternating order
 * @return metadata containing one entry per key/value pair
 * @throws IllegalArgumentException when a key has no matching value, i.e. the
 *         argument count is odd
 */
public static Metadata createMetadata(Text... values) {
  final int total = values.length;
  if (total % 2 != 0) {
    throw new IllegalArgumentException("Must have a matched set of " +
        "key-value pairs. " + total +
        " strings supplied.");
  }

  final Metadata built = new Metadata();
  int cursor = 0;
  while (cursor < total) {
    final Text key = values[cursor++];
    final Text value = values[cursor++];
    built.set(key, value);
  }
  return built;
}
 
Example #12
Source File: TestSequenceFile.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Test that makes sure createWriter succeeds on a file that was
 * already created.
 *
 * @throws IOException if creating the file or the writer fails
 */
public void testCreateWriterOnExistingFile() throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Path name = new Path(new Path(System.getProperty("test.build.data","."),
      "createWriterOnExistingFile") , "file");

  // Only the file's existence matters; close the stream right away so the
  // handle returned by create() is not leaked.
  fs.create(name).close();
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
          RandomDatum.class, 512, (short) 1, 4096, false,
          CompressionType.NONE, null, new Metadata());
  // Release the writer as well.
  writer.close();
}
 
Example #13
Source File: TestSequenceFile.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Deletes {@code sortedFile}, then sorts {@code unsortedFile} into it with a
 * {@link SequenceFile.Sorter} constructed with the given metadata.
 *
 * @param fs file system holding both files
 * @param unsortedFile the single sort input
 * @param sortedFile sort output; removed before sorting
 * @param metadata metadata passed to the sorter
 * @throws IOException if deleting or sorting fails
 */
private void sortMetadataTest(FileSystem fs, Path unsortedFile, Path sortedFile, SequenceFile.Metadata metadata)
  throws IOException {
  fs.delete(sortedFile, true);
  LOG.info("sorting: " + unsortedFile + " to: " + sortedFile);

  final WritableComparator keyOrder = WritableComparator.get(RandomDatum.class);
  final SequenceFile.Sorter metadataSorter =
    new SequenceFile.Sorter(fs, keyOrder, RandomDatum.class, RandomDatum.class, conf, metadata);

  final Path[] inputs = { unsortedFile };
  metadataSorter.sort(inputs, sortedFile, false);
}
 
Example #14
Source File: TestSequenceFile.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Opens {@code file} with a {@link SequenceFile.Reader} and returns the
 * metadata the reader reports.
 *
 * @param fs file system holding the file
 * @param file the SequenceFile to open
 * @return the metadata returned by {@code reader.getMetadata()}
 * @throws IOException if the file cannot be opened or closed
 */
private SequenceFile.Metadata readMetadata(FileSystem fs, Path file)
  throws IOException {
  LOG.info("reading file: " + file.toString());
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
  try {
    return reader.getMetadata();
  } finally {
    // Close on every path so the reader is never leaked.
    reader.close();
  }
}
 
Example #15
Source File: TestSequenceFile.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Test that makes sure createWriter succeeds on a file that was
 * already created.
 *
 * @throws IOException if creating the file or the writer fails
 */
public void testCreateWriterOnExistingFile() throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Path name = new Path(new Path(System.getProperty("test.build.data","."),
      "createWriterOnExistingFile") , "file");

  // The test only needs the file to exist, so the stream returned by
  // create() is closed immediately rather than leaked.
  fs.create(name).close();
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
          RandomDatum.class, 512, (short) 1, 4096, false,
          CompressionType.NONE, null, new Metadata());
  // Close the writer so its stream is released too.
  writer.close();
}
 
Example #16
Source File: TestSequenceFile.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Removes any existing {@code sortedFile} and then runs a
 * {@link SequenceFile.Sorter}, built with {@code metadata}, over
 * {@code unsortedFile} to produce {@code sortedFile}.
 *
 * @param fs file system on which both paths live
 * @param unsortedFile input to the sort
 * @param sortedFile output of the sort; deleted first
 * @param metadata metadata handed to the sorter's constructor
 * @throws IOException if the delete or the sort fails
 */
private void sortMetadataTest(FileSystem fs, Path unsortedFile, Path sortedFile, SequenceFile.Metadata metadata)
  throws IOException {
  fs.delete(sortedFile, true);
  LOG.info("sorting: " + unsortedFile + " to: " + sortedFile);

  final WritableComparator datumComparator = WritableComparator.get(RandomDatum.class);
  new SequenceFile.Sorter(fs, datumComparator, RandomDatum.class, RandomDatum.class, conf, metadata)
      .sort(new Path[] { unsortedFile }, sortedFile, false);
}
 
Example #17
Source File: TestSequenceFile.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the metadata of the SequenceFile at {@code file}.
 *
 * @param fs file system holding the file
 * @param file path of the SequenceFile to open
 * @return whatever {@code SequenceFile.Reader.getMetadata()} returns for the file
 * @throws IOException if opening or closing the reader fails
 */
private SequenceFile.Metadata readMetadata(FileSystem fs, Path file)
  throws IOException {
  LOG.info("reading file: " + file.toString());
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
  try {
    return reader.getMetadata();
  } finally {
    // Guarantee the reader is closed even if getMetadata() throws.
    reader.close();
  }
}
 
Example #18
Source File: TestSequenceFile.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Unit tests for SequenceFile metadata.
 *
 * <p>Writes the same metadata through an uncompressed, a record-compressed
 * and a block-compressed file and through a sort, reading it back after each
 * step and failing with a {@link RuntimeException} on the first mismatch.
 */
public void testSequenceFileMetadata() throws Exception {
  LOG.info("Testing SequenceFile with metadata");
  int count = 1024 * 10;
  CompressionCodec codec = new DefaultCodec();
  Path file = new Path(System.getProperty("test.build.data",".")+"/test.seq.metadata");
  Path sortedFile =
    new Path(System.getProperty("test.build.data",".")+"/test.sorted.seq.metadata");
  Path recordCompressedFile =
    new Path(System.getProperty("test.build.data",".")+"/test.rc.seq.metadata");
  Path blockCompressedFile =
    new Path(System.getProperty("test.build.data",".")+"/test.bc.seq.metadata");

  FileSystem fs = FileSystem.getLocal(conf);
  SequenceFile.Metadata theMetadata = new SequenceFile.Metadata();
  theMetadata.set(new Text("name_1"), new Text("value_1"));
  theMetadata.set(new Text("name_2"), new Text("value_2"));
  theMetadata.set(new Text("name_3"), new Text("value_3"));
  theMetadata.set(new Text("name_4"), new Text("value_4"));

  int seed = new Random().nextInt();

  try {
    // SequenceFile.Writer
    writeMetadataTest(fs, count, seed, file, CompressionType.NONE, null, theMetadata);
    SequenceFile.Metadata aMetadata = readMetadata(fs, file);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 1);
    }
    // SequenceFile.RecordCompressWriter
    writeMetadataTest(fs, count, seed, recordCompressedFile, CompressionType.RECORD,
                      codec, theMetadata);
    aMetadata = readMetadata(fs, recordCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 2);
    }
    // SequenceFile.BlockCompressWriter
    writeMetadataTest(fs, count, seed, blockCompressedFile, CompressionType.BLOCK,
                      codec, theMetadata);
    aMetadata = readMetadata(fs, blockCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 3);
    }
    // SequenceFile.Sorter
    sortMetadataTest(fs, file, sortedFile, theMetadata);
    // Read back the sorter's own output; re-reading the record-compressed
    // file here would leave the sorted file's metadata unverified.
    aMetadata = readMetadata(fs, sortedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 4);
    }
  } finally {
    fs.close();
  }
  LOG.info("Successfully tested SequenceFile with metadata");
}
 
Example #19
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 4 votes vote down vote up
/**
 * Constructs a RCFile Writer, delegating to the six-argument constructor with
 * a {@code null} progress reporter, a newly constructed {@code Metadata} and a
 * {@code null} codec.
 */
public Writer(FileSystem fs, Configuration conf, Path name) throws IOException {
  this(fs, conf, name, (Progressable) null, new Metadata(), (CompressionCodec) null);
}
 
Example #20
Source File: TestSequenceFile.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Unit tests for SequenceFile metadata.
 *
 * <p>The same metadata is written with no compression, record compression and
 * block compression, and finally passed through a sort; after every step the
 * metadata is read back and a {@link RuntimeException} is thrown if it differs
 * from what was written.
 */
public void testSequenceFileMetadata() throws Exception {
  LOG.info("Testing SequenceFile with metadata");
  int count = 1024 * 10;
  CompressionCodec codec = new DefaultCodec();
  Path file = new Path(System.getProperty("test.build.data",".")+"/test.seq.metadata");
  Path sortedFile =
    new Path(System.getProperty("test.build.data",".")+"/test.sorted.seq.metadata");
  Path recordCompressedFile =
    new Path(System.getProperty("test.build.data",".")+"/test.rc.seq.metadata");
  Path blockCompressedFile =
    new Path(System.getProperty("test.build.data",".")+"/test.bc.seq.metadata");

  FileSystem fs = FileSystem.getLocal(conf);
  SequenceFile.Metadata theMetadata = new SequenceFile.Metadata();
  theMetadata.set(new Text("name_1"), new Text("value_1"));
  theMetadata.set(new Text("name_2"), new Text("value_2"));
  theMetadata.set(new Text("name_3"), new Text("value_3"));
  theMetadata.set(new Text("name_4"), new Text("value_4"));

  int seed = new Random().nextInt();

  try {
    // SequenceFile.Writer
    writeMetadataTest(fs, count, seed, file, CompressionType.NONE, null, theMetadata);
    SequenceFile.Metadata aMetadata = readMetadata(fs, file);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 1);
    }
    // SequenceFile.RecordCompressWriter
    writeMetadataTest(fs, count, seed, recordCompressedFile, CompressionType.RECORD,
                      codec, theMetadata);
    aMetadata = readMetadata(fs, recordCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 2);
    }
    // SequenceFile.BlockCompressWriter
    writeMetadataTest(fs, count, seed, blockCompressedFile, CompressionType.BLOCK,
                      codec, theMetadata);
    aMetadata = readMetadata(fs, blockCompressedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 3);
    }
    // SequenceFile.Sorter
    sortMetadataTest(fs, file, sortedFile, theMetadata);
    // Verify the file the sorter just produced, not the record-compressed
    // file that step 2 already checked.
    aMetadata = readMetadata(fs, sortedFile);
    if (!theMetadata.equals(aMetadata)) {
      LOG.info("The original metadata:\n" + theMetadata.toString());
      LOG.info("The retrieved metadata:\n" + aMetadata.toString());
      throw new RuntimeException("metadata not match:  " + 4);
    }
  } finally {
    fs.close();
  }
  LOG.info("Successfully tested SequenceFile with metadata");
}
 
Example #21
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 4 votes vote down vote up
/**
 * Reads and validates the file header from {@code sin}, in order: magic bytes,
 * version, (original version only) key/value class names, the compression
 * flag, (original version only) the block-compression flag, the codec class
 * name when compressed, the metadata, and the sync bytes. The stream position
 * after the sync bytes is stored in {@code headerEnd}.
 *
 * @throws IOException if the magic, version byte, class names or
 *         block-compression flag do not match what this reader accepts, or if
 *         reading from the stream fails
 */
private void init() throws IOException {
  byte[] magic = new byte[MAGIC.length];
  sin.readFully(magic);

  if (Arrays.equals(magic, ORIGINAL_MAGIC)) {
    // Original magic: the next byte must be the "with metadata" version marker.
    byte vers = sin.readByte();
    if (vers != ORIGINAL_MAGIC_VERSION_WITH_METADATA) {
      throw new IOException(file + " is a version " + vers +
                            " SequenceFile instead of an RCFile.");
    }
    version = ORIGINAL_VERSION;
  } else {
    // Anything other than the original magic must be the current MAGIC.
    if (!Arrays.equals(magic, MAGIC)) {
      throw new IOException(file + " not a RCFile and has magic of " +
                            new String(magic));
    }

    // Set 'version'
    version = sin.readByte();
    // Versions above CURRENT_VERSION are rejected.
    if (version > CURRENT_VERSION) {
      throw new VersionMismatchException((byte) CURRENT_VERSION, version);
    }
  }

  if (version == ORIGINAL_VERSION) {
    // The original layout stores key and value class names; they must resolve
    // to KeyBuffer and ValueBuffer respectively.
    try {
      Class<?> keyCls = conf.getClassByName(Text.readString(sin));
      Class<?> valCls = conf.getClassByName(Text.readString(sin));
      if (!keyCls.equals(KeyBuffer.class)
          || !valCls.equals(ValueBuffer.class)) {
        throw new IOException(file + " not a RCFile");
      }
    } catch (ClassNotFoundException e) {
      throw new IOException(file + " not a RCFile", e);
    }
  }

  decompress = sin.readBoolean(); // is compressed?

  if (version == ORIGINAL_VERSION) {
    // is block-compressed? it should be always false.
    boolean blkCompressed = sin.readBoolean();
    if (blkCompressed) {
      throw new IOException(file + " not a RCFile.");
    }
  }

  // setup the compression codec
  if (decompress) {
    // The codec class name is present in the header only when the compression flag is set.
    String codecClassname = Text.readString(sin);
    try {
      Class<? extends CompressionCodec> codecClass = conf.getClassByName(
          codecClassname).asSubclass(CompressionCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IllegalArgumentException(
          "Unknown codec: " + codecClassname, cnfe);
    }
    keyDecompressor = CodecPool.getDecompressor(codec);
  }

  // The metadata follows the compression information in the header.
  metadata = new Metadata();
  metadata.readFields(sin);

  sin.readFully(sync); // read sync bytes
  // Remember where the header stops.
  headerEnd = sin.getPos();
}
 
Example #22
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 3 votes vote down vote up
/**
 * Constructs a RCFile Writer.
 *
 * <p>Delegates to the fuller constructor, taking the buffer size from
 * {@code fs.getConf()} (key {@code io.file.buffer.size}, default 4096) and the
 * replication and block size from the file system's defaults.
 *
 * @param fs
 *          the file system used
 * @param conf
 *          the configuration file
 * @param name
 *          the file name
 * @param progress a progress meter to update as the file is written
 * @param metadata a string to string map in the file header
 * @param codec the compression codec, passed through to the delegated constructor
 * @throws java.io.IOException
 */
public Writer(FileSystem fs, Configuration conf, Path name,
    Progressable progress, Metadata metadata, CompressionCodec codec) throws IOException {
  this(fs, conf, name, fs.getConf().getInt("io.file.buffer.size", 4096),
      fs.getDefaultReplication(), fs.getDefaultBlockSize(), progress,
      metadata, codec);
}
 
Example #23
Source File: RCFile.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Gets the metadata (Text to Text map) that was written into the file.
 *
 * @return the {@code metadata} field of this instance
 */
public Metadata getMetadata() {
  return this.metadata;
}
 
Example #24
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Constructs a RCFile Writer, delegating to the constructor that also takes a
 * {@code Metadata} argument and passing it a newly constructed
 * {@code Metadata}.
 *
 * @param fs
 *          the file system used
 * @param conf
 *          the configuration file
 * @param name
 *          the file name
 * @param progress
 *          passed through unchanged to the delegated constructor
 * @param codec
 *          passed through unchanged to the delegated constructor
 * @throws java.io.IOException
 */
public Writer(FileSystem fs, Configuration conf, Path name,
    Progressable progress, CompressionCodec codec) throws IOException {
  this(fs, conf, name, progress, new Metadata(), codec);
}
 
Example #25
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Accessor for the metadata (Text to Text map) that was written into the
 * file.
 *
 * @return this instance's {@code metadata} field
 */
public Metadata getMetadata() {
  return this.metadata;
}
 
Example #26
Source File: RCFile.java    From incubator-tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Provides the metadata (Text to Text map) that was written into the file.
 *
 * @return the value held in the {@code metadata} field
 */
public Metadata getMetadata() {
  return this.metadata;
}