Java Code Examples for org.apache.hadoop.io.serializer.SerializationFactory#getSerializer()

The following examples show how to use org.apache.hadoop.io.serializer.SerializationFactory#getSerializer(). You can go to the original project or source file by following the links above each example.
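
Before working through the examples, here is a minimal, self-contained round-trip sketch of the pattern most of them follow: ask the factory for a Serializer, open it on an output buffer, serialize, then reverse the process with the matching Deserializer. It assumes the default io.serializations configuration, which includes WritableSerialization, so Text is handled out of the box.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class SerializationRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);

    // Serialize a Text value into an in-memory buffer.
    Serializer<Text> serializer = factory.getSerializer(Text.class);
    DataOutputBuffer out = new DataOutputBuffer();
    serializer.open(out);
    serializer.serialize(new Text("hello"));
    serializer.close();

    // Read the same bytes back with the matching deserializer.
    Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    deserializer.open(in);
    Text copy = deserializer.deserialize(null); // null asks for a fresh instance
    deserializer.close();

    System.out.println(copy); // prints: hello
  }
}
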
Example 1
Source File: MRHelpers.java    From incubator-tez with Apache License 2.0
@Private
public static <T extends org.apache.hadoop.mapreduce.InputSplit> MRSplitProto createSplitProto(
    T newSplit, SerializationFactory serializationFactory)
    throws IOException, InterruptedException {
  MRSplitProto.Builder builder = MRSplitProto
      .newBuilder();
  
  builder.setSplitClassName(newSplit.getClass().getName());

  @SuppressWarnings("unchecked")
  Serializer<T> serializer = serializationFactory
      .getSerializer((Class<T>) newSplit.getClass());
  ByteString.Output out = ByteString
      .newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  serializer.open(out);
  serializer.serialize(newSplit);
  // TODO MR Compat: Check against max block locations per split.
  ByteString splitBs = out.toByteString();
  builder.setSplitBytes(splitBs);

  return builder.build();
}
 
Example 2
Source File: ReflectionUtils.java    From hadoop with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param conf the configuration used to look up the serialization
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, 
                              T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
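
A quick usage sketch for copy() (the values are hypothetical). Note that callers should keep the returned reference rather than the dst argument, since deserialize may hand back a different instance:

Configuration conf = new Configuration();
Text src = new Text("original");
Text dst = new Text();
dst = ReflectionUtils.copy(conf, src, dst); // reuse the returned reference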
 
Example 3
Source File: TestWritableJobConf.java    From RDFS with Apache License 2.0
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory factory = new SerializationFactory(CONF);
  Serializer<K> serializer =
    factory.getSerializer(GenericsUtil.getClass(conf));
  Deserializer<K> deserializer =
    factory.getDeserializer(GenericsUtil.getClass(conf));

  DataOutputBuffer out = new DataOutputBuffer();
  serializer.open(out);
  serializer.serialize(conf);
  serializer.close();

  DataInputBuffer in = new DataInputBuffer();
  in.reset(out.getData(), out.getLength());
  deserializer.open(in);
  K after = deserializer.deserialize(null);
  deserializer.close();
  return after;
}
 
Example 4
Source File: ReflectionUtils.java    From hadoop-gpu with Apache License 2.0
/**
 * Make a copy of the writable object using serialization to a buffer
 * @param conf the configuration used to look up the serialization
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return dst param (the copy)
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf, 
                              T src, T dst) throws IOException {
  CopyInCopyOutBuffer buffer = cloneBuffers.get();
  buffer.outBuffer.reset();
  SerializationFactory factory = getFactory(conf);
  Class<T> cls = (Class<T>) src.getClass();
  Serializer<T> serializer = factory.getSerializer(cls);
  serializer.open(buffer.outBuffer);
  serializer.serialize(src);
  buffer.moveData();
  Deserializer<T> deserializer = factory.getDeserializer(cls);
  deserializer.open(buffer.inBuffer);
  dst = deserializer.deserialize(dst);
  return dst;
}
 
Example 5
Source File: SequenceFile.java    From RDFS with Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Path name, Configuration conf, FSDataOutputStream out,
          Class keyClass, Class valClass,
          boolean compress, CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.compress = compress;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    this.compressedValSerializer.open(deflateOut);
  }
}
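
This init() is package-private; callers normally reach it through SequenceFile.createWriter. A minimal usage sketch, assuming the classic createWriter overload from this Hadoop lineage and a hypothetical output path:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/tmp/example.seq"); // hypothetical path
SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, conf, path, Text.class, IntWritable.class);
writer.append(new Text("key"), new IntWritable(1));
writer.close();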
 
Example 6
Source File: MapOperatorTest.java    From attic-apex-malhar with Apache License 2.0
public void testNodeProcessingSchema(MapOperator<LongWritable, Text, Text, IntWritable> oper) throws IOException
{

  CollectorTestSink sortSink = new CollectorTestSink();
  oper.output.setSink(sortSink);

  oper.setMapClass(WordCount.Map.class);
  oper.setCombineClass(WordCount.Reduce.class);
  oper.setDirName(testMeta.testDir);
  oper.setConfigFile(null);
  oper.setInputFormatClass(TextInputFormat.class);

  Configuration conf = new Configuration();
  JobConf jobConf = new JobConf(conf);
  FileInputFormat.setInputPaths(jobConf, new Path(testMeta.testDir));
  TextInputFormat inputFormat = new TextInputFormat();
  inputFormat.configure(jobConf);
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
  keySerializer.open(oper.getOutstream());
  keySerializer.serialize(splits[0]);
  oper.setInputSplitClass(splits[0].getClass());
  keySerializer.close();
  oper.setup(null);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.emitTuples();
  oper.endWindow();
  oper.beginWindow(1);
  oper.emitTuples();
  oper.endWindow();

  Assert.assertEquals("number emitted tuples", 3, sortSink.collectedTuples.size());
  for (Object o : sortSink.collectedTuples) {
    LOG.debug(o.toString());
  }
  LOG.debug("Done testing round\n");
  oper.teardown();
}
 
Example 7
Source File: DefaultStringifier.java    From big-c with Apache License 2.0
public DefaultStringifier(Configuration conf, Class<T> c) {

    SerializationFactory factory = new SerializationFactory(conf);
    this.serializer = factory.getSerializer(c);
    this.deserializer = factory.getDeserializer(c);
    this.inBuf = new DataInputBuffer();
    this.outBuf = new DataOutputBuffer();
    try {
      serializer.open(outBuf);
      deserializer.open(inBuf);
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }
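
DefaultStringifier is usually driven through its static store/load helpers rather than constructed directly. A minimal sketch, assuming the default WritableSerialization is configured (the configuration key name is hypothetical):

Configuration conf = new Configuration();
DefaultStringifier.store(conf, new Text("payload"), "example.key"); // Base64-encodes into conf
Text restored = DefaultStringifier.load(conf, "example.key", Text.class);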
 
Example 8
Source File: JobSplitWriter.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
private static <T extends InputSplit> 
SplitMetaInfo[] writeNewSplits(Configuration conf, 
    T[] array, FSDataOutputStream out)
throws IOException, InterruptedException {

  SplitMetaInfo[] info = new SplitMetaInfo[array.length];
  if (array.length != 0) {
    SerializationFactory factory = new SerializationFactory(conf);
    int i = 0;
    int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
        MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
    long offset = out.getPos();
    for(T split: array) {
      long prevCount = out.getPos();
      Text.writeString(out, split.getClass().getName());
      Serializer<T> serializer = 
        factory.getSerializer((Class<T>) split.getClass());
      serializer.open(out);
      serializer.serialize(split);
      long currCount = out.getPos();
      String[] locations = split.getLocations();
      if (locations.length > maxBlockLocations) {
        LOG.warn("Max block location exceeded for split: "
            + split + " splitsize: " + locations.length +
            " maxsize: " + maxBlockLocations);
        locations = Arrays.copyOf(locations, maxBlockLocations);
      }
      info[i++] = 
        new JobSplit.SplitMetaInfo( 
            locations, offset,
            split.getLength());
      offset += currCount - prevCount;
    }
  }
  return info;
}
 
Example 9
Source File: IFile.java    From big-c with Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory = 
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
  this.ownOutputStream = ownOutputStream;
}
 
Example 10
Source File: WALFile.java    From streamx with Apache License 2.0
void init(Configuration conf, FSDataOutputStream out, boolean ownStream)
    throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(WALEntry.class);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
        + WALEntry.class.getCanonicalName() + "'. "
        + "Please ensure that the configuration '" +
        CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
        + "properly configured, if you're using"
        + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.valSerializer = serializationFactory.getSerializer(WALEntry.class);
  if (this.valSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
        + WALEntry.class.getCanonicalName() + "'. "
        + "Please ensure that the configuration '" +
        CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
        + "properly configured, if you're using"
        + "custom serialization.");
  }
  this.valSerializer.open(buffer);
  if (appendMode) {
    sync();
  } else {
    writeFileHeader();
  }
}
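
If getSerializer() returns null as guarded against above, the usual remedy is to list a matching Serialization implementation under io.serializations. A minimal sketch (the custom class is hypothetical):

Configuration conf = new Configuration();
conf.setStrings(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
    "org.apache.hadoop.io.serializer.WritableSerialization",
    "com.example.WALEntrySerialization"); // hypothetical Serialization<WALEntry>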
 
Example 11
Source File: DefaultStringifier.java    From hadoop with Apache License 2.0
public DefaultStringifier(Configuration conf, Class<T> c) {

    SerializationFactory factory = new SerializationFactory(conf);
    this.serializer = factory.getSerializer(c);
    this.deserializer = factory.getDeserializer(c);
    this.inBuf = new DataInputBuffer();
    this.outBuf = new DataOutputBuffer();
    try {
      serializer.open(outBuf);
      deserializer.open(inBuf);
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }
 
Example 12
Source File: IFile.java    From hadoop-gpu with Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    this.compressor.reset();
    this.compressedOut = codec.createOutputStream(checksumOut, compressor);
    this.out = new FSDataOutputStream(this.compressedOut,  null);
    this.compressOutput = true;
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.valueSerializer = serializationFactory.getSerializer(valueClass);
  this.valueSerializer.open(buffer);
}
 
Example 13
Source File: IFile.java    From hadoop with Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory = 
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
  this.ownOutputStream = ownOutputStream;
}
 
Example 14
Source File: SequenceFile.java    From gemfirexd-oss with Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Configuration conf, FSDataOutputStream out, boolean ownStream,
          Class keyClass, Class valClass,
          CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
            + keyClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  if (this.uncompressedValSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
            + valClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    if (this.compressedValSerializer == null) {
      throw new IOException(
          "Could not find a serializer for the Value class: '"
              + valClass.getCanonicalName() + "'. "
              + "Please ensure that the configuration '" +
              CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
              + "properly configured, if you're using"
              + "custom serialization.");
    }
    this.compressedValSerializer.open(deflateOut);
  }
  writeFileHeader();
}
 
Example 15
Source File: TestValuesIterator.java    From tez with Apache License 2.0
/**
 * Create in-memory segments.
 *
 * @return the list of in-memory segments
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public List<TezMerger.Segment> createInMemStreams() throws IOException {
  int numberOfStreams = Math.max(2, rnd.nextInt(10));
  LOG.info("No of streams : " + numberOfStreams);

  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(keyClass);
  Serializer valueSerializer = serializationFactory.getSerializer(valClass);

  LocalDirAllocator localDirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext context = createTezInputContext();
  MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator,
      context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);

  DataOutputBuffer keyBuf = new DataOutputBuffer();
  DataOutputBuffer valBuf = new DataOutputBuffer();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  keySerializer.open(keyBuf);
  valueSerializer.open(valBuf);

  List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
  for (int i = 0; i < numberOfStreams; i++) {
    BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
    InMemoryWriter writer =
        new InMemoryWriter(bout);
    Map<Writable, Writable> data = createData();
    //write data
    for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
      keySerializer.serialize(entry.getKey());
      valueSerializer.serialize(entry.getValue());
      keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
      valIn.reset(valBuf.getData(), 0, valBuf.getLength());
      writer.append(keyIn, valIn);
      originalData.put(entry.getKey(), entry.getValue());
      keyBuf.reset();
      valBuf.reset();
      keyIn.reset();
      valIn.reset();
    }
    IFile.Reader reader = new InMemoryReader(mergeManager, null, bout.getBuffer(), 0,
        bout.getBuffer().length);
    segments.add(new TezMerger.Segment(reader, null));

    data.clear();
    writer.close();
  }
  return segments;
}
 
Example 16
Source File: SequenceFile.java    From hadoop with Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Configuration conf, FSDataOutputStream out, boolean ownStream,
          Class keyClass, Class valClass,
          CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
            + keyClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  if (this.uncompressedValSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
            + valClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    if (this.compressedValSerializer == null) {
      throw new IOException(
          "Could not find a serializer for the Value class: '"
              + valClass.getCanonicalName() + "'. "
              + "Please ensure that the configuration '" +
              CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
              + "properly configured, if you're using"
              + "custom serialization.");
    }
    this.compressedValSerializer.open(deflateOut);
  }
  writeFileHeader();
}
 
Example 17
Source File: MapTask.java    From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked")
public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
                       TaskReporter reporter
                       ) throws IOException, ClassNotFoundException {
  this.job = job;
  this.reporter = reporter;
  localFs = FileSystem.getLocal(job);
  partitions = job.getNumReduceTasks();
   
  rfs = ((LocalFileSystem)localFs).getRaw();

  indexCacheList = new ArrayList<SpillRecord>();
  
  //sanity checks
  final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8);
  final float recper = job.getFloat("io.sort.record.percent",(float)0.05);
  final int sortmb = job.getInt("io.sort.mb", 100);
  if (spillper > (float)1.0 || spillper < (float)0.0) {
    throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
  }
  if (recper > (float)1.0 || recper < (float)0.01) {
    throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
  }
  if ((sortmb & 0x7FF) != sortmb) {
    throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
  }
  sorter = ReflectionUtils.newInstance(
        job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job);
  LOG.info("io.sort.mb = " + sortmb);
  // buffers and accounting
  int maxMemUsage = sortmb << 20;
  int recordCapacity = (int)(maxMemUsage * recper);
  recordCapacity -= recordCapacity % RECSIZE;
  kvbuffer = new byte[maxMemUsage - recordCapacity];
  bufvoid = kvbuffer.length;
  recordCapacity /= RECSIZE;
  kvoffsets = new int[recordCapacity];
  kvindices = new int[recordCapacity * ACCTSIZE];
  softBufferLimit = (int)(kvbuffer.length * spillper);
  softRecordLimit = (int)(kvoffsets.length * spillper);
  LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
  LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
  // k/v serialization
  comparator = job.getOutputKeyComparator();
  keyClass = (Class<K>)job.getMapOutputKeyClass();
  valClass = (Class<V>)job.getMapOutputValueClass();
  serializationFactory = new SerializationFactory(job);
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(bb);
  valSerializer = serializationFactory.getSerializer(valClass);
  valSerializer.open(bb);
  // counters
  mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
  mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
  Counters.Counter combineInputCounter = 
    reporter.getCounter(COMBINE_INPUT_RECORDS);
  combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
  // compression
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // combiner
  combinerRunner = CombinerRunner.create(job, getTaskID(), 
                                         combineInputCounter,
                                         reporter, null);
  if (combinerRunner != null) {
    combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter);
  } else {
    combineCollector = null;
  }
  minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
  spillThread.setDaemon(true);
  spillThread.setName("SpillThread");
  spillLock.lock();
  try {
    spillThread.start();
    while (!spillThreadRunning) {
      spillDone.await();
    }
  } catch (InterruptedException e) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  } finally {
    spillLock.unlock();
  }
  if (sortSpillException != null) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  }
}
 
Example 18
Source File: SequenceFile.java    From big-c with Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Configuration conf, FSDataOutputStream out, boolean ownStream,
          Class keyClass, Class valClass,
          CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
            + keyClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  if (this.uncompressedValSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
            + valClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    if (this.compressedValSerializer == null) {
      throw new IOException(
          "Could not find a serializer for the Value class: '"
              + valClass.getCanonicalName() + "'. "
              + "Please ensure that the configuration '" +
              CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
              + "properly configured, if you're using"
              + "custom serialization.");
    }
    this.compressedValSerializer.open(deflateOut);
  }
  writeFileHeader();
}
 
Example 19
Source File: RubixRecordWriter.java    From Cubert with Apache License 2.0
@SuppressWarnings({ "unchecked", "rawtypes" })
public RubixRecordWriter(Configuration conf,
                         FSDataOutputStream out,
                         Class keyClass,
                         Class valueClass,
                         CompressionCodec codec) throws IOException
{
    this.out = out;

    final SerializationFactory serializationFactory = new SerializationFactory(conf);
    keySerializer = serializationFactory.getSerializer(keyClass);

    ObjectMapper mapper = new ObjectMapper();
    metadataJson =
            mapper.readValue(conf.get(CubertStrings.JSON_METADATA), JsonNode.class);
    ((ObjectNode) metadataJson).put("keyClass", keyClass.getCanonicalName());
    ((ObjectNode) metadataJson).put("valueClass", valueClass.getCanonicalName());
    BlockSchema schema = new BlockSchema(metadataJson.get("schema"));

    if (conf.getBoolean(CubertStrings.USE_COMPACT_SERIALIZATION, false)
            && schema.isFlatSchema())
    {
        valueSerializer = new CompactSerializer<V>(schema);
        ((ObjectNode) metadataJson).put("serializationType",
                                        BlockSerializationType.COMPACT.toString());
    }
    else
    {
        valueSerializer = serializationFactory.getSerializer(valueClass);
        ((ObjectNode) metadataJson).put("serializationType",
                                        BlockSerializationType.DEFAULT.toString());
    }

    keySerializer.open(keySectionStream);

    if (codec == null)
    {
        valueSerializer.open(out);
        compressedStream = null;
    }
    else
    {
        compressedStream = codec.createOutputStream(out);
        valueSerializer.open(compressedStream);
    }

}
 
Example 20
Source File: JobClient.java    From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked")
private <T extends org.apache.hadoop.mapreduce.InputSplit> 
int writeNewSplits(JobContext job, Path submitSplitFile
                   ) throws IOException, InterruptedException, 
                            ClassNotFoundException {
  JobConf conf = job.getJobConf();
  org.apache.hadoop.mapreduce.InputFormat<?,?> input =
    ReflectionUtils.newInstance(job.getInputFormatClass(), job.getJobConf());
  
  List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
  T[] array = (T[])
    splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

  // sort the splits into order based on size, so that the biggest
  // go first
  Arrays.sort(array, new NewSplitComparator());
  DataOutputStream out = writeSplitsFileHeader(conf, submitSplitFile, 
                                               array.length);
  try {
    if (array.length != 0) {
      DataOutputBuffer buffer = new DataOutputBuffer();
      RawSplit rawSplit = new RawSplit();
      SerializationFactory factory = new SerializationFactory(conf);
      Serializer<T> serializer = 
        factory.getSerializer((Class<T>) array[0].getClass());
      serializer.open(buffer);
      for(T split: array) {
        rawSplit.setClassName(split.getClass().getName());
        buffer.reset();
        serializer.serialize(split);
        rawSplit.setDataLength(split.getLength());
        rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
        rawSplit.setLocations(split.getLocations());
        rawSplit.write(out);
      }
      serializer.close();
    }
  } finally {
    out.close();
  }
  return array.length;
}