Java Code Examples for org.apache.hadoop.io.serializer.Deserializer

The following examples show how to use org.apache.hadoop.io.serializer.Deserializer. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   Source File: TestWritableJobConf.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips {@code conf} through the serializer and deserializer that the
 * {@link SerializationFactory} built from {@code CONF} provides for its class.
 *
 * @param conf the object to serialize
 * @return the object obtained by deserializing the serialized bytes
 * @throws Exception if serialization or deserialization fails
 */
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory serializations = new SerializationFactory(CONF);
  Class<K> type = GenericsUtil.getClass(conf);
  Serializer<K> writer = serializations.getSerializer(type);
  Deserializer<K> reader = serializations.getDeserializer(type);

  // Serialize into a DataOutputBuffer.
  DataOutputBuffer sink = new DataOutputBuffer();
  writer.open(sink);
  writer.serialize(conf);
  writer.close();

  // Read back only the bytes that were written (getLength), not the
  // whole backing array.
  DataInputBuffer source = new DataInputBuffer();
  source.reset(sink.getData(), sink.getLength());
  reader.open(source);
  K roundTripped = reader.deserialize(null);
  reader.close();
  return roundTripped;
}
 
Example 2
Source Project: hadoop   Source File: Chain.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code obj} by serializing it into the thread-local output buffer
 * and deserializing those bytes into a newly created instance of the same
 * class.
 *
 * @param serialization supplies the serializer and deserializer for the
 *                      object's class
 * @param obj the object to copy
 * @return the object returned by the deserializer
 * @throws IOException if serialization or deserialization fails
 */
private <E> E makeCopyForPassByValue(Serialization<E> serialization,
                                      E obj) throws IOException {
  Class<E> type = GenericsUtil.getClass(obj);
  Serializer<E> ser = serialization.getSerializer(type);
  Deserializer<E> deser = serialization.getDeserializer(type);

  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();

  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();

  E target = ReflectionUtils.newInstance(type, getChainJobConf());
  ByteArrayInputStream bais =
    new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  // A Deserializer is allowed to return a different object than the one
  // handed in, so the copy is whatever deserialize() returns, not
  // necessarily the instance created above.
  E copy = deser.deserialize(target);
  deser.close();
  return copy;
}
 
Example 3
Source Project: hadoop   Source File: ReflectionUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code src} into {@code dst} by serializing {@code src} into a
 * buffer obtained from {@code cloneBuffers} and deserializing that data
 * into {@code dst}.
 *
 * @param conf configuration passed to {@code getFactory}
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return the object returned by the deserializer (the copy)
 * @throws IOException if serialization or deserialization fails
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf,
                              T src, T dst) throws IOException {
  CopyInCopyOutBuffer scratch = cloneBuffers.get();
  scratch.outBuffer.reset();
  SerializationFactory serializations = getFactory(conf);
  Class<T> type = (Class<T>) src.getClass();

  Serializer<T> writer = serializations.getSerializer(type);
  writer.open(scratch.outBuffer);
  writer.serialize(src);

  // moveData() is expected to expose the bytes just written through
  // inBuffer -- NOTE(review): confirm against CopyInCopyOutBuffer.
  scratch.moveData();

  Deserializer<T> reader = serializations.getDeserializer(type);
  reader.open(scratch.inBuffer);
  return reader.deserialize(dst);
}
 
Example 4
Source Project: big-c   Source File: TestWritableJobConf.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes {@code conf} and deserializes it again, using the serializer
 * and deserializer that a {@link SerializationFactory} created from
 * {@code CONF} returns for the object's class.
 *
 * @param conf the object to serialize
 * @return the object produced by deserialization
 * @throws Exception if serialization or deserialization fails
 */
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory serializations = new SerializationFactory(CONF);
  Class<K> type = GenericsUtil.getClass(conf);
  Serializer<K> writer = serializations.getSerializer(type);
  Deserializer<K> reader = serializations.getDeserializer(type);

  // Write phase.
  DataOutputBuffer written = new DataOutputBuffer();
  writer.open(written);
  writer.serialize(conf);
  writer.close();

  // Read phase: limit the input to the number of bytes actually written.
  DataInputBuffer toRead = new DataInputBuffer();
  toRead.reset(written.getData(), written.getLength());
  reader.open(toRead);
  K result = reader.deserialize(null);
  reader.close();
  return result;
}
 
Example 5
Source Project: big-c   Source File: Chain.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code obj} by serializing it into the thread-local output buffer
 * and deserializing those bytes into a newly created instance of the same
 * class.
 *
 * @param serialization supplies the serializer and deserializer for the
 *                      object's class
 * @param obj the object to copy
 * @return the object returned by the deserializer
 * @throws IOException if serialization or deserialization fails
 */
private <E> E makeCopyForPassByValue(Serialization<E> serialization,
                                      E obj) throws IOException {
  Class<E> type = GenericsUtil.getClass(obj);
  Serializer<E> ser = serialization.getSerializer(type);
  Deserializer<E> deser = serialization.getDeserializer(type);

  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();

  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();

  E target = ReflectionUtils.newInstance(type, getChainJobConf());
  ByteArrayInputStream bais =
    new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  // A Deserializer is allowed to return a different object than the one
  // handed in, so the copy is whatever deserialize() returns, not
  // necessarily the instance created above.
  E copy = deser.deserialize(target);
  deser.close();
  return copy;
}
 
Example 6
Source Project: big-c   Source File: ReflectionUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code src} into {@code dst} by serializing {@code src} into a
 * buffer obtained from {@code cloneBuffers} and deserializing that data
 * into {@code dst}.
 *
 * @param conf configuration passed to {@code getFactory}
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return the object returned by the deserializer (the copy)
 * @throws IOException if serialization or deserialization fails
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf,
                              T src, T dst) throws IOException {
  CopyInCopyOutBuffer scratch = cloneBuffers.get();
  scratch.outBuffer.reset();
  SerializationFactory serializations = getFactory(conf);
  Class<T> type = (Class<T>) src.getClass();

  Serializer<T> writer = serializations.getSerializer(type);
  writer.open(scratch.outBuffer);
  writer.serialize(src);

  // moveData() is expected to expose the bytes just written through
  // inBuffer -- NOTE(review): confirm against CopyInCopyOutBuffer.
  scratch.moveData();

  Deserializer<T> reader = serializations.getDeserializer(type);
  reader.open(scratch.inBuffer);
  return reader.deserialize(dst);
}
 
Example 7
Source Project: attic-apex-malhar   Source File: OutputCollectorImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a copy of {@code t} by serializing it and deserializing the
 * resulting bytes into a new object.
 *
 * <p>The serialized form is staged in a byte array. A connected
 * PipedOutputStream/PipedInputStream pair must not be written and read from
 * the same thread: the write blocks as soon as the pipe's internal buffer
 * is full, because no other thread is draining it.
 *
 * @param t the object to clone
 * @return the object produced by deserialization
 * @throws IOException if serialization or deserialization fails
 */
@SuppressWarnings("unchecked")
private <T> T cloneObj(T t) throws IOException
{
  Class<T> keyClass = (Class<T>)t.getClass();

  java.io.ByteArrayOutputStream serialized = new java.io.ByteArrayOutputStream();
  Serializer<T> keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(serialized);
  try {
    keySerializer.serialize(t);
  } finally {
    // Closing before reading ensures everything has reached the byte array,
    // and releases the serializer even when serialize() fails.
    keySerializer.close();
  }

  Deserializer<T> keyDeserializer = serializationFactory.getDeserializer(keyClass);
  keyDeserializer.open(new java.io.ByteArrayInputStream(serialized.toByteArray()));
  try {
    return keyDeserializer.deserialize(null);
  } finally {
    keyDeserializer.close();
  }
}
 
Example 8
Source Project: RDFS   Source File: Chain.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code obj} by serializing it into the thread-local output buffer
 * and deserializing those bytes into a newly created instance of the same
 * class.
 *
 * @param serialization supplies the serializer and deserializer for the
 *                      object's class
 * @param obj the object to copy
 * @return the object returned by the deserializer
 * @throws IOException if serialization or deserialization fails
 */
private <E> E makeCopyForPassByValue(Serialization<E> serialization,
                                      E obj) throws IOException {
  Class<E> type = GenericsUtil.getClass(obj);
  Serializer<E> ser = serialization.getSerializer(type);
  Deserializer<E> deser = serialization.getDeserializer(type);

  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();

  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();

  E target = ReflectionUtils.newInstance(type, getChainJobConf());
  ByteArrayInputStream bais =
    new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  // A Deserializer is allowed to return a different object than the one
  // handed in, so the copy is whatever deserialize() returns, not
  // necessarily the instance created above.
  E copy = deser.deserialize(target);
  deser.close();
  return copy;
}
 
Example 9
Source Project: RDFS   Source File: TestWritableJobConf.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips {@code conf} through the serializer and deserializer that the
 * {@link SerializationFactory} built from {@code CONF} provides for its class.
 *
 * @param conf the object to serialize
 * @return the object obtained by deserializing the serialized bytes
 * @throws Exception if serialization or deserialization fails
 */
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory serializations = new SerializationFactory(CONF);
  Class<K> type = GenericsUtil.getClass(conf);
  Serializer<K> writer = serializations.getSerializer(type);
  Deserializer<K> reader = serializations.getDeserializer(type);

  // Serialize into a DataOutputBuffer.
  DataOutputBuffer sink = new DataOutputBuffer();
  writer.open(sink);
  writer.serialize(conf);
  writer.close();

  // Read back only the bytes that were written (getLength), not the
  // whole backing array.
  DataInputBuffer source = new DataInputBuffer();
  source.reset(sink.getData(), sink.getLength());
  reader.open(source);
  K roundTripped = reader.deserialize(null);
  reader.close();
  return roundTripped;
}
 
Example 10
Source Project: RDFS   Source File: ReflectionUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code src} into {@code dst} by serializing {@code src} into a
 * buffer obtained from {@code cloneBuffers} and deserializing that data
 * into {@code dst}.
 *
 * @param conf configuration passed to {@code getFactory}
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return the object returned by the deserializer (the copy)
 * @throws IOException if serialization or deserialization fails
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf,
                              T src, T dst) throws IOException {
  CopyInCopyOutBuffer scratch = cloneBuffers.get();
  scratch.outBuffer.reset();
  SerializationFactory serializations = getFactory(conf);
  Class<T> type = (Class<T>) src.getClass();

  Serializer<T> writer = serializations.getSerializer(type);
  writer.open(scratch.outBuffer);
  writer.serialize(src);

  // moveData() is expected to expose the bytes just written through
  // inBuffer -- NOTE(review): confirm against CopyInCopyOutBuffer.
  scratch.moveData();

  Deserializer<T> reader = serializations.getDeserializer(type);
  reader.open(scratch.inBuffer);
  return reader.deserialize(dst);
}
 
Example 11
Source Project: incubator-tez   Source File: MRHelpers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Rebuilds an old-format {@code InputSplit} from its {@code MRSplitProto}
 * form: loads the class named in the proto and deserializes the proto's
 * split bytes into an instance of it.
 *
 * @param splitProto the serialized split; must not be null
 * @param serializationFactory supplies the deserializer for the split class
 * @return the deserialized split
 * @throws IOException if the split class cannot be loaded or
 *                     deserialization fails
 */
@SuppressWarnings("unchecked")
public static InputSplit createOldFormatSplitFromUserPayload(
    MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  // This may not need to use serialization factory, since OldFormat
  // always uses Writable to write splits.
  Preconditions.checkNotNull(splitProto, "splitProto cannot be null");
  String splitClassName = splitProto.getSplitClassName();

  Class<InputSplit> splitClass;
  try {
    splitClass = (Class<InputSplit>) Class.forName(splitClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + splitClassName + "]", e);
  }

  Deserializer<InputSplit> splitReader =
      serializationFactory.getDeserializer(splitClass);
  splitReader.open(splitProto.getSplitBytes().newInput());
  InputSplit restored = splitReader.deserialize(null);
  splitReader.close();
  return restored;
}
 
Example 12
Source Project: incubator-tez   Source File: MRHelpers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Rebuilds an {@code org.apache.hadoop.mapreduce.InputSplit} from its
 * {@code MRSplitProto} form: loads the class named in the proto and
 * deserializes the proto's split bytes into an instance of it.
 *
 * @param splitProto the serialized split; must not be null
 * @param serializationFactory supplies the deserializer for the split class
 * @return the deserialized split
 * @throws IOException if the split class cannot be loaded or
 *                     deserialization fails
 */
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUserPayload(
    MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  Preconditions.checkNotNull(splitProto, "splitProto must be specified");
  String splitClassName = splitProto.getSplitClassName();

  Class<org.apache.hadoop.mapreduce.InputSplit> splitClass;
  try {
    splitClass = (Class<org.apache.hadoop.mapreduce.InputSplit>)
        Class.forName(splitClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + splitClassName + "]", e);
  }

  Deserializer<org.apache.hadoop.mapreduce.InputSplit> splitReader =
      serializationFactory.getDeserializer(splitClass);
  splitReader.open(splitProto.getSplitBytes().newInput());
  org.apache.hadoop.mapreduce.InputSplit restored = splitReader.deserialize(null);
  splitReader.close();
  return restored;
}
 
Example 13
Source Project: tez   Source File: MRInputHelpers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Turns the {@link org.apache.tez.mapreduce.input.MRInput} representation of
 * a split back into an {@link org.apache.hadoop.mapred.InputSplit}.
 *
 * @param splitProto           the {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                             holding the split's class name and bytes; must not be null
 * @param serializationFactory supplies the deserializer for the split class
 * @return the deserialized split
 * @throws java.io.IOException if the split class cannot be loaded or
 *                             deserialization fails
 */
@SuppressWarnings("unchecked")
@InterfaceStability.Evolving
@InterfaceAudience.LimitedPrivate({"hive, pig"})
public static InputSplit createOldFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  // This may not need to use serialization factory, since OldFormat
  // always uses Writable to write splits.
  Objects.requireNonNull(splitProto, "splitProto cannot be null");
  String splitClassName = splitProto.getSplitClassName();

  Class<InputSplit> splitClass;
  try {
    splitClass = (Class<InputSplit>) Class.forName(splitClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + splitClassName + "]", e);
  }

  Deserializer<InputSplit> splitReader =
      serializationFactory.getDeserializer(splitClass);
  splitReader.open(splitProto.getSplitBytes().newInput());
  InputSplit restored = splitReader.deserialize(null);
  splitReader.close();
  return restored;
}
 
Example 14
Source Project: tez   Source File: MRInputHelpers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Turns the {@link org.apache.tez.mapreduce.input.MRInput} representation of
 * a split back into an {@link org.apache.hadoop.mapreduce.InputSplit}.
 *
 * @param splitProto           the {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                             holding the split's class name and bytes; must not be null
 * @param serializationFactory supplies the deserializer for the split class
 * @return the deserialized split
 * @throws IOException if the split class cannot be loaded or
 *                     deserialization fails
 */
@InterfaceStability.Evolving
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  Objects.requireNonNull(splitProto, "splitProto must be specified");
  String splitClassName = splitProto.getSplitClassName();

  Class<org.apache.hadoop.mapreduce.InputSplit> splitClass;
  try {
    splitClass = (Class<org.apache.hadoop.mapreduce.InputSplit>)
        Class.forName(splitClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + splitClassName + "]", e);
  }

  Deserializer<org.apache.hadoop.mapreduce.InputSplit> splitReader =
      serializationFactory.getDeserializer(splitClass);
  splitReader.open(splitProto.getSplitBytes().newInput());
  org.apache.hadoop.mapreduce.InputSplit restored = splitReader.deserialize(null);
  splitReader.close();
  return restored;
}
 
Example 15
Source Project: hadoop-gpu   Source File: Chain.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code obj} by serializing it into the thread-local output buffer
 * and deserializing those bytes into a newly created instance of the same
 * class.
 *
 * @param serialization supplies the serializer and deserializer for the
 *                      object's class
 * @param obj the object to copy
 * @return the object returned by the deserializer
 * @throws IOException if serialization or deserialization fails
 */
private <E> E makeCopyForPassByValue(Serialization<E> serialization,
                                      E obj) throws IOException {
  Class<E> type = GenericsUtil.getClass(obj);
  Serializer<E> ser = serialization.getSerializer(type);
  Deserializer<E> deser = serialization.getDeserializer(type);

  DataOutputBuffer dof = threadLocalDataOutputBuffer.get();

  dof.reset();
  ser.open(dof);
  ser.serialize(obj);
  ser.close();

  E target = ReflectionUtils.newInstance(type, getChainJobConf());
  ByteArrayInputStream bais =
    new ByteArrayInputStream(dof.getData(), 0, dof.getLength());
  deser.open(bais);
  // A Deserializer is allowed to return a different object than the one
  // handed in, so the copy is whatever deserialize() returns, not
  // necessarily the instance created above.
  E copy = deser.deserialize(target);
  deser.close();
  return copy;
}
 
Example 16
Source Project: hadoop-gpu   Source File: TestWritableJobConf.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes {@code conf} and deserializes it again, using the serializer
 * and deserializer that a {@link SerializationFactory} created from
 * {@code CONF} returns for the object's class.
 *
 * @param conf the object to serialize
 * @return the object produced by deserialization
 * @throws Exception if serialization or deserialization fails
 */
private <K> K serDeser(K conf) throws Exception {
  SerializationFactory serializations = new SerializationFactory(CONF);
  Class<K> type = GenericsUtil.getClass(conf);
  Serializer<K> writer = serializations.getSerializer(type);
  Deserializer<K> reader = serializations.getDeserializer(type);

  // Write phase.
  DataOutputBuffer written = new DataOutputBuffer();
  writer.open(written);
  writer.serialize(conf);
  writer.close();

  // Read phase: limit the input to the number of bytes actually written.
  DataInputBuffer toRead = new DataInputBuffer();
  toRead.reset(written.getData(), written.getLength());
  reader.open(toRead);
  K result = reader.deserialize(null);
  reader.close();
  return result;
}
 
Example 17
Source Project: hadoop-gpu   Source File: ReflectionUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies {@code src} into {@code dst} by serializing {@code src} into a
 * buffer obtained from {@code cloneBuffers} and deserializing that data
 * into {@code dst}.
 *
 * @param conf configuration passed to {@code getFactory}
 * @param src the object to copy from
 * @param dst the object to copy into, which is destroyed
 * @return the object returned by the deserializer (the copy)
 * @throws IOException if serialization or deserialization fails
 */
@SuppressWarnings("unchecked")
public static <T> T copy(Configuration conf,
                              T src, T dst) throws IOException {
  CopyInCopyOutBuffer scratch = cloneBuffers.get();
  scratch.outBuffer.reset();
  SerializationFactory serializations = getFactory(conf);
  Class<T> type = (Class<T>) src.getClass();

  Serializer<T> writer = serializations.getSerializer(type);
  writer.open(scratch.outBuffer);
  writer.serialize(src);

  // moveData() is expected to expose the bytes just written through
  // inBuffer -- NOTE(review): confirm against CopyInCopyOutBuffer.
  scratch.moveData();

  Deserializer<T> reader = serializations.getDeserializer(type);
  reader.open(scratch.inBuffer);
  return reader.deserialize(dst);
}
 
Example 18
Source Project: hadoop   Source File: MapTask.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads one split from {@code file}, starting at {@code offset}.
 *
 * <p>The split's class name is read first, then the split itself is
 * deserialized from the same stream. The {@code SPLIT_RAW_BYTES} counter is
 * incremented by the number of bytes consumed (stream position after
 * deserialization minus {@code offset}).
 *
 * @param file file containing the serialized split
 * @param offset position in {@code file} at which the split record starts
 * @return the deserialized split
 * @throws IOException if the file cannot be read, the split class cannot be
 *                     found, or deserialization fails
 */
@SuppressWarnings("unchecked")
private <T> T getSplitDetails(Path file, long offset) 
 throws IOException {
  FileSystem fs = file.getFileSystem(conf);
  FSDataInputStream inFile = fs.open(file);
  // Close the stream on every exit path; previously it leaked whenever
  // seeking, class lookup or deserialization threw.
  try {
    inFile.seek(offset);
    String className = StringInterner.weakIntern(Text.readString(inFile));
    Class<T> cls;
    try {
      cls = (Class<T>) conf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
      throw new IOException("Split class " + className + 
                            " not found", ce);
    }
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer<T> deserializer = 
      (Deserializer<T>) factory.getDeserializer(cls);
    deserializer.open(inFile);
    T split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    getCounters().findCounter(
        TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    return split;
  } finally {
    inFile.close();
  }
}
 
Example 19
Source Project: big-c   Source File: MapTask.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads one split from {@code file}, starting at {@code offset}.
 *
 * <p>The split's class name is read first, then the split itself is
 * deserialized from the same stream. The {@code SPLIT_RAW_BYTES} counter is
 * incremented by the number of bytes consumed (stream position after
 * deserialization minus {@code offset}).
 *
 * @param file file containing the serialized split
 * @param offset position in {@code file} at which the split record starts
 * @return the deserialized split
 * @throws IOException if the file cannot be read, the split class cannot be
 *                     found, or deserialization fails
 */
@SuppressWarnings("unchecked")
private <T> T getSplitDetails(Path file, long offset) 
 throws IOException {
  FileSystem fs = file.getFileSystem(conf);
  FSDataInputStream inFile = fs.open(file);
  // Close the stream on every exit path; previously it leaked whenever
  // seeking, class lookup or deserialization threw.
  try {
    inFile.seek(offset);
    String className = StringInterner.weakIntern(Text.readString(inFile));
    Class<T> cls;
    try {
      cls = (Class<T>) conf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
      throw new IOException("Split class " + className + 
                            " not found", ce);
    }
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer<T> deserializer = 
      (Deserializer<T>) factory.getDeserializer(cls);
    deserializer.open(inFile);
    T split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    getCounters().findCounter(
        TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    return split;
  } finally {
    inFile.close();
  }
}
 
Example 20
Source Project: ignite   Source File: HadoopV2JobSelfTest.java    License: Apache License 2.0 5 votes vote down vote up
/** {@inheritDoc} */
@Override public Deserializer<Writable> getDeserializer(Class<Writable> c) {
    // Stub deserializer: never touches its input and always produces a
    // Text built from TEST_SERIALIZED_VALUE.
    Deserializer<Writable> stub = new Deserializer<Writable>() {
        @Override public void open(InputStream in) {
            // No-op.
        }

        @Override public Writable deserialize(Writable writable) {
            Writable fixedVal = new Text(TEST_SERIALIZED_VALUE);

            return fixedVal;
        }

        @Override public void close() {
            // No-op.
        }
    };

    return stub;
}
 
Example 21
Source Project: Cubert   Source File: RubixInputSplit.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Restores this split's state from {@code in}.
 *
 * <p>Read order: length-prefixed key bytes, file name, offset, length,
 * block id, record count, key class name, value class name, schema JSON and
 * the ordinal of the block serialization type. The key itself is
 * deserialized from the key bytes once the key class is known.
 *
 * @param in the input to read the fields from
 * @throws IOException if reading fails, or if the key or value class named
 *                     in the input cannot be loaded
 */
@SuppressWarnings("unchecked")
@Override
public void readFields(DataInput in) throws IOException
{
    int keyBytesLen = in.readInt();
    byte[] keyBytes = new byte[keyBytesLen];
    in.readFully(keyBytes, 0, keyBytesLen);

    filename = new Path(in.readUTF());
    offset = in.readLong();
    length = in.readLong();
    blockId = in.readLong();
    numRecords = in.readLong();
    try
    {
        keyClass = (Class<K>) ClassCache.forName(in.readUTF());
        valueClass = (Class<V>) ClassCache.forName(in.readUTF());

        SerializationFactory serializationFactory = new SerializationFactory(conf);
        Deserializer<K> keyDeserializer =
                serializationFactory.getDeserializer(keyClass);

        // The deserializer reads from its own in-memory stream over the key
        // bytes, so closing it does not affect `in`.
        ByteArrayInputStream bis = new ByteArrayInputStream(keyBytes);
        keyDeserializer.open(bis);
        try
        {
            key = keyDeserializer.deserialize(null);
        }
        finally
        {
            keyDeserializer.close();
        }

        ObjectMapper mapper = new ObjectMapper();
        schema = new BlockSchema(mapper.readValue(in.readUTF(), JsonNode.class));
        blockSerializationType = BlockSerializationType.values()[in.readInt()];
    }
    catch (ClassNotFoundException e)
    {
        // Previously only printed and swallowed, which left the split
        // partially initialised and the rest of the record unread. Surface
        // it through the declared exception type instead.
        throw new IOException("Unable to load key or value class while reading split", e);
    }
}
 
Example 22
Source Project: hbase   Source File: ResultSerialization.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the deserializer based on the configured import format version:
 * a {@code Result94Deserializer} when {@code IMPORT_FORMAT_VER} is set to
 * "0.94", otherwise a {@code ResultDeserializer}.
 */
@Override
public Deserializer<Result> getDeserializer(Class<Result> c) {
  // check input format version; with no configuration there is none
  Configuration conf = getConf();
  String inputVersion = (conf == null) ? null : conf.get(IMPORT_FORMAT_VER);

  // Constant-first equals() also covers an unset (null) version.
  if ("0.94".equals(inputVersion)) {
    LOG.info("Load exported file using deserializer for HBase 0.94 format");
    return new Result94Deserializer();
  }

  return new ResultDeserializer();
}
 
Example 23
Source Project: spork   Source File: PigSplit.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Restores this split's state from {@code is}: the scalar fields, the
 * target operators, a table of distinct split class names, and then each
 * wrapped split, which is stored as an index into that table followed by
 * the split's serialized form.
 *
 * @param is the input to read from; it is cast to {@link InputStream} when
 *           handed to the deserializers
 * @throws IOException if reading fails or a split class cannot be found
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput is) throws IOException {
    disableCounter = is.readBoolean();
    isMultiInputs = is.readBoolean();
    totalSplits = is.readInt();
    splitIndex = is.readInt();
    inputIndex = is.readInt();
    targetOps = (ArrayList<OperatorKey>) readObject(is);
    int wrappedSplitCount = is.readInt();
    int classNameCount = is.readInt();

    // Table of distinct split class names; each serialized split refers to
    // its class by index into this table.
    String[] classNames = new String[classNameCount];
    int nameIdx = 0;
    while (nameIdx < classNameCount) {
        classNames[nameIdx] = is.readUTF();
        nameIdx++;
    }

    try {
        SerializationFactory sf = new SerializationFactory(conf);
        // Call sequence for each Deserializer: open, then deserialize, and
        // deliberately no close (presumably because close() would also
        // close the shared input -- confirm before changing).
        wrappedSplits = new InputSplit[wrappedSplitCount];
        for (int splitIdx = 0; splitIdx < wrappedSplitCount; splitIdx++) {
            // The class-name index precedes the split's serialized data.
            String splitClassName = classNames[is.readInt()];
            Class splitClass = conf.getClassByName(splitClassName);

            Deserializer deserializer = sf.getDeserializer(splitClass);
            deserializer.open((InputStream) is);

            wrappedSplits[splitIdx] = (InputSplit)ReflectionUtils.newInstance(splitClass, conf);
            deserializer.deserialize(wrappedSplits[splitIdx]);
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
 
Example 24
Source Project: incubator-tez   Source File: TestIFile.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads every record from {@code reader}, deserializes its key and value,
 * and asserts that they equal the entry at the same position in
 * {@code data}; finally asserts that the number of records read equals
 * {@code data.size()}.
 *
 * @param reader source of the raw key/value records
 * @param data the expected key/value pairs, in read order
 * @throws IOException if reading or deserialization fails
 */
private void verifyData(Reader reader, List<KVPair> data)
    throws IOException {
  LOG.info("Data verification");

  // Raw bytes are loaded into these buffers by the reader and decoded by
  // the deserializers opened on them.
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(
      defaultConf);
  Deserializer<Text> keyDeserializer =
      serializationFactory.getDeserializer(Text.class);
  Deserializer<IntWritable> valDeserializer =
      serializationFactory.getDeserializer(IntWritable.class);
  keyDeserializer.open(keyIn);
  valDeserializer.open(valIn);

  Text readKey = new Text();
  IntWritable readValue = new IntWritable();
  int recordsSeen = 0;

  while (reader.nextRawKey(keyIn)) {
    reader.nextRawValue(valIn);
    readKey = keyDeserializer.deserialize(readKey);
    readValue = valDeserializer.deserialize(readValue);

    KVPair expected = data.get(recordsSeen);
    String keyMismatch = "Key does not match: Expected: " + expected.getKey()
        + ", Read: " + readKey;
    assertEquals(keyMismatch, expected.getKey(), readKey);
    String valueMismatch = "Value does not match: Expected: " + expected.getvalue()
        + ", Read: " + readValue;
    assertEquals(valueMismatch, expected.getvalue(), readValue);

    recordsSeen++;
  }

  String countMismatch = "Expected: " + data.size() + " records, but found: "
      + recordsSeen;
  assertEquals(countMismatch, data.size(), recordsSeen);
  LOG.info("Found: " + recordsSeen + " records");
}
 
Example 25
Source Project: incubator-tez   Source File: MRInputUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads one {@code org.apache.hadoop.mapreduce.InputSplit} from the split
 * file on the local filesystem described by {@code splitMetaInfo}.
 *
 * <p>The split's class name is read at the start offset, then the split is
 * deserialized from the same stream.
 *
 * @param splitMetaInfo provides the split file location and start offset
 * @param jobConf configuration used for the local filesystem, class lookup
 *                and serialization
 * @param splitBytesCounter if non-null, incremented by the number of bytes
 *                          consumed from the file
 * @return the deserialized split
 * @throws IOException if the file cannot be read, the split class cannot be
 *                     found, or deserialization fails
 */
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk(
    TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter)
    throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  long offset = splitMetaInfo.getStartOffset();

  // Split information read from local filesystem.
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  FSDataInputStream inFile = fs.open(file);
  // Close the stream on every exit path; previously it leaked whenever
  // seeking, class lookup or deserialization threw.
  try {
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
      cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
      throw new IOException("Split class " + className + " not found", ce);
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory
        .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
      splitBytesCounter.increment(pos - offset);
    }
    return split;
  } finally {
    inFile.close();
  }
}
 
Example 26
Source Project: incubator-tez   Source File: MRInputUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads one {@code org.apache.hadoop.mapred.InputSplit} from the split file
 * on the local filesystem described by {@code splitMetaInfo}.
 *
 * <p>The split's class name is read at the start offset, then the split is
 * deserialized from the same stream.
 *
 * @param splitMetaInfo provides the split file location and start offset
 * @param jobConf configuration used for the local filesystem, class lookup
 *                and serialization
 * @param splitBytesCounter if non-null, incremented by the number of bytes
 *                          consumed from the file
 * @return the deserialized split
 * @throws IOException if the file cannot be read, the split class cannot be
 *                     found, or deserialization fails
 */
@SuppressWarnings("unchecked")
public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo,
    JobConf jobConf, TezCounter splitBytesCounter) throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  long offset = splitMetaInfo.getStartOffset();

  FSDataInputStream inFile = fs.open(file);
  // Close the stream on every exit path; previously it leaked whenever
  // seeking, class lookup or deserialization threw.
  try {
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapred.InputSplit> cls;
    try {
      cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
      throw new IOException("Split class " + className + " not found", ce);
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory
        .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
      splitBytesCounter.increment(pos - offset);
    }
    return split;
  } finally {
    inFile.close();
  }
}
 
Example 27
Source Project: tez   Source File: TestPipelinedSorter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Walks {@code sortedDataMap} in iteration order and, for each entry for
 * which the reader still has a record, asserts that the deserialized key
 * and value match the entry (ignoring case). Finally asserts that one
 * record was read per map entry.
 *
 * @param reader source of the raw key/value records
 * @throws IOException if reading or deserialization fails
 */
private void verifyData(IFile.Reader reader)
    throws IOException {
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Deserializer<Text> keyDeserializer = serializationFactory.getDeserializer(Text.class);
  Deserializer<Text> valDeserializer = serializationFactory.getDeserializer(Text.class);
  keyDeserializer.open(keyIn);
  valDeserializer.open(valIn);

  Text readKey = new Text();
  Text readValue = new Text();
  int recordsSeen = 0;

  for (Map.Entry<String, String> expected : sortedDataMap.entrySet()) {
    String expectedKey = expected.getKey();
    String expectedVal = expected.getValue();

    // Reader exhausted: nothing to compare for this entry; the final count
    // assertion reports the shortfall.
    if (!reader.nextRawKey(keyIn)) {
      continue;
    }

    reader.nextRawValue(valIn);
    readKey = keyDeserializer.deserialize(readKey);
    readValue = valDeserializer.deserialize(readValue);
    Assert.assertTrue(expectedKey.equalsIgnoreCase(readKey.toString()));
    Assert.assertTrue(expectedVal.equalsIgnoreCase(readValue.toString()));
    recordsSeen++;
  }

  Assert.assertTrue(recordsSeen == sortedDataMap.size());
}
 
Example 28
Source Project: tez   Source File: TestIFile.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads every record from {@code reader}, deserializes its key and value,
 * and asserts that they equal the entry at the same position in
 * {@code data}; finally asserts that the number of records read equals
 * {@code data.size()}.
 *
 * @param reader source of the raw key/value records
 * @param data the expected key/value pairs, in read order
 * @throws IOException if reading or deserialization fails
 */
private void verifyData(Reader reader, List<KVPair> data)
    throws IOException {
  LOG.info("Data verification");

  // Raw bytes are loaded into these buffers by the reader and decoded by
  // the deserializers opened on them.
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  SerializationFactory serializationFactory = new SerializationFactory(
      defaultConf);
  Deserializer<Text> keyDeserializer =
      serializationFactory.getDeserializer(Text.class);
  Deserializer<IntWritable> valDeserializer =
      serializationFactory.getDeserializer(IntWritable.class);
  keyDeserializer.open(keyIn);
  valDeserializer.open(valIn);

  Text readKey = new Text();
  IntWritable readValue = new IntWritable();
  int recordsSeen = 0;

  while (reader.nextRawKey(keyIn)) {
    reader.nextRawValue(valIn);
    readKey = keyDeserializer.deserialize(readKey);
    readValue = valDeserializer.deserialize(readValue);

    KVPair expected = data.get(recordsSeen);
    String keyMismatch = "Key does not match: Expected: " + expected.getKey()
        + ", Read: " + readKey;
    assertEquals(keyMismatch, expected.getKey(), readKey);
    String valueMismatch = "Value does not match: Expected: " + expected.getvalue()
        + ", Read: " + readValue;
    assertEquals(valueMismatch, expected.getvalue(), readValue);

    recordsSeen++;
  }

  String countMismatch = "Expected: " + data.size() + " records, but found: "
      + recordsSeen;
  assertEquals(countMismatch, data.size(), recordsSeen);
  LOG.info("Found: " + recordsSeen + " records");
}
 
Example 29
Source Project: tez   Source File: MRInputUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads one {@code org.apache.hadoop.mapreduce.InputSplit} from the split
 * file on the local filesystem described by {@code splitMetaInfo}.
 *
 * <p>The split's class name is read at the start offset, then the split is
 * deserialized from the same stream.
 *
 * @param splitMetaInfo provides the split file location and start offset
 * @param jobConf configuration used for the local filesystem, class lookup
 *                and serialization
 * @param splitBytesCounter if non-null, incremented by the number of bytes
 *                          consumed from the file
 * @return the deserialized split
 * @throws IOException if the file cannot be read, the split class cannot be
 *                     found, or deserialization fails
 */
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk(
    TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter)
    throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  long offset = splitMetaInfo.getStartOffset();

  // Split information read from local filesystem.
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  FSDataInputStream inFile = fs.open(file);
  // Close the stream on every exit path; previously it leaked whenever
  // seeking, class lookup or deserialization threw.
  try {
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
      cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
      throw new IOException("Split class " + className + " not found", ce);
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory
        .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
      splitBytesCounter.increment(pos - offset);
    }
    return split;
  } finally {
    inFile.close();
  }
}
 
Example 30
Source Project: tez   Source File: MRInputUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads one {@code org.apache.hadoop.mapred.InputSplit} from the split file
 * on the local filesystem described by {@code splitMetaInfo}.
 *
 * <p>The split's class name is read at the start offset, then the split is
 * deserialized from the same stream.
 *
 * @param splitMetaInfo provides the split file location and start offset
 * @param jobConf configuration used for the local filesystem, class lookup
 *                and serialization
 * @param splitBytesCounter if non-null, incremented by the number of bytes
 *                          consumed from the file
 * @return the deserialized split
 * @throws IOException if the file cannot be read, the split class cannot be
 *                     found, or deserialization fails
 */
@SuppressWarnings("unchecked")
public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo,
    JobConf jobConf, TezCounter splitBytesCounter) throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  long offset = splitMetaInfo.getStartOffset();

  FSDataInputStream inFile = fs.open(file);
  // Close the stream on every exit path; previously it leaked whenever
  // seeking, class lookup or deserialization threw.
  try {
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapred.InputSplit> cls;
    try {
      cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
      throw new IOException("Split class " + className + " not found", ce);
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory
        .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
      splitBytesCounter.increment(pos - offset);
    }
    return split;
  } finally {
    inFile.close();
  }
}