Java Code Examples for org.apache.tez.mapreduce.protos.MRRuntimeProtos#MRSplitProto

The following examples show how to use org.apache.tez.mapreduce.protos.MRRuntimeProtos#MRSplitProto. They are taken from the Apache Tez project; the originating source file is noted above each example.
Example 1
Source File: MRInputHelpers.java    From tez with Apache License 2.0
/**
 * Create an instance of {@link org.apache.hadoop.mapred.InputSplit} from the {@link
 * org.apache.tez.mapreduce.input.MRInput} representation of a split.
 *
 * @param splitProto           The {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                             instance representing the split
 * @param serializationFactory the serialization mechanism used to write out the split
 * @return an instance of the split
 * @throws java.io.IOException
 */
@SuppressWarnings("unchecked")
@InterfaceStability.Evolving
@InterfaceAudience.LimitedPrivate({"hive", "pig"})
public static InputSplit createOldFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  // This may not need to use serialization factory, since OldFormat
  // always uses Writable to write splits.
  Objects.requireNonNull(splitProto, "splitProto cannot be null");
  String className = splitProto.getSplitClassName();
  Class<InputSplit> clazz;

  try {
    clazz = (Class<InputSplit>) Class.forName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + className + "]", e);
  }

  Deserializer<InputSplit> deserializer = serializationFactory
      .getDeserializer(clazz);
  deserializer.open(splitProto.getSplitBytes().newInput());
  InputSplit inputSplit = deserializer.deserialize(null);
  deserializer.close();
  return inputSplit;
}
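A minimal usage sketch for this helper. The wrapper method name readOldFormatSplit, the raw byte[] payload, and the JobConf are illustrative assumptions, not part of the Tez sources:

// Hypothetical wrapper (illustrative only): rebuild an old-format split from raw MRSplitProto bytes.
static org.apache.hadoop.mapred.InputSplit readOldFormatSplit(byte[] serializedSplitBytes, JobConf jobConf)
    throws IOException {
  SerializationFactory serializationFactory = new SerializationFactory(jobConf);
  MRRuntimeProtos.MRSplitProto splitProto =
      MRRuntimeProtos.MRSplitProto.parseFrom(serializedSplitBytes);
  return MRInputHelpers.createOldFormatSplitFromUserPayload(splitProto, serializationFactory);
}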
 
Example 2
Source File: MRInputHelpers.java    From tez with Apache License 2.0
/**
 * Create an instance of {@link org.apache.hadoop.mapreduce.InputSplit} from the {@link
 * org.apache.tez.mapreduce.input.MRInput} representation of a split.
 *
 * @param splitProto           The {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                             instance representing the split
 * @param serializationFactory the serialization mechanism used to write out the split
 * @return an instance of the split
 * @throws IOException
 */
@InterfaceStability.Evolving
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  Objects.requireNonNull(splitProto, "splitProto must be specified");
  String className = splitProto.getSplitClassName();
  Class<org.apache.hadoop.mapreduce.InputSplit> clazz;

  try {
    clazz = (Class<org.apache.hadoop.mapreduce.InputSplit>) Class
        .forName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + className + "]", e);
  }

  Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = serializationFactory
      .getDeserializer(clazz);
  deserializer.open(splitProto.getSplitBytes().newInput());
  org.apache.hadoop.mapreduce.InputSplit inputSplit = deserializer
      .deserialize(null);
  deserializer.close();
  return inputSplit;
}
 
Example 3
Source File: MRInputHelpers.java    From tez with Apache License 2.0
@InterfaceStability.Evolving
public static <T extends org.apache.hadoop.mapreduce.InputSplit> MRRuntimeProtos.MRSplitProto createSplitProto(
    T newSplit, SerializationFactory serializationFactory)
    throws IOException, InterruptedException {
  MRRuntimeProtos.MRSplitProto.Builder builder = MRRuntimeProtos.MRSplitProto
      .newBuilder();

  builder.setSplitClassName(newSplit.getClass().getName());

  @SuppressWarnings("unchecked")
  Serializer<T> serializer = serializationFactory
      .getSerializer((Class<T>) newSplit.getClass());
  ByteString.Output out = ByteString
      .newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  serializer.open(out);
  serializer.serialize(newSplit);
  // TODO MR Compat: Check against max block locations per split.
  ByteString splitBs = out.toByteString();
  builder.setSplitBytes(splitBs);

  return builder.build();
}
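A hedged round-trip sketch combining this method with the deserializer from Example 2. The file path, length, and host name are made-up values, and roundTripNewFormatSplit is not a Tez method:

// Illustrative round trip: serialize a new-format FileSplit into an MRSplitProto,
// then rebuild it via createNewFormatSplitFromUserPayload.
static org.apache.hadoop.mapreduce.InputSplit roundTripNewFormatSplit(Configuration conf)
    throws IOException, InterruptedException {
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  org.apache.hadoop.mapreduce.lib.input.FileSplit newSplit =
      new org.apache.hadoop.mapreduce.lib.input.FileSplit(
          new Path("/tmp/input/part-00000"), 0L, 128L, new String[] { "host1" });
  MRRuntimeProtos.MRSplitProto splitProto =
      MRInputHelpers.createSplitProto(newSplit, serializationFactory);
  return MRInputHelpers.createNewFormatSplitFromUserPayload(splitProto, serializationFactory);
}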
 
Example 4
Source File: MRInputHelpers.java    From tez with Apache License 2.0
@InterfaceStability.Evolving
@InterfaceAudience.LimitedPrivate({"hive", "pig"})
public static MRRuntimeProtos.MRSplitProto createSplitProto(
    org.apache.hadoop.mapred.InputSplit oldSplit) throws IOException {
  MRRuntimeProtos.MRSplitProto.Builder builder = MRRuntimeProtos.MRSplitProto.newBuilder();

  builder.setSplitClassName(oldSplit.getClass().getName());

  ByteString.Output os = ByteString
      .newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  oldSplit.write(new NonSyncDataOutputStream(os));
  ByteString splitBs = os.toByteString();
  builder.setSplitBytes(splitBs);

  return builder.build();
}
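The old-format counterpart needs no SerializationFactory, since the split is written out directly as a Writable. Again, buildOldFormatSplitProto and the split values are illustrative only:

// Illustrative sketch: wrap an old-format (mapred) FileSplit in an MRSplitProto.
static MRRuntimeProtos.MRSplitProto buildOldFormatSplitProto() throws IOException {
  org.apache.hadoop.mapred.FileSplit oldSplit =
      new org.apache.hadoop.mapred.FileSplit(
          new Path("/tmp/input/part-00000"), 0L, 128L, new String[] { "host1" });
  return MRInputHelpers.createSplitProto(oldSplit);
}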
 
Example 5
Source File: TestMRInput.java    From tez with Apache License 2.0
@Test(timeout = 5000)
public void testAttributesInJobConf() throws Exception {
  InputContext inputContext = mock(InputContext.class);
  doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier();
  doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex();
  doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex();
  doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber();
  doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex();
  doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber();
  doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName();
  doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName();
  doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName();
  doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId();
  doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier();
  doReturn(new Configuration(false)).when(inputContext).getContainerConfiguration();


  DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false),
      TestInputFormat.class).groupSplits(false).build();

  doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
  doReturn(new TezCounters()).when(inputContext).getCounters();


  MRInput mrInput = new MRInput(inputContext, 1);
  mrInput.initialize();

  MRRuntimeProtos.MRSplitProto splitProto =
      MRRuntimeProtos.MRSplitProto.newBuilder().setSplitClassName(TestInputSplit.class.getName())
          .build();
  InputDataInformationEvent diEvent = InputDataInformationEvent
      .createWithSerializedPayload(0, splitProto.toByteString().asReadOnlyByteBuffer());

  List<Event> events = new LinkedList<>();
  events.add(diEvent);
  mrInput.handleEvents(events);
  TezCounter counter = mrInput.getContext().getCounters()
      .findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES);
  assertEquals(counter.getValue(), TestInputSplit.length);
  assertTrue(TestInputFormat.invoked.get());
}