Java Code Examples for org.apache.tez.mapreduce.hadoop.MRHelpers#createSplitProto()

The following examples show how to use org.apache.tez.mapreduce.hadoop.MRHelpers#createSplitProto(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: TestMultiMRInput.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Feeds a single serialized split to a {@code MultiMRInput} configured with one physical input
 * and checks that exactly one reader is exposed and that it returns every entry of the map
 * produced by {@code createInputData}, each with the expected value.
 *
 * @throws Exception if preparing the input data or reading it back fails
 */
@Test(timeout = 5000)
public void testSingleSplit() throws Exception {

  Path workDir = new Path(TEST_ROOT_DIR, "testSingleSplit");
  JobConf jobConf = new JobConf(defaultConf);
  jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(jobConf, workDir);

  // The user payload carries the input format name and the serialized job configuration.
  MRInputUserPayloadProto.Builder builder = MRInputUserPayloadProto.newBuilder();
  builder.setInputFormatName(SequenceFileInputFormat.class.getName());
  builder.setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf));
  byte[] payload = builder.build().toByteArray();

  TezInputContext inputContext = createTezInputContext(payload);

  MultiMRInput input = new MultiMRInput();
  input.setNumPhysicalInputs(1);
  input.initialize(inputContext);

  String file1 = "file1";
  LinkedHashMap<LongWritable, Text> data1 = createInputData(localFs, workDir, jobConf, file1, 0,
      10);
  SequenceFileInputFormat<LongWritable, Text> format =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(jobConf, 1);
  assertEquals(1, splits.length);

  MRSplitProto splitProto = MRHelpers.createSplitProto(splits[0]);
  RootInputDataInformationEvent event = new RootInputDataInformationEvent(0,
      splitProto.toByteArray());

  List<Event> eventList = new ArrayList<Event>();
  eventList.add(event);
  input.handleEvents(eventList);

  int readerCount = 0;
  for (KeyValueReader reader : input.getKeyValueReaders()) {
    readerCount++;
    while (reader.next()) {
      if (data1.isEmpty()) {
        fail("Found more records than expected");
      }
      Object key = reader.getCurrentKey();
      Object val = reader.getCurrentValue();
      // JUnit's contract is assertEquals(expected, actual): the stored value is the expectation.
      assertEquals(data1.remove(key), val);
    }
  }
  assertEquals(1, readerCount);
  // Without this check the test would pass even if the reader returned no records at all.
  assertEquals("Not all expected records were read", 0, data1.size());
}
 
Example 2
Source File: TestMultiMRInput.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Feeds two serialized splits to a {@code MultiMRInput} configured with two physical inputs and
 * checks that two readers are exposed and that, together, they return every entry of the maps
 * produced by the two {@code createInputData} calls, each with the expected value.
 *
 * @throws Exception if preparing the input data or reading it back fails
 */
@Test(timeout = 5000)
public void testMultipleSplits() throws Exception {

  Path workDir = new Path(TEST_ROOT_DIR, "testMultipleSplits");
  JobConf jobConf = new JobConf(defaultConf);
  jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(jobConf, workDir);

  // The user payload carries the input format name and the serialized job configuration.
  MRInputUserPayloadProto.Builder builder = MRInputUserPayloadProto.newBuilder();
  builder.setInputFormatName(SequenceFileInputFormat.class.getName());
  builder.setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf));
  byte[] payload = builder.build().toByteArray();

  TezInputContext inputContext = createTezInputContext(payload);

  MultiMRInput input = new MultiMRInput();
  input.setNumPhysicalInputs(2);
  input.initialize(inputContext);

  // Union of the records written to both files; entries are removed as readers return them.
  LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();

  String file1 = "file1";
  LinkedHashMap<LongWritable, Text> data1 = createInputData(localFs, workDir, jobConf, file1, 0,
      10);

  String file2 = "file2";
  LinkedHashMap<LongWritable, Text> data2 = createInputData(localFs, workDir, jobConf, file2, 10,
      20);

  data.putAll(data1);
  data.putAll(data2);

  SequenceFileInputFormat<LongWritable, Text> format =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(jobConf, 2);
  assertEquals(2, splits.length);

  // NOTE(review): both events are created with index 0 — confirm whether the second one is
  // meant to carry index 1.
  MRSplitProto splitProto1 = MRHelpers.createSplitProto(splits[0]);
  RootInputDataInformationEvent event1 = new RootInputDataInformationEvent(0,
      splitProto1.toByteArray());

  MRSplitProto splitProto2 = MRHelpers.createSplitProto(splits[1]);
  RootInputDataInformationEvent event2 = new RootInputDataInformationEvent(0,
      splitProto2.toByteArray());

  List<Event> eventList = new ArrayList<Event>();
  eventList.add(event1);
  eventList.add(event2);
  input.handleEvents(eventList);

  int readerCount = 0;
  for (KeyValueReader reader : input.getKeyValueReaders()) {
    readerCount++;
    while (reader.next()) {
      if (data.isEmpty()) {
        fail("Found more records than expected");
      }
      Object key = reader.getCurrentKey();
      Object val = reader.getCurrentValue();
      // JUnit's contract is assertEquals(expected, actual): the stored value is the expectation.
      assertEquals(data.remove(key), val);
    }
  }
  assertEquals(2, readerCount);
  // Without this check the test would pass even if the readers skipped some or all records.
  assertEquals("Not all expected records were read", 0, data.size());
}
 
Example 3
Source File: TestMultiMRInput.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Sends two data-information events to a {@code MultiMRInput} that was configured with a single
 * physical input and expects {@code handleEvents} to reject the surplus event.
 *
 * @throws Exception if preparing the input or the test data fails
 */
@Test(timeout = 5000)
public void testExtraEvents() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testExtraEvents");
  JobConf conf = new JobConf(defaultConf);
  conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inputDir);

  // Input format name plus serialized configuration make up the user payload.
  byte[] userPayload = MRInputUserPayloadProto.newBuilder()
      .setInputFormatName(SequenceFileInputFormat.class.getName())
      .setConfigurationBytes(TezUtils.createByteStringFromConf(conf))
      .build()
      .toByteArray();

  TezInputContext context = createTezInputContext(userPayload);

  MultiMRInput multiInput = new MultiMRInput();
  multiInput.setNumPhysicalInputs(1);
  multiInput.initialize(context);

  createInputData(localFs, inputDir, conf, "file1", 0, 10);
  SequenceFileInputFormat<LongWritable, Text> inputFormat =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] computedSplits = inputFormat.getSplits(conf, 1);
  assertEquals(1, computedSplits.length);

  // The same split is wrapped in two events: one more than the single configured input.
  MRSplitProto serializedSplit = MRHelpers.createSplitProto(computedSplits[0]);
  RootInputDataInformationEvent firstEvent =
      new RootInputDataInformationEvent(0, serializedSplit.toByteArray());
  RootInputDataInformationEvent surplusEvent =
      new RootInputDataInformationEvent(1, serializedSplit.toByteArray());

  List<Event> events = new ArrayList<Event>();
  events.add(firstEvent);
  events.add(surplusEvent);

  try {
    multiInput.handleEvents(events);
    fail("Expecting Exception due to too many events");
  } catch (Exception expected) {
    String message = expected.getMessage();
    assertTrue(message.contains("Unexpected event. All physical sources already initialized"));
  }
}
 
Example 4
Source File: TestMRInputSplitDistributor.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code MRInputSplitDistributor.initialize} with the serialize-event-payload flag set to
 * {@code true} and checks that the two data-information events carry only a serialized payload
 * which parses back into the original test splits.
 *
 * @throws IOException if building the payload or parsing a split proto fails
 */
@Test
public void testSerializedPayload() throws IOException {
  Configuration conf = new Configuration(false);
  conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, true);
  ByteString serializedConf = MRHelpers.createByteStringFromConf(conf);

  // Two test splits, distinguishable by their identifiers 1 and 2.
  InputSplit firstSplit = new InputSplitForTest(1);
  InputSplit secondSplit = new InputSplitForTest(2);
  MRSplitProto firstProto = MRHelpers.createSplitProto(firstSplit);
  MRSplitProto secondProto = MRHelpers.createSplitProto(secondSplit);

  MRSplitsProto allSplits = MRSplitsProto.newBuilder()
      .addSplits(firstProto)
      .addSplits(secondProto)
      .build();
  byte[] userPayload = MRInputUserPayloadProto.newBuilder()
      .setSplits(allSplits)
      .setConfigurationBytes(serializedConf)
      .build()
      .toByteArray();

  TezRootInputInitializerContext context = new TezRootInputInitializerContextForTest(userPayload);
  MRInputSplitDistributor distributor = new MRInputSplitDistributor();

  List<Event> generated = distributor.initialize(context);

  // Expected layout: one payload-update event followed by one event per split.
  assertEquals(3, generated.size());
  assertTrue(generated.get(0) instanceof RootInputUpdatePayloadEvent);
  assertTrue(generated.get(1) instanceof RootInputDataInformationEvent);
  assertTrue(generated.get(2) instanceof RootInputDataInformationEvent);

  RootInputDataInformationEvent firstInfo = (RootInputDataInformationEvent) generated.get(1);
  RootInputDataInformationEvent secondInfo = (RootInputDataInformationEvent) generated.get(2);

  // With serialization enabled only the byte payload is expected to be populated.
  assertNull(firstInfo.getDeserializedUserPayload());
  assertNull(secondInfo.getDeserializedUserPayload());

  assertNotNull(firstInfo.getUserPayload());
  assertNotNull(secondInfo.getUserPayload());

  MRSplitProto parsedFirst = MRSplitProto.parseFrom(firstInfo.getUserPayload());
  InputSplit restoredFirst =
      MRInputUtils.getOldSplitDetailsFromEvent(parsedFirst, new Configuration());
  assertTrue(restoredFirst instanceof InputSplitForTest);
  assertEquals(1, ((InputSplitForTest) restoredFirst).identifier);

  MRSplitProto parsedSecond = MRSplitProto.parseFrom(secondInfo.getUserPayload());
  InputSplit restoredSecond =
      MRInputUtils.getOldSplitDetailsFromEvent(parsedSecond, new Configuration());
  assertTrue(restoredSecond instanceof InputSplitForTest);
  assertEquals(2, ((InputSplitForTest) restoredSecond).identifier);
}
 
Example 5
Source File: TestMRInputSplitDistributor.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code MRInputSplitDistributor.initialize} with the serialize-event-payload flag set to
 * {@code false} and checks that the two data-information events carry only a deserialized
 * payload, namely the original test splits.
 *
 * @throws IOException if building the payload fails
 */
@Test
public void testDeserializedPayload() throws IOException {
  Configuration conf = new Configuration(false);
  conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, false);
  ByteString serializedConf = MRHelpers.createByteStringFromConf(conf);

  // Two test splits, distinguishable by their identifiers 1 and 2.
  InputSplit firstSplit = new InputSplitForTest(1);
  InputSplit secondSplit = new InputSplitForTest(2);
  MRSplitProto firstProto = MRHelpers.createSplitProto(firstSplit);
  MRSplitProto secondProto = MRHelpers.createSplitProto(secondSplit);

  MRSplitsProto allSplits = MRSplitsProto.newBuilder()
      .addSplits(firstProto)
      .addSplits(secondProto)
      .build();
  byte[] userPayload = MRInputUserPayloadProto.newBuilder()
      .setSplits(allSplits)
      .setConfigurationBytes(serializedConf)
      .build()
      .toByteArray();

  TezRootInputInitializerContext context = new TezRootInputInitializerContextForTest(userPayload);
  MRInputSplitDistributor distributor = new MRInputSplitDistributor();

  List<Event> generated = distributor.initialize(context);

  // Expected layout: one payload-update event followed by one event per split.
  assertEquals(3, generated.size());
  assertTrue(generated.get(0) instanceof RootInputUpdatePayloadEvent);
  assertTrue(generated.get(1) instanceof RootInputDataInformationEvent);
  assertTrue(generated.get(2) instanceof RootInputDataInformationEvent);

  RootInputDataInformationEvent firstInfo = (RootInputDataInformationEvent) generated.get(1);
  RootInputDataInformationEvent secondInfo = (RootInputDataInformationEvent) generated.get(2);

  // With serialization disabled only the deserialized object is expected to be populated.
  assertNull(firstInfo.getUserPayload());
  assertNull(secondInfo.getUserPayload());

  assertNotNull(firstInfo.getDeserializedUserPayload());
  assertNotNull(secondInfo.getDeserializedUserPayload());

  assertTrue(firstInfo.getDeserializedUserPayload() instanceof InputSplitForTest);
  assertEquals(1, ((InputSplitForTest) firstInfo.getDeserializedUserPayload()).identifier);

  assertTrue(secondInfo.getDeserializedUserPayload() instanceof InputSplitForTest);
  assertEquals(2, ((InputSplitForTest) secondInfo.getDeserializedUserPayload()).identifier);
}