Java Code Examples for org.apache.tez.dag.api.TezConfiguration#TEZ_AM_STAGING_DIR

The following examples show how to use org.apache.tez.dag.api.TezConfiguration#TEZ_AM_STAGING_DIR. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestTezJobs.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * End-to-end check of {@code IntersectExample} using {@code remoteFs}.
 *
 * <p>Writes {@code term0}..{@code term19} into the first input file and only the
 * even-numbered terms into the second one, runs the example with the staging
 * directory supplied as a {@code -D} option, and then checks that the only output
 * entry whose name does not start with {@code _} or {@code .} lists each
 * even-numbered term once and nothing else.
 *
 * @throws Exception if file system access or the example run fails
 */
@Test(timeout = 60000)
public void testIntersectExample() throws Exception {
  IntersectExample example = new IntersectExample();
  example.setConf(new Configuration(mrrTezCluster.getConfig()));

  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path firstInput = new Path("/tmp/inPath1");
  Path secondInput = new Path("/tmp/inPath2");
  Path output = new Path("/tmp/outPath");
  remoteFs.mkdirs(firstInput);
  remoteFs.mkdirs(secondInput);
  remoteFs.mkdirs(stagingDirPath);

  // Terms present in both inputs; the output must contain exactly these.
  Set<String> commonTerms = new HashSet<String>();

  FSDataOutputStream firstStream = remoteFs.create(new Path(firstInput, "file"));
  FSDataOutputStream secondStream = remoteFs.create(new Path(secondInput, "file"));
  BufferedWriter firstWriter = new BufferedWriter(new OutputStreamWriter(firstStream));
  BufferedWriter secondWriter = new BufferedWriter(new OutputStreamWriter(secondStream));
  for (int n = 0; n < 20; n++) {
    String term = "term" + n;
    firstWriter.write(term);
    firstWriter.newLine();
    if (n % 2 != 0) {
      // Odd terms go to the first input only.
      continue;
    }
    secondWriter.write(term);
    secondWriter.newLine();
    commonTerms.add(term);
  }
  firstWriter.close();
  secondWriter.close();
  firstStream.close();
  secondStream.close();

  String stagingOption =
      "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String[] args = {
      stagingOption, firstInput.toString(), secondInput.toString(), "1", output.toString()};
  int exitCode = example.run(args);
  assertEquals(0, exitCode);

  // Ignore entries whose names start with '_' or '.'.
  PathFilter visibleOnly = new PathFilter() {
    @Override
    public boolean accept(Path candidate) {
      String fileName = candidate.getName();
      return !(fileName.startsWith("_") || fileName.startsWith("."));
    }
  };
  FileStatus[] outputFiles = remoteFs.listStatus(output, visibleOnly);
  assertEquals(1, outputFiles.length);

  FSDataInputStream resultStream = remoteFs.open(outputFiles[0].getPath());
  BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
  for (String row = resultReader.readLine(); row != null; row = resultReader.readLine()) {
    // Each output line must be an expected term that has not been seen yet.
    assertTrue(commonTerms.remove(row));
  }
  resultReader.close();
  resultStream.close();
  assertEquals(0, commonTerms.size());
}
 
Example 2
Source File: TestRecovery.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code HashJoinExample} with the recovery event hook installed and checks
 * both the join output and the last recovery event recorded for attempt 1.
 *
 * <p>The configuration allows up to 4 AM attempts, plugs in
 * {@code RecoveryServiceWithEventHandlingHook} with {@code SimpleRecoveryEventHook},
 * disables draining of events on stop and auto-deletion of staging scratch data,
 * and sets IPC connect retries to 0 with a 1000 connect timeout.
 * NOTE(review): presumably the hook shuts the AM down once the condition matches,
 * forcing a recovery; confirm against {@code SimpleRecoveryEventHook}.
 *
 * @param shutdownCondition condition serialized into the configuration; the last
 *     history event of attempt 1 must have its event type and must match it
 * @param enableAutoParallelism value for the ShuffleVertexManager auto-parallel flag
 * @param generateSplitInClient whether to pass {@code -generateSplitInClient} to the example
 * @throws Exception if file system access, the example run or recovery parsing fails
 */
private void testHashJoinExample(SimpleShutdownCondition shutdownCondition,
    boolean enableAutoParallelism, boolean generateSplitInClient) throws Exception {
  HashJoinExample hashJoinExample = new HashJoinExample();
  TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
  tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
  tezConf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS,
      RecoveryServiceWithEventHandlingHook.class.getName());
  tezConf.set(
      RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS,
      SimpleRecoveryEventHook.class.getName());
  tezConf.set(SimpleRecoveryEventHook.SIMPLE_SHUTDOWN_CONDITION,
      shutdownCondition.serialize());
  tezConf.setBoolean(
      ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
      enableAutoParallelism);
  tezConf.setBoolean(
      RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
  tezConf.setBoolean(
      TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
  tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 0);
  tezConf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 0);
  tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY, 1000);
  tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG");

  hashJoinExample.setConf(tezConf);
  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path inPath1 = new Path("/tmp/hashJoin/inPath1");
  Path inPath2 = new Path("/tmp/hashJoin/inPath2");
  Path outPath = new Path("/tmp/hashJoin/outPath");
  // Remove output left by an earlier invocation before the example writes to it.
  remoteFs.delete(outPath, true);
  remoteFs.mkdirs(inPath1);
  remoteFs.mkdirs(inPath2);
  remoteFs.mkdirs(stagingDirPath);

  // Terms written to both inputs; the join output must contain exactly these.
  Set<String> expectedResult = new HashSet<String>();

  FSDataOutputStream out1 = remoteFs.create(new Path(inPath1, "file"));
  FSDataOutputStream out2 = remoteFs.create(new Path(inPath2, "file"));
  BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1));
  BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2));
  for (int i = 0; i < 20; i++) {
    String term = "term" + i;
    writer1.write(term);
    writer1.newLine();
    if (i % 2 == 0) {
      writer2.write(term);
      writer2.newLine();
      expectedResult.add(term);
    }
  }
  writer1.close();
  writer2.close();
  out1.close();
  out2.close();

  // The staging-dir option is common to both argument layouts.
  String stagingDirOption = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "="
      + stagingDirPath.toString();
  String[] args;
  if (generateSplitInClient) {
    args = new String[]{
        stagingDirOption,
        "-generateSplitInClient",
        inPath1.toString(), inPath2.toString(), "1", outPath.toString()};
  } else {
    args = new String[]{
        stagingDirOption,
        inPath1.toString(), inPath2.toString(), "1", outPath.toString()};
  }
  assertEquals(0, hashJoinExample.run(args));

  // Only consider output entries whose names do not start with '_' or '.'.
  FileStatus[] statuses = remoteFs.listStatus(outPath, new PathFilter() {
    public boolean accept(Path p) {
      String name = p.getName();
      return !name.startsWith("_") && !name.startsWith(".");
    }
  });
  assertEquals(1, statuses.length);
  FSDataInputStream inStream = remoteFs.open(statuses[0].getPath());
  BufferedReader reader = new BufferedReader(new InputStreamReader(inStream));
  String line;
  while ((line = reader.readLine()) != null) {
    // Each output line must be an expected term that has not been seen yet.
    assertTrue(expectedResult.remove(line));
  }
  reader.close();
  inStream.close();
  assertEquals(0, expectedResult.size());

  List<HistoryEvent> historyEventsOfAttempt1 = RecoveryParser
      .readRecoveryEvents(tezConf, hashJoinExample.getAppId(), 1);
  // Fail with a clear message instead of an IndexOutOfBoundsException from get(-1).
  assertTrue("no recovery events were read for attempt 1",
      !historyEventsOfAttempt1.isEmpty());
  HistoryEvent lastEvent = historyEventsOfAttempt1
      .get(historyEventsOfAttempt1.size() - 1);
  assertEquals(shutdownCondition.getEvent().getEventType(),
      lastEvent.getEventType());
  assertTrue(shutdownCondition.match(lastEvent));
}
 
Example 3
Source File: TestTezJobs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * End-to-end check of {@code HashJoinExample} using {@code remoteFs}.
 *
 * <p>The first input receives {@code term0}..{@code term19}; the second receives
 * only the even-numbered terms. The example is run with the staging directory as a
 * {@code -D} option plus the {@code -counter} flag, and the only output entry whose
 * name does not start with {@code _} or {@code .} must list each even-numbered term
 * once and nothing else.
 *
 * @throws Exception if file system access or the example run fails
 */
@Test(timeout = 60000)
public void testHashJoinExample() throws Exception {
  HashJoinExample joinExample = new HashJoinExample();
  joinExample.setConf(new Configuration(mrrTezCluster.getConfig()));

  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path leftInput = new Path("/tmp/hashJoin/inPath1");
  Path rightInput = new Path("/tmp/hashJoin/inPath2");
  Path joinOutput = new Path("/tmp/hashJoin/outPath");
  remoteFs.mkdirs(leftInput);
  remoteFs.mkdirs(rightInput);
  remoteFs.mkdirs(stagingDirPath);

  // Terms present on both sides of the join.
  Set<String> pendingTerms = new HashSet<String>();

  FSDataOutputStream leftStream = remoteFs.create(new Path(leftInput, "file"));
  FSDataOutputStream rightStream = remoteFs.create(new Path(rightInput, "file"));
  BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftStream));
  BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightStream));
  int index = 0;
  while (index < 20) {
    String term = "term" + index;
    leftWriter.write(term);
    leftWriter.newLine();
    boolean evenIndex = index % 2 == 0;
    if (evenIndex) {
      rightWriter.write(term);
      rightWriter.newLine();
      pendingTerms.add(term);
    }
    index++;
  }
  leftWriter.close();
  rightWriter.close();
  leftStream.close();
  rightStream.close();

  String stagingOption =
      "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String[] args = {
      stagingOption,
      "-counter", leftInput.toString(), rightInput.toString(), "1", joinOutput.toString()};
  int exitCode = joinExample.run(args);
  assertEquals(0, exitCode);

  // Ignore entries whose names start with '_' or '.'.
  PathFilter visibleOnly = new PathFilter() {
    @Override
    public boolean accept(Path candidate) {
      String fileName = candidate.getName();
      return !(fileName.startsWith("_") || fileName.startsWith("."));
    }
  };
  FileStatus[] outputFiles = remoteFs.listStatus(joinOutput, visibleOnly);
  assertEquals(1, outputFiles.length);

  FSDataInputStream resultStream = remoteFs.open(outputFiles[0].getPath());
  BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
  for (String row = resultReader.readLine(); row != null; row = resultReader.readLine()) {
    // Each output line must be an expected term that has not been seen yet.
    assertTrue(pendingTerms.remove(row));
  }
  resultReader.close();
  resultStream.close();
  assertEquals(0, pendingTerms.size());
}
 
Example 4
Source File: TestTezJobs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code HashJoinExample} against {@code localFs} with the {@code -counter},
 * {@code -local} and {@code -disableSplitGrouping} flags.
 *
 * <p>All paths live under {@code TEST_ROOT_DIR}. The first input receives
 * {@code term0}..{@code term19}, the second only the even-numbered terms. After a
 * successful run, the only output entry whose name does not start with {@code _} or
 * {@code .} must list each even-numbered term once and nothing else.
 *
 * @throws Exception if file system access or the example run fails
 */
@Test(timeout = 60000)
public void testHashJoinExampleDisableSplitGrouping() throws Exception {
  HashJoinExample joinExample = new HashJoinExample();
  joinExample.setConf(new Configuration(mrrTezCluster.getConfig()));

  Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/tez-staging-dir");
  Path leftInput = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath1");
  Path rightInput = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath2");
  Path joinOutput = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/outPath");
  // Clear any output left from a previous run before recreating the inputs.
  localFs.delete(joinOutput, true);
  localFs.mkdirs(leftInput);
  localFs.mkdirs(rightInput);
  localFs.mkdirs(stagingDirPath);

  // Terms present on both sides of the join.
  Set<String> pendingTerms = new HashSet<String>();

  FSDataOutputStream leftStream = localFs.create(new Path(leftInput, "file"));
  FSDataOutputStream rightStream = localFs.create(new Path(rightInput, "file"));
  BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftStream));
  BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightStream));
  for (int n = 0; n < 20; n++) {
    String term = "term" + n;
    leftWriter.write(term);
    leftWriter.newLine();
    if (n % 2 != 0) {
      // Odd terms go to the first input only.
      continue;
    }
    rightWriter.write(term);
    rightWriter.newLine();
    pendingTerms.add(term);
  }
  leftWriter.close();
  rightWriter.close();
  leftStream.close();
  rightStream.close();

  String stagingOption =
      "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String[] args = {
      stagingOption,
      "-counter", "-local", "-disableSplitGrouping",
      leftInput.toString(), rightInput.toString(), "1", joinOutput.toString()};
  int exitCode = joinExample.run(args);
  assertEquals(0, exitCode);

  // Ignore entries whose names start with '_' or '.'.
  PathFilter visibleOnly = new PathFilter() {
    @Override
    public boolean accept(Path candidate) {
      String fileName = candidate.getName();
      return !(fileName.startsWith("_") || fileName.startsWith("."));
    }
  };
  FileStatus[] outputFiles = localFs.listStatus(joinOutput, visibleOnly);
  assertEquals(1, outputFiles.length);

  FSDataInputStream resultStream = localFs.open(outputFiles[0].getPath());
  BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
  for (String row = resultReader.readLine(); row != null; row = resultReader.readLine()) {
    // Each output line must be an expected term that has not been seen yet.
    assertTrue(pendingTerms.remove(row));
  }
  resultReader.close();
  resultStream.close();
  assertEquals(0, pendingTerms.size());
}
 
Example 5
Source File: TestTezJobs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * End-to-end check of {@code SortMergeJoinExample} using {@code remoteFs}.
 *
 * <p>The first input receives {@code term0}..{@code term19}; the second receives
 * only the even-numbered terms. The example is run with the staging directory and
 * an application priority of 2 as {@code -D} options plus the {@code -counter} flag.
 * The only output entry whose name does not start with {@code _} or {@code .} must
 * list each even-numbered term once and nothing else.
 *
 * @throws Exception if file system access or the example run fails
 */
@Test(timeout = 60000)
public void testSortMergeJoinExample() throws Exception {
  SortMergeJoinExample joinExample = new SortMergeJoinExample();
  joinExample.setConf(new Configuration(mrrTezCluster.getConfig()));

  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path leftInput = new Path("/tmp/sortMerge/inPath1");
  Path rightInput = new Path("/tmp/sortMerge/inPath2");
  Path joinOutput = new Path("/tmp/sortMerge/outPath");
  remoteFs.mkdirs(leftInput);
  remoteFs.mkdirs(rightInput);
  remoteFs.mkdirs(stagingDirPath);

  // Terms present on both sides of the join.
  Set<String> pendingTerms = new HashSet<String>();

  FSDataOutputStream leftStream = remoteFs.create(new Path(leftInput, "file"));
  FSDataOutputStream rightStream = remoteFs.create(new Path(rightInput, "file"));
  BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftStream));
  BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightStream));
  int index = 0;
  while (index < 20) {
    String term = "term" + index;
    leftWriter.write(term);
    leftWriter.newLine();
    boolean evenIndex = index % 2 == 0;
    if (evenIndex) {
      rightWriter.write(term);
      rightWriter.newLine();
      pendingTerms.add(term);
    }
    index++;
  }
  leftWriter.close();
  rightWriter.close();
  leftStream.close();
  rightStream.close();

  String stagingOption =
      "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String priorityOption =
      "-D" + TezConfiguration.TEZ_AM_APPLICATION_PRIORITY + "=" + "2";
  String[] args = {
      stagingOption,
      priorityOption,
      "-counter", leftInput.toString(), rightInput.toString(), "1", joinOutput.toString()};
  int exitCode = joinExample.run(args);
  assertEquals(0, exitCode);

  // Ignore entries whose names start with '_' or '.'.
  PathFilter visibleOnly = new PathFilter() {
    @Override
    public boolean accept(Path candidate) {
      String fileName = candidate.getName();
      return !(fileName.startsWith("_") || fileName.startsWith("."));
    }
  };
  FileStatus[] outputFiles = remoteFs.listStatus(joinOutput, visibleOnly);
  assertEquals(1, outputFiles.length);

  FSDataInputStream resultStream = remoteFs.open(outputFiles[0].getPath());
  BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
  for (String row = resultReader.readLine(); row != null; row = resultReader.readLine()) {
    // Each output line must be an expected term that has not been seen yet.
    assertTrue(pendingTerms.remove(row));
  }
  resultReader.close();
  resultStream.close();
  assertEquals(0, pendingTerms.size());
}
 
Example 6
Source File: TestTezJobs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that per-input and per-output counter groups of the {@code joiner} vertex
 * are consistent with the aggregated {@code TaskCounter} group.
 *
 * <p>Enables {@code TEZ_TASK_GENERATE_COUNTERS_PER_IO}, runs
 * {@code SortMergeJoinHelper} with an explicitly started {@code TezClient}, verifies
 * the join output, and then asserts that:
 * <ul>
 *   <li>the aggregated group and both per-input groups are non-empty;</li>
 *   <li>for each listed counter, the aggregated value equals the sum of the two
 *       per-input values, and at least one aggregated value is non-zero;</li>
 *   <li>the aggregated {@code OUTPUT_RECORDS} value is positive and equals the
 *       per-output value for {@code joinOutput}.</li>
 * </ul>
 *
 * @throws Exception if file system access, the job run or status retrieval fails
 */
@Test(timeout = 60000)
public void testPerIOCounterAggregation() throws Exception {
  String baseDir = "/tmp/perIOCounterAgg/";
  Path inPath1 = new Path(baseDir + "inPath1");
  Path inPath2 = new Path(baseDir + "inPath2");
  Path outPath = new Path(baseDir + "outPath");
  final Set<String> expectedResults = generateSortMergeJoinInput(inPath1, inPath2);
  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  remoteFs.mkdirs(stagingDirPath);

  TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
  conf.setBoolean(TezConfiguration.TEZ_TASK_GENERATE_COUNTERS_PER_IO, true);
  TezClient tezClient = TezClient.create(SortMergeJoinHelper.class.getSimpleName(), conf);
  tezClient.start();

  try {
    SortMergeJoinHelper sortMergeJoinHelper = new SortMergeJoinHelper(tezClient);
    sortMergeJoinHelper.setConf(conf);

    String[] args = new String[] {
        "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(),
        "-counter", inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
    assertEquals(0, sortMergeJoinHelper.run(conf, args, tezClient));

    verifySortMergeJoinInput(outPath, expectedResults);

    String joinerVertexName = "joiner";
    String input1Name = "input1";
    String input2Name = "input2";
    String joinOutputName = "joinOutput";
    Set<StatusGetOpts> statusOpts = new HashSet<StatusGetOpts>();
    statusOpts.add(StatusGetOpts.GET_COUNTERS);
    VertexStatus joinerVertexStatus =
        sortMergeJoinHelper.dagClient.getVertexStatus(joinerVertexName, statusOpts);
    final TezCounters joinerCounters = joinerVertexStatus.getVertexCounters();
    final CounterGroup aggregatedGroup =
        joinerCounters.getGroup(TaskCounter.class.getCanonicalName());
    // Per-input groups are looked up as <TaskCounter>_<vertex>_INPUT_<input name>.
    final CounterGroup input1Group = joinerCounters.getGroup(
        TaskCounter.class.getSimpleName() + "_" + joinerVertexName + "_INPUT_" + input1Name);
    final CounterGroup input2Group = joinerCounters.getGroup(
        TaskCounter.class.getSimpleName() + "_" + joinerVertexName + "_INPUT_" + input2Name);
    assertTrue("aggregated counter group cannot be empty", aggregatedGroup.size() > 0);
    assertTrue("per io group for input1 cannot be empty", input1Group.size() > 0);
    // Message previously named input1 here, which misreported the failing group.
    assertTrue("per io group for input2 cannot be empty", input2Group.size() > 0);

    List<TaskCounter> countersToVerifyAgg = Arrays.asList(
        TaskCounter.ADDITIONAL_SPILLS_BYTES_READ,
        TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN,
        TaskCounter.COMBINE_INPUT_RECORDS,
        TaskCounter.MERGED_MAP_OUTPUTS,
        TaskCounter.NUM_DISK_TO_DISK_MERGES,
        TaskCounter.NUM_FAILED_SHUFFLE_INPUTS,
        TaskCounter.NUM_MEM_TO_DISK_MERGES,
        TaskCounter.NUM_SHUFFLED_INPUTS,
        TaskCounter.NUM_SKIPPED_INPUTS,
        TaskCounter.REDUCE_INPUT_GROUPS,
        TaskCounter.REDUCE_INPUT_RECORDS,
        TaskCounter.SHUFFLE_BYTES,
        TaskCounter.SHUFFLE_BYTES_DECOMPRESSED,
        TaskCounter.SHUFFLE_BYTES_DISK_DIRECT,
        TaskCounter.SHUFFLE_BYTES_TO_DISK,
        TaskCounter.SHUFFLE_BYTES_TO_MEM,
        TaskCounter.SPILLED_RECORDS
    );

    int nonZeroCounters = 0;
    // verify that the sum of the counter values for edges add up to the aggregated counter value.
    for (TaskCounter c : countersToVerifyAgg) {
      TezCounter aggregatedCounter = aggregatedGroup.findCounter(c.name(), false);
      TezCounter input1Counter = input1Group.findCounter(c.name(), false);
      TezCounter input2Counter = input2Group.findCounter(c.name(), false);
      assertNotNull("aggregated counter cannot be null " + c.name(), aggregatedCounter);
      assertNotNull("input1 counter cannot be null " + c.name(), input1Counter);
      assertNotNull("input2 counter cannot be null " + c.name(), input2Counter);

      assertEquals("aggregated counter does not match sum of input counters " + c.name(),
          aggregatedCounter.getValue(), input1Counter.getValue() + input2Counter.getValue());

      if (aggregatedCounter.getValue() > 0) {
        nonZeroCounters++;
      }
    }

    // ensure that at least one of the counters tested above were non-zero.
    assertTrue("At least one of the counter should be non-zero. invalid test ",
        nonZeroCounters > 0);

    CounterGroup joinerOutputGroup = joinerCounters.getGroup(
        TaskCounter.class.getSimpleName() + "_" + joinerVertexName + "_OUTPUT_" + joinOutputName);
    String outputCounterName = TaskCounter.OUTPUT_RECORDS.name();
    TezCounter aggregateCounter = aggregatedGroup.findCounter(outputCounterName, false);
    TezCounter joinerOutputCounter = joinerOutputGroup.findCounter(outputCounterName, false);
    assertNotNull("aggregated counter cannot be null " + outputCounterName, aggregateCounter);
    assertNotNull("output counter cannot be null " + outputCounterName, joinerOutputCounter);
    assertTrue("counter value is zero. test is invalid", aggregateCounter.getValue() > 0);
    assertEquals("aggregated counter does not match sum of output counters " + outputCounterName,
        aggregateCounter.getValue(), joinerOutputCounter.getValue());
  } finally {
    // This test started the client itself and nothing else in this method stops it,
    // so release it whether or not the assertions above passed.
    tezClient.stop();
  }
}
 
Example 7
Source File: TestTezJobs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code SortMergeJoinExample} against {@code localFs} with the
 * {@code -counter}, {@code -local} and {@code -disableSplitGrouping} flags.
 *
 * <p>All paths live under {@code TEST_ROOT_DIR}. The first input receives
 * {@code term0}..{@code term19}, the second only the even-numbered terms. After a
 * successful run, the only output entry whose name does not start with {@code _} or
 * {@code .} must list each even-numbered term once and nothing else.
 *
 * @throws Exception if file system access or the example run fails
 */
@Test(timeout = 60000)
public void testSortMergeJoinExampleDisableSplitGrouping() throws Exception {
  SortMergeJoinExample joinExample = new SortMergeJoinExample();
  joinExample.setConf(new Configuration(mrrTezCluster.getConfig()));

  Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/tez-staging-dir");
  Path leftInput = new Path(TEST_ROOT_DIR + "/tmp/sortMerge/inPath1");
  Path rightInput = new Path(TEST_ROOT_DIR + "/tmp/sortMerge/inPath2");
  Path joinOutput = new Path(TEST_ROOT_DIR + "/tmp/sortMerge/outPath");
  // Clear any output left from a previous run before recreating the inputs.
  localFs.delete(joinOutput, true);
  localFs.mkdirs(leftInput);
  localFs.mkdirs(rightInput);
  localFs.mkdirs(stagingDirPath);

  // Terms present on both sides of the join.
  Set<String> pendingTerms = new HashSet<String>();

  FSDataOutputStream leftStream = localFs.create(new Path(leftInput, "file"));
  FSDataOutputStream rightStream = localFs.create(new Path(rightInput, "file"));
  BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftStream));
  BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightStream));
  for (int n = 0; n < 20; n++) {
    String term = "term" + n;
    leftWriter.write(term);
    leftWriter.newLine();
    if (n % 2 != 0) {
      // Odd terms go to the first input only.
      continue;
    }
    rightWriter.write(term);
    rightWriter.newLine();
    pendingTerms.add(term);
  }
  leftWriter.close();
  rightWriter.close();
  leftStream.close();
  rightStream.close();

  String stagingOption =
      "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String[] args = {
      stagingOption,
      "-counter", "-local", "-disableSplitGrouping",
      leftInput.toString(), rightInput.toString(), "1", joinOutput.toString()};
  int exitCode = joinExample.run(args);
  assertEquals(0, exitCode);

  // Ignore entries whose names start with '_' or '.'.
  PathFilter visibleOnly = new PathFilter() {
    @Override
    public boolean accept(Path candidate) {
      String fileName = candidate.getName();
      return !(fileName.startsWith("_") || fileName.startsWith("."));
    }
  };
  FileStatus[] outputFiles = localFs.listStatus(joinOutput, visibleOnly);
  assertEquals(1, outputFiles.length);

  FSDataInputStream resultStream = localFs.open(outputFiles[0].getPath());
  BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
  for (String row = resultReader.readLine(); row != null; row = resultReader.readLine()) {
    // Each output line must be an expected term that has not been seen yet.
    assertTrue(pendingTerms.remove(row));
  }
  resultReader.close();
  resultStream.close();
  assertEquals(0, pendingTerms.size());
}