org.apache.tez.runtime.library.api.TezRuntimeConfiguration Java Examples

The following examples show how to use org.apache.tez.runtime.library.api.TezRuntimeConfiguration. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UnorderedKVOutputConfig.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Sets or clears a single additional configuration entry on this builder.
 *
 * <p>The entry is applied only when {@code ConfigUtils.doesKeyQualify} accepts the key against
 * the UnorderedKVOutput key set, the runtime additional key set and the allowed prefixes;
 * any other key is silently ignored.
 *
 * @param key   the configuration key; must not be null
 * @param value the value to store, or null to unset the key
 * @return this object for further chained method calls
 */
@SuppressWarnings("unchecked")
@Override
public Builder setAdditionalConfiguration(String key, String value) {
  Objects.requireNonNull(key, "Key cannot be null");
  boolean keyAccepted = ConfigUtils.doesKeyQualify(
      key,
      Lists.newArrayList(
          UnorderedKVOutput.getConfigurationKeySet(),
          TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()),
      TezRuntimeConfiguration.getAllowedPrefixes());
  if (!keyAccepted) {
    // Keys outside the permitted sets/prefixes leave the configuration untouched.
    return this;
  }
  if (value != null) {
    this.conf.set(key, value);
  } else {
    this.conf.unset(key);
  }
  return this;
}
 
Example #2
Source File: StandaloneHiveServerContext.java    From HiveRunner with Apache License 2.0 6 votes vote down vote up
/**
 * Applies the Tez-related settings used for test execution to the given configuration:
 * Tez local mode on the local file system, no client version check, and several
 * application-master features switched off.
 *
 * @param conf the Hive configuration to modify in place
 */
protected void configureTezExecutionEngine(HiveConf conf) {
    /*
    Tez local mode settings
     */
    conf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
    conf.set("fs.defaultFS", "file:///");
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);

    /*
    Set to be able to run tests offline
     */
    conf.set(TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK, "true");

    /*
    General attempts to strip off unnecessary functionality to speed up test execution and
    increase stability
     */
    conf.set(TezConfiguration.TEZ_AM_USE_CONCURRENT_DISPATCHER, "false");
    conf.set(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, "false");
    // DAG recovery is disabled exactly once; the original set this same key twice.
    conf.set(TezConfiguration.DAG_RECOVERY_ENABLED, "false");
    conf.set(TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX, "1");
    conf.set(TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE, "false");
    conf.set(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, "false");
}
 
Example #3
Source File: TezDagBuilder.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Chooses the key comparator (and, where applicable, the grouping comparator) for the output
 * of the given operator and records the choice in {@code conf}.
 *
 * @param keyType the Pig key type used to pick a comparator in the default case
 * @param conf    the configuration that receives the comparator settings
 * @param tezOp   the operator being configured; may be null
 * @throws JobCreationException declared by the original signature
 */
void selectOutputComparator(byte keyType, Configuration conf, TezOperator tezOp)
        throws JobCreationException {
    // TODO: Handle sorting like in JobControlCompiler
    // TODO: Group comparators as in JobControlCompiler
    final String comparatorKey = TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS;

    // Secondary-key case: dedicated key and grouping comparators.
    if (tezOp != null && tezOp.isUseSecondaryKey()) {
        conf.set(comparatorKey, PigSecondaryKeyComparator.class.getName());
        setGroupingComparator(conf, PigSecondaryKeyGroupComparator.class.getName());
        return;
    }

    // Skewed-join case: the same comparator class is used for both roles.
    if (tezOp != null && tezOp.isSkewedJoin()) {
        // TODO: PigGroupingPartitionWritableComparator only used as Group comparator in MR.
        // What should be TEZ_RUNTIME_KEY_COMPARATOR_CLASS if same as MR?
        final String skewedJoinComparator =
                PigGroupingPartitionWritableComparator.class.getName();
        conf.set(comparatorKey, skewedJoinComparator);
        setGroupingComparator(conf, skewedJoinComparator);
        return;
    }

    // Default case: comparator derived from the key type; a grouping comparator is only
    // installed when the operator has no order-by.
    final boolean orderByPresent = hasOrderby(tezOp);
    conf.setClass(comparatorKey, comparatorForKeyType(keyType, orderByPresent),
            RawComparator.class);
    if (!orderByPresent) {
        setGroupingComparator(conf, getGroupingComparatorForKeyType(keyType).getName());
    }
}
 
Example #4
Source File: TestPipelinedShuffle.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the shared configuration for pipelined shuffle and starts a mini Tez cluster
 * before each test.
 */
@Before
public void setupTezCluster() throws Exception {
  // 3 seconds should be good enough in local machine
  final int shuffleTimeoutMillis = 3 * 1000;

  //With 1 MB sort buffer and with good amount of dataset, it would spill records
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);

  //Enable PipelinedShuffle
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);

  //Enable local fetch
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);

  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT,
      shuffleTimeoutMillis);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT,
      shuffleTimeoutMillis);

  //set to low value so that it can detect failures quickly
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 2);

  conf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500);

  // Bring the cluster up only after every setting above is in place.
  final String clusterName = TestPipelinedShuffle.class.getName();
  miniTezCluster = new MiniTezCluster(clusterName, 1, 1, 1);
  miniTezCluster.init(conf);
  miniTezCluster.start();
}
 
Example #5
Source File: TestShuffleScheduler.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that constructing the scheduler requests a framework executor service from the
 * input context only when TEZ_RUNTIME_SHUFFLE_FETCHER_USE_SHARED_POOL is set to true.
 */
@Test(timeout=5000)
public void testUseSharedExecutor() throws Exception {
  InputContext inputContext = createTezInputContext();
  Configuration conf = new TezConfiguration();
  int numInputs = 10;
  Shuffle shuffle = mock(Shuffle.class);
  MergeManager mergeManager = mock(MergeManager.class);

  // Phase 1: shared-pool flag not set on the configuration.
  ShuffleSchedulerForTest scheduler = new ShuffleSchedulerForTest(inputContext, conf, numInputs,
      shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false, 0, "srcName");
  // The input context must not have been asked for a framework executor service.
  verify(inputContext, times(0)).createTezFrameworkExecutorService(anyInt(), anyString());
  scheduler.close();

  // Phase 2: a fresh input context so that earlier interactions cannot affect the check.
  inputContext = createTezInputContext();
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCHER_USE_SHARED_POOL, true);
  scheduler = new ShuffleSchedulerForTest(inputContext, conf, numInputs, shuffle, mergeManager,
      mergeManager, System.currentTimeMillis(), null, false, 0, "srcName");
  // With the flag enabled the executor service must have been requested.
  verify(inputContext).createTezFrameworkExecutorService(anyInt(), anyString());
  scheduler.close();
}
 
Example #6
Source File: ConfigUtils.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the compression codec class configured under
 * {@code TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC}.
 *
 * @param conf         the configuration to read the codec class name from
 * @param defaultValue the class returned when no codec name is configured
 * @return the configured codec class, or {@code defaultValue} when the key is unset
 * @throws IllegalArgumentException if the configured class name cannot be loaded
 */
public static Class<? extends CompressionCodec> getIntermediateInputCompressorClass(
    Configuration conf, Class<DefaultCodec> defaultValue) {
  final String codecName = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC);
  if (codecName == null) {
    // Nothing configured: fall back to the caller-supplied default.
    return defaultValue;
  }
  try {
    return conf.getClassByName(codecName).asSubclass(CompressionCodec.class);
  } catch (ClassNotFoundException e) {
    throw new IllegalArgumentException("Compression codec " + codecName
        + " was not found.", e);
  }
}
 
Example #7
Source File: TestOnFileSortedOutput.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Per-test fixture: writes the sorter, key/value/partitioner and partition-reporting settings
 * into the shared configuration, resets the counters, creates the working directory and picks
 * a partition count.
 */
@Before
public void setup() throws Exception {
  // Sorter implementation and thread count come from fields of the test class
  // (presumably test parameters - verify against the class declaration).
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, sorterImpl.name());
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_SORT_THREADS, sorterThreads);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 5);

  // Keys and values are Text; partitioning uses HashPartitioner.
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, Text.class.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
      HashPartitioner.class.getName());

  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED,
      sendEmptyPartitionViaEvent);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS,
      reportPartitionStats.getType());
  // Reset the counters shared across tests.
  outputSize.set(0);
  numRecords.set(0);
  fs.mkdirs(workingDir);
  // rnd.nextInt(10) yields 0..9; Math.max keeps the partition count at 1 or more.
  this.partitions = Math.max(1, rnd.nextInt(10));
}
 
Example #8
Source File: TestUnorderedKVOutputConfig.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an output config, round-trips it through its user payload and checks that both the
 * default properties and the explicitly configured key/value classes are present afterwards.
 */
@Test(timeout = 5000)
public void testDefaultConfigsUsed() {
  UnorderedKVOutputConfig original =
      UnorderedKVOutputConfig.newBuilder("KEY", "VALUE").build();

  // Serialize to a user payload and load it into a fresh instance.
  UnorderedKVOutputConfig roundTripped = new UnorderedKVOutputConfig();
  roundTripped.fromUserPayload(original.toUserPayload());
  Configuration rebuiltConf = roundTripped.conf;

  boolean readahead = rebuiltConf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  assertEquals(true, readahead);

  // Default property present.
  String codec = rebuiltConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC, "");
  assertEquals("TestCodec", codec);

  // Verify whatever was configured
  String keyClass = rebuiltConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, "");
  assertEquals("KEY", keyClass);
  String valueClass = rebuiltConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, "");
  assertEquals("VALUE", valueClass);
}
 
Example #9
Source File: ExternalSorter.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the initial sort-buffer memory request in bytes from
 * {@code TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB}.
 *
 * @param conf                   the configuration holding the sort buffer size in MB
 * @param maxAvailableTaskMemory the memory available to the task, in bytes
 * @return the configured sort buffer size converted to bytes
 */
public static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) {
  final String sortMbKey = TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB;
  final int sortMb =
      conf.getInt(sortMbKey, TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB_DEFAULT);
  // Widen before shifting so that large MB values do not overflow int.
  final long requestedBytes = ((long) sortMb) << 20;

  //Higher bound checks are done in individual sorter implementations
  final boolean withinBounds = sortMb > 0 && requestedBytes < maxAvailableTaskMemory;
  Preconditions.checkArgument(withinBounds,
      sortMbKey + " " + sortMb + " should be "
          + "larger than 0 and should be less than the available task memory (MB):"
          + (maxAvailableTaskMemory >> 20));

  if (LOG.isDebugEnabled()) {
    LOG.debug("Requested SortBufferSize (" + sortMbKey + "): " + sortMb);
  }
  return requestedBytes;
}
 
Example #10
Source File: TestOrderedPartitionedKVEdgeConfig.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Enables TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT on the builder and passes the
 * history text of the edge config and of the edge properties it creates to
 * {@code checkHistoryText}.
 */
@Test (timeout=2000)
public void testHistoryText() {
  OrderedPartitionedKVEdgeConfig.Builder builder =
      OrderedPartitionedKVEdgeConfig.newBuilder("KEY", "VALUE", "PARTITIONER");
  // Configuration(false): start from an empty configuration, with only the flag below set.
  Configuration fromConf = new Configuration(false);
  fromConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      true);
  builder.setFromConfiguration(fromConf);

  // History text exposed directly by the edge config.
  OrderedPartitionedKVEdgeConfig kvEdgeConfig = builder.build();
  checkHistoryText(kvEdgeConfig.getInputHistoryText());
  checkHistoryText(kvEdgeConfig.getOutputHistoryText());

  // History text carried by the default edge property's source and destination.
  EdgeProperty defaultEdgeProperty = builder.build().createDefaultEdgeProperty();
  checkHistoryText(defaultEdgeProperty.getEdgeDestination().getHistoryText());
  checkHistoryText(defaultEdgeProperty.getEdgeSource().getHistoryText());

  // Same check for a custom edge property built around a mocked edge manager descriptor.
  EdgeManagerPluginDescriptor descriptor = mock(EdgeManagerPluginDescriptor.class);
  EdgeProperty edgeProperty = builder.build().createDefaultCustomEdgeProperty(descriptor);
  checkHistoryText(edgeProperty.getEdgeDestination().getHistoryText());
  checkHistoryText(edgeProperty.getEdgeSource().getHistoryText());
}
 
Example #11
Source File: TestOnFileSortedOutput.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Negative test: with the LEGACY sorter selected, final merge disabled and pipelined shuffle
 * enabled, starting the output is expected to throw an IllegalArgumentException whose message
 * contains "works with PipelinedSorter".
 */
@Test
public void testPipelinedSettingsWithDefaultSorter() throws Exception {
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 3);
  //negative. with default sorter
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, SorterImpl.LEGACY.name());
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, true);

  // Hand the configuration to the output through the context's user payload.
  OutputContext context = createTezOutputContext();
  UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
  doReturn(payLoad).when(context).getUserPayload();
  sortedOutput = new OrderedPartitionedKVOutput(context, partitions);

  sortedOutput.initialize();
  try {
    sortedOutput.start();
    // Reaching this line means start() accepted the unsupported combination.
    fail("Should have thrown illegal argument exception as pipelining is enabled with "
        + "DefaultSorter");
  } catch(IllegalArgumentException ie) {
    assertTrue(ie.getMessage().contains("works with PipelinedSorter"));
  }

}
 
Example #12
Source File: TestUnorderedPartitionedKVWriter.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a fresh configuration for an unordered partitioned writer test.
 *
 * @param outputContext            supplies the work directories used as local dirs
 * @param keyClass                 the key class to register
 * @param valClass                 the value class to register
 * @param shouldCompress           whether compression is enabled; when true the codec is
 *                                 set to DefaultCodec
 * @param maxSingleBufferSizeBytes per-buffer size limit; only applied when non-negative
 * @param partitionerClass         the partitioner class to register
 * @return the populated configuration
 */
private Configuration createConfiguration(OutputContext outputContext,
    Class<? extends Writable> keyClass, Class<? extends Writable> valClass,
    boolean shouldCompress, int maxSingleBufferSizeBytes,
    Class<? extends Partitioner> partitionerClass) {
  final Configuration result = new Configuration(false);

  // File-system and local directory settings.
  result.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
  result.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, outputContext.getWorkDirs());

  // Key, value and partitioner types.
  result.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClass.getName());
  result.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valClass.getName());
  result.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
      partitionerClass.getName());

  // A negative size means "leave the per-buffer limit unset".
  if (maxSingleBufferSizeBytes >= 0) {
    result.setInt(
        TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES,
        maxSingleBufferSizeBytes);
  }

  result.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, shouldCompress);
  if (shouldCompress) {
    result.set(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC,
        DefaultCodec.class.getName());
  }

  result.set(TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS,
      reportPartitionStats.getType());
  return result;
}
 
Example #13
Source File: TestDefaultSorter.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Per-test fixture: creates a configuration that selects the LEGACY sorter on the local file
 * system, derives a qualified working directory for this test class and registers it as the
 * local dir used by the directory allocator.
 */
@Before
public void setup() throws IOException {
  conf = new Configuration();
  conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, SorterImpl.LEGACY.name()); // DefaultSorter
  // fs.defaultFS is pointed at the local file system before the local FS handle is obtained.
  conf.set("fs.defaultFS", "file:///");
  localFs = FileSystem.getLocal(conf);

  // Working dir: <test.build.data or /tmp>/<test class name>, qualified against the local FS.
  workingDir = new Path(
      new Path(System.getProperty("test.build.data", "/tmp")),
      TestDefaultSorter.class.getName())
      .makeQualified(localFs.getUri(), localFs.getWorkingDirectory());
  String localDirs = workingDir.toString();
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, Text.class.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
      HashPartitioner.class.getName());
  // The allocator is keyed by the same configuration property that holds the local dirs.
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs);
  dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
}
 
Example #14
Source File: OrderedPartitionedKVOutputConfig.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Sets or clears a single additional configuration entry on this builder.
 *
 * <p>The entry is applied only when {@code ConfigUtils.doesKeyQualify} accepts the key against
 * the OrderedPartitionedKVOutput key set, the runtime additional key set and the allowed
 * prefixes; any other key is silently ignored.
 *
 * @param key   the configuration key; must not be null
 * @param value the value to store, or null to unset the key
 * @return this object for further chained method calls
 */
@SuppressWarnings("unchecked")
@Override
public Builder setAdditionalConfiguration(String key, String value) {
  Objects.requireNonNull(key, "Key cannot be null");
  boolean keyAccepted = ConfigUtils.doesKeyQualify(
      key,
      Lists.newArrayList(
          OrderedPartitionedKVOutput.getConfigurationKeySet(),
          TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()),
      TezRuntimeConfiguration.getAllowedPrefixes());
  if (!keyAccepted) {
    // Keys outside the permitted sets/prefixes leave the configuration untouched.
    return this;
  }
  if (value != null) {
    this.conf.set(key, value);
  } else {
    this.conf.unset(key);
  }
  return this;
}
 
Example #15
Source File: TestDefaultSorter.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Flushes and closes a DefaultSorter that received no data, with final merge disabled, and
 * checks that the sorter reports closed, that the parent directory of its final output file
 * is named {@code UniqueID + "_0"} (case-insensitively), and that the counters verify.
 */
@Test(timeout = 30000)
public void testWithEmptyDataWithFinalMergeDisabled() throws IOException {
  OutputContext context = createTezOutputContext();

  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1);
  MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler();
  context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf,
      context.getTotalMemoryAvailableToTask()), handler);
  DefaultSorter sorter = new DefaultSorter(context, conf, 5, handler.getMemoryAssigned());

  //no data written. Empty
  try {
    sorter.flush();
    sorter.close();
    assertTrue(sorter.isClosed());
    assertTrue(sorter.getFinalOutputFile().getParent().getName().equalsIgnoreCase(UniqueID +
        "_0"));
    verifyCounters(sorter, context);
  } catch(Exception e) {
    // Still fails the test, but keeps the original exception as the cause so the report
    // shows what went wrong instead of a bare, message-less failure.
    throw new AssertionError("Unexpected exception while flushing/closing an empty sorter", e);
  }
}
 
Example #16
Source File: OrderedPartitionedKVOutputConfig.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Set serialization class and the relevant comparator to be used for sorting.
 * Providing custom serialization class could change the way, keys needs to be compared in
 * sorting. Providing invalid comparator here could create invalid results.
 *
 * @param serializationClassName name of the serialization class; must not be null. It is
 *                               prepended to the current value of the serializations key.
 * @param comparatorClassName    name of the key comparator class; must not be null. It is
 *                               passed on to {@code setKeyComparatorClass}.
 * @param serializerConf         the serializer configuration. This can be null, and is a
 *                               {@link java.util.Map} of key-value pairs. The keys should be limited
 *                               to the ones required by the comparator.
 * @return this object for further chained method calls
 */
public Builder setKeySerializationClass(String serializationClassName,
    String comparatorClassName, @Nullable Map<String, String> serializerConf) {
  Preconditions.checkArgument(serializationClassName != null,
      "serializationClassName cannot be null");
  Preconditions.checkArgument(comparatorClassName != null,
      "comparator cannot be null");

  // Put the new serialization in front of whatever is currently configured.
  final String serializationsKey = CommonConfigurationKeys.IO_SERIALIZATIONS_KEY;
  this.conf.set(serializationsKey,
      serializationClassName + "," + conf.get(serializationsKey));

  setKeyComparatorClass(comparatorClassName, null);

  if (serializerConf == null) {
    return this;
  }
  // Merging the confs for now. Change to be specific in the future.
  ConfigUtils.mergeConfsWithExclusions(this.conf, serializerConf,
      TezRuntimeConfiguration.getRuntimeConfigKeySet());
  return this;
}
 
Example #17
Source File: TestMRHelpers.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code MRHelpers.translateMRConfToTez} on a configuration that sets both the Tez
 * sort-MB key (1000) and the MR sort-MB key (500), and checks which value ends up under the
 * Tez key for each call variant. In every variant the MR key is expected to be removed.
 */
@Test(timeout = 5000)
public void testTranslateMRConfToTez() {
  Configuration conf = new Configuration(false);
  conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1000);
  conf.setLong(org.apache.tez.mapreduce.hadoop.MRJobConfig.IO_SORT_MB, 500);

  // Single-argument form: the existing Tez value (1000) is expected to remain.
  Configuration conf1 = new Configuration(conf);
  MRHelpers.translateMRConfToTez(conf1);
  Assert.assertNull(conf1.get(org.apache.tez.mapreduce.hadoop.MRJobConfig.IO_SORT_MB));
  Assert.assertEquals(1000, conf1.getLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 0));

  // Second argument true: same expectation as the single-argument form.
  Configuration conf2 = new Configuration(conf);
  MRHelpers.translateMRConfToTez(conf2, true);
  Assert.assertNull(conf2.get(org.apache.tez.mapreduce.hadoop.MRJobConfig.IO_SORT_MB));
  Assert.assertEquals(1000, conf2.getLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 0));

  // Second argument false: the MR value (500) is expected under the Tez key.
  Configuration conf3 = new Configuration(conf);
  MRHelpers.translateMRConfToTez(conf3, false);
  Assert.assertNull(conf3.get(org.apache.tez.mapreduce.hadoop.MRJobConfig.IO_SORT_MB));
  Assert.assertEquals(500, conf3.getLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 0));
}
 
Example #18
Source File: TestPipelinedSorter.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a PipelinedSorter with final merge disabled and checks that no final output file is
 * produced, that no events are sent through the output context, and that {@code close()}
 * returns at least one event.
 */
@Test
public void testWithPipelinedShuffle() throws IOException {
  this.numOutputs = 1;
  this.initialAvailableMem = 5 *1024 * 1024;
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 5);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setInt(TezRuntimeConfiguration
      .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);
  PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs,
      initialAvailableMem);

  //Write 100 keys each of size 10
  // NOTE(review): the call below passes (10000, 100); confirm the comment above against the
  // writeData signature.
  writeData(sorter, 10000, 100, false);
  sorter.flush();
  List<Event> events = sorter.close();

  //final merge is disabled. Final output file would not be populated in this case.
  assertTrue(sorter.finalOutputFile == null);
  // Restore the final-merge flag on the shared conf; this line is skipped if the assertion
  // above fails.
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
  // Events are expected in the list returned by close(), not via sendEvents().
  verify(outputContext, times(0)).sendEvents(any());
  assertTrue(events.size() > 0);
}
 
Example #19
Source File: OrderedGroupedKVInputConfig.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Set serialization class and the relevant comparator to be used for sorting.
 * Providing custom serialization class could change the way, keys needs to be compared in
 * sorting. Providing invalid comparator here could create invalid results.
 *
 * @param serializationClassName name of the serialization class; must not be null. It is
 *                               prepended to the current value of the serializations key.
 * @param comparatorClassName    name of the key comparator class; must not be null. It is
 *                               passed on to {@code setKeyComparatorClass}.
 * @param serializerConf         the serializer configuration. This can be null, and is a
 *                               {@link java.util.Map} of key-value pairs. The keys should be limited
 *                               to the ones required by the comparator.
 * @return this object for further chained method calls
 */
public Builder setKeySerializationClass(String serializationClassName,
    String comparatorClassName, @Nullable Map<String, String> serializerConf) {
  Preconditions.checkArgument(serializationClassName != null,
      "serializationClassName cannot be null");
  Preconditions.checkArgument(comparatorClassName != null,
      "comparator cannot be null");

  // Put the new serialization in front of whatever is currently configured.
  final String serializationsKey = CommonConfigurationKeys.IO_SERIALIZATIONS_KEY;
  this.conf.set(serializationsKey,
      serializationClassName + "," + conf.get(serializationsKey));

  setKeyComparatorClass(comparatorClassName, null);

  if (serializerConf == null) {
    return this;
  }
  // Merging the confs for now. Change to be specific in the future.
  ConfigUtils.mergeConfsWithExclusions(this.conf, serializerConf,
      TezRuntimeConfiguration.getRuntimeConfigKeySet());
  return this;
}
 
Example #20
Source File: TestPipelinedSorter.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Writes generated sentences as keys into a PipelinedSorter until their accumulated length
 * reaches 50 * 1024 * 1024, then flushes, closes and verifies the output permissions.
 */
@Test
public void test_TEZ_2602_50mb() throws IOException {
  this.numOutputs = 1;
  this.initialAvailableMem = 1 *1024 * 1024;
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
  conf.setInt(TezRuntimeConfiguration
      .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);
  PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs,
      initialAvailableMem);

  // Every record shares the same one-character value; only key lengths count toward the total.
  final Text constantValue = new Text("1");
  for (long remaining = 50 * 1024 * 1024; remaining > 0; ) {
    Text sentence = RandomTextGenerator.generateSentence();
    sorter.write(sentence, constantValue);
    remaining -= sentence.getLength();
  }

  sorter.flush();
  sorter.close();
  verifyOutputPermissions(outputContext.getUniqueIdentifier());
}
 
Example #21
Source File: OrderedPartitionedKVOutputConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Set serialization class responsible for providing serializer/deserializer for values.
 *
 * @param serializationClassName name of the serialization class; must not be null. It is
 *                               prepended to the current value of the serializations key.
 * @param serializerConf         the serializer configuration. This can be null, and is a
 *                               {@link java.util.Map} of key-value pairs. The keys should be limited
 *                               to the ones required by the comparator.
 * @return this object for further chained method calls
 */
public Builder setValueSerializationClass(String serializationClassName,
                                          @Nullable Map<String, String> serializerConf) {
  Preconditions.checkArgument(serializationClassName != null,
      "serializationClassName cannot be null");

  // Put the new serialization in front of whatever is currently configured.
  final String serializationsKey = CommonConfigurationKeys.IO_SERIALIZATIONS_KEY;
  this.conf.set(serializationsKey,
      serializationClassName + "," + conf.get(serializationsKey));

  if (serializerConf == null) {
    return this;
  }
  // Merging the confs for now. Change to be specific in the future.
  ConfigUtils.mergeConfsWithExclusions(this.conf, serializerConf,
      TezRuntimeConfiguration.getRuntimeConfigKeySet());
  return this;
}
 
Example #22
Source File: TestPipelinedSorter.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a PipelinedSorter with five outputs and final merge enabled, then verifies the
 * counters and the permissions of the produced output.
 */
@Test
public void testCountersWithMultiplePartitions() throws IOException {
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
  this.numOutputs = 5;
  this.initialAvailableMem = 5 * 1024 * 1024;
  conf.setInt(TezRuntimeConfiguration
      .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);
  PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs,
      initialAvailableMem);

  // No explicit flush()/close() here - presumably writeData handles that in this overload;
  // verify against the helper.
  writeData(sorter, 10000, 100);
  verifyCounters(sorter, outputContext);
  verifyOutputPermissions(outputContext.getUniqueIdentifier());
}
 
Example #23
Source File: TestUnorderedPartitionedKVEdgeConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an unordered-partitioned edge config with two serialization classes, rebuilds the
 * output and input configs from their payloads, and checks that both carry the default
 * properties and the serialization classes in the expected order.
 */
@Test (timeout=2000)
public void testDefaultConfigsUsed() {
  UnorderedPartitionedKVEdgeConfig.Builder builder =
      UnorderedPartitionedKVEdgeConfig.newBuilder("KEY", "VALUE", "PARTITIONER");
  builder.setKeySerializationClass("SerClass1", null);
  builder.setValueSerializationClass("SerClass2", null);

  UnorderedPartitionedKVEdgeConfig configuration = builder.build();

  // Rebuild both sides of the edge from their serialized payloads.
  UnorderedPartitionedKVOutputConfig rebuiltOutput =
      new UnorderedPartitionedKVOutputConfig();
  rebuiltOutput.fromUserPayload(configuration.getOutputPayload());
  UnorderedKVInputConfig rebuiltInput =
      new UnorderedKVInputConfig();
  rebuiltInput.fromUserPayload(configuration.getInputPayload());

  // Output side: defaults present, and the class set last ("SerClass2") comes first.
  Configuration outputConf = rebuiltOutput.conf;
  assertEquals(true, outputConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT));
  assertEquals("TestCodec",
      outputConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC, ""));
  assertTrue(outputConf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY).startsWith
      ("SerClass2,SerClass1"));

  // Input side: identical expectations.
  Configuration inputConf = rebuiltInput.conf;
  assertEquals(true, inputConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT));
  assertEquals("TestCodec",
      inputConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC, ""));
  assertTrue(inputConf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY).startsWith
      ("SerClass2,SerClass1"));
}
 
Example #24
Source File: TestUnorderedKVEdgeConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an unordered edge config with two serialization classes, rebuilds the output and
 * input configs from their payloads, and checks that both carry the default properties and
 * the serialization classes in the expected order.
 */
@Test (timeout=2000)
public void testDefaultConfigsUsed() {
  UnorderedKVEdgeConfig.Builder builder =
      UnorderedKVEdgeConfig.newBuilder("KEY", "VALUE");
  builder.setKeySerializationClass("SerClass1", null).setValueSerializationClass("SerClass2", null);

  UnorderedKVEdgeConfig configuration = builder.build();


  // Rebuild both sides of the edge from their serialized payloads.
  UnorderedKVOutputConfig rebuiltOutput =
      new UnorderedKVOutputConfig();
  rebuiltOutput.fromUserPayload(configuration.getOutputPayload());
  UnorderedKVInputConfig rebuiltInput =
      new UnorderedKVInputConfig();
  rebuiltInput.fromUserPayload(configuration.getInputPayload());

  // Output side: defaults present, and the class set last ("SerClass2") comes first.
  Configuration outputConf = rebuiltOutput.conf;
  assertEquals(true, outputConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT));
  assertEquals("TestCodec",
      outputConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC, ""));
  assertTrue(outputConf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY).startsWith
      ("SerClass2,SerClass1"));

  // Input side: identical expectations.
  Configuration inputConf = rebuiltInput.conf;
  assertEquals(true, inputConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT));
  assertEquals("TestCodec",
      inputConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC, ""));
  assertTrue(inputConf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY).startsWith
      ("SerClass2,SerClass1"));
}
 
Example #25
Source File: TestSecureShuffle.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * One-time fixture: starts a single-datanode MiniDFSCluster rooted at TEST_ROOT_DIR, points
 * fs.defaultFS at it and disables the local-fetch optimization.
 */
@BeforeClass
public static void setupDFSCluster() throws Exception {
  conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH, false);
  // Skip fsync on the edit log for testing.
  EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
  conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
  miniDFSCluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build();
  fs = miniDFSCluster.getFileSystem();
  // Use the mini cluster's URI as the default file system from here on.
  conf.set("fs.defaultFS", fs.getUri().toString());
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
}
 
Example #26
Source File: TestSecureShuffle.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Per-test fixture: optionally prepares SSL key stores, applies shuffle timeout and failure
 * settings, starts a MiniTezCluster named after the SSL mode and creates the sample input
 * file.
 */
@Before
public void setupTezCluster() throws Exception {
  if (enableSSLInCluster) {
    // Enable SSL debugging
    System.setProperty("javax.net.debug", "all");
    setupKeyStores();
  }
  conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, enableSSLInCluster);

  // 3 seconds should be good enough in local machine
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, 3 * 1000);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 3 * 1000);
  //set to low value so that it can detect failures quickly
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 2);

  conf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500);

  // Add the SSL client resource named in the configuration, defaulting to "ssl-client.xml".
  String sslConf = conf.get(SSL_CLIENT_CONF_KEY, "ssl-client.xml");
  conf.addResource(sslConf);

  // The cluster name carries the SSL mode as a suffix.
  miniTezCluster = new MiniTezCluster(TestSecureShuffle.class.getName() + "-" +
      (enableSSLInCluster ? "withssl" : "withoutssl"), 1, 1, 1);

  miniTezCluster.init(conf);
  miniTezCluster.start();
  createSampleFile(inputLoc);
}
 
Example #27
Source File: TestSecureShuffle.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Verify whether shuffle works in mini cluster.
 *
 * <p>Runs {@code baseTest} twice against the same cluster: first with
 * TEZ_RUNTIME_SHUFFLE_ENABLE_SSL set to true, then with it set to false, passing the
 * corresponding expected-result field each time.
 *
 * @throws Exception
 */
@Test(timeout = 500000)
public void testSecureShuffle() throws Exception {
  //With tez-ssl setting
  miniTezCluster.getConfig().setBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, true);
  baseTest(this.resultWithTezSSL);

  //Without tez-ssl setting
  miniTezCluster.getConfig().setBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, false);
  baseTest(this.resultWithoutTezSSL);
}
 
Example #28
Source File: UnorderedPartitionedKVWriter.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the initial buffer memory request in bytes from
 * {@code TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB}.
 *
 * @param conf                   the configuration holding the buffer size in MB
 * @param maxAvailableTaskMemory not used by this method; kept for signature compatibility
 * @return the configured buffer size converted to bytes
 * @throws IllegalArgumentException if the configured size is not larger than 0
 */
public static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) {
  long initialMemRequestMb = conf.getInt(
      TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB,
      TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB_DEFAULT);
  // Reject negative sizes as well as zero, matching the "larger than 0" message; the previous
  // "!= 0" check let negative values through and produced a negative byte count.
  Preconditions.checkArgument(initialMemRequestMb > 0,
      TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB + " should be larger than 0");
  // The value is already a long, so the MB-to-bytes shift cannot overflow int.
  long reqBytes = initialMemRequestMb << 20;
  LOG.info("Requested BufferSize (" + TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB
      + ") : " + initialMemRequestMb);
  return reqBytes;
}
 
Example #29
Source File: TestUnorderedKVOutput2.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Per-test fixture: creates a configuration and local file system handle, derives a qualified
 * working directory for this test class, and registers the key, value and partitioner classes
 * plus the local dirs.
 */
@Before
public void setup() throws IOException {
  conf = new Configuration();
  localFs = FileSystem.getLocal(conf);

  // Base directory: "test.build.data" if set, else "java.io.tmpdir", else "/tmp".
  final String baseDir = System.getProperty("test.build.data",
      System.getProperty("java.io.tmpdir", "/tmp"));
  final Path unqualified = new Path(baseDir, TestUnorderedKVOutput2.class.getName());
  workingDir = unqualified.makeQualified(localFs.getUri(), localFs.getWorkingDirectory());

  final String textClassName = Text.class.getName();
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, textClassName);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, textClassName);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
      HashPartitioner.class.getName());
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workingDir.toString());
}
 
Example #30
Source File: UnorderedKVInputConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Enables or disables compression and optionally records the codec and codec settings.
 *
 * @param enabled          whether compression is turned on
 * @param compressionCodec codec class name; only stored when {@code enabled} is true and the
 *                         name is non-null
 * @param codecConf        optional codec configuration; when non-null it is merged into this
 *                         builder's configuration regardless of {@code enabled}
 * @return this object for further chained method calls
 */
public Builder setCompression(boolean enabled, @Nullable String compressionCodec,
                              @Nullable Map<String, String> codecConf) {
  this.conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, enabled);

  final boolean codecApplies = enabled && compressionCodec != null;
  if (codecApplies) {
    this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC, compressionCodec);
  }

  if (codecConf == null) {
    return this;
  }
  // Merging the confs for now. Change to be specific in the future.
  ConfigUtils.mergeConfsWithExclusions(this.conf, codecConf,
      TezRuntimeConfiguration.getRuntimeConfigKeySet());
  return this;
}