Java Code Examples for org.apache.hadoop.conf.Configuration#setIfUnset()

The following examples show how to use org.apache.hadoop.conf.Configuration#setIfUnset(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may want to check out the right sidebar, which shows the related API usage.
Example 1
Source Project: ambari-metrics   File: DownSamplerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code DownSamplerUtils.getDownsampleMetricPatterns} returns the three patterns
 * configured under the "topn" and "lastvalue" downsampler keys, and returns no patterns for a
 * configuration on which this test sets nothing.
 */
@Test
public void testGetDownsampleMetricPatterns() throws Exception {

  Configuration configuration = new Configuration();
  configuration.setIfUnset("timeline.metrics.downsampler.topn.metric.patterns", "pattern1,pattern2");
  configuration.setIfUnset("timeline.metrics.downsampler.lastvalue.metric.patterns", "pattern3");

  List<String> patterns = DownSamplerUtils.getDownsampleMetricPatterns(configuration);
  // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
  Assert.assertEquals(3, patterns.size());
  Assert.assertTrue(patterns.contains("pattern1"));
  Assert.assertTrue(patterns.contains("pattern2"));
  Assert.assertTrue(patterns.contains("pattern3"));

  // A configuration with no downsampler keys set by this test must yield no patterns.
  Configuration configuration2 = new Configuration();
  patterns = DownSamplerUtils.getDownsampleMetricPatterns(configuration2);
  Assert.assertEquals(0, patterns.size());
}
 
Example 2
Source Project: ambari-metrics   File: DownSamplerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an {@code EventMetricDownSampler} built from configuration with two event metric
 * patterns produces one down-sampling SELECT statement per pattern, in pattern order, for the
 * given time window and table name.
 */
@Test
public void testPrepareEventDownSamplingStatement() throws Exception {
  Configuration configuration = new Configuration();
  configuration.setIfUnset("timeline.metrics.downsampler.event.metric.patterns", "pattern1,pattern2");

  Map<String, String> conf = configuration.getValByRegex(DownSamplerUtils.downSamplerConfigPrefix);

  EventMetricDownSampler eventMetricDownSampler = EventMetricDownSampler.fromConfig(conf);
  // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
  List<String> stmts = eventMetricDownSampler.prepareDownSamplingStatement(14000000L, 14100000L, "METRIC_RECORD_UUID");
  // assertEquals takes (expected, actual).
  Assert.assertEquals(2, stmts.size());

  // assertEquals (rather than assertTrue(a.equals(b))) reports both strings when the check fails.
  Assert.assertEquals("SELECT METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, 14100000 AS SERVER_TIME, " +
    "UNITS, SUM(METRIC_SUM), SUM(METRIC_COUNT), MAX(METRIC_MAX), MIN(METRIC_MIN) FROM METRIC_RECORD_UUID WHERE METRIC_NAME " +
    "LIKE 'pattern1' AND SERVER_TIME > 14000000 AND SERVER_TIME <= 14100000 GROUP BY METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, UNITS",
    stmts.get(0));

  Assert.assertEquals("SELECT METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, 14100000 AS SERVER_TIME, " +
    "UNITS, SUM(METRIC_SUM), SUM(METRIC_COUNT), MAX(METRIC_MAX), MIN(METRIC_MIN) FROM METRIC_RECORD_UUID WHERE METRIC_NAME " +
    "LIKE 'pattern2' AND SERVER_TIME > 14000000 AND SERVER_TIME <= 14100000 GROUP BY METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, UNITS",
    stmts.get(1));
}
 
Example 3
Source Project: hbase   File: Constraints.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Replaces the stored configuration of a {@link Constraint} that is already attached to a
 * table. The constraint is written back under the same key it was found under; the enabled
 * and priority settings are carried over from the stored configuration unless the supplied
 * configuration already defines them.
 *
 * @param desc
 *          {@link HTableDescriptor} holding the constraint
 * @param clazz
 *          {@link Constraint} class whose configuration is replaced
 * @param configuration
 *          new settings for the {@link Constraint}; copied, not modified
 * @throws IOException
 *           if the Constraint was not stored correctly
 * @throws IllegalArgumentException
 *           if the Constraint was not present on this table.
 */
public static void setConfiguration(HTableDescriptor desc,
    Class<? extends Constraint> clazz, Configuration configuration)
    throws IOException, IllegalArgumentException {
  // locate the key/value pair under which this constraint class is stored
  final Pair<String, String> stored = getKeyValueForClass(desc, clazz);
  if (stored == null) {
    throw new IllegalArgumentException("Constraint: " + clazz.getName()
        + " is not associated with this table.");
  }

  // work on a copy so the caller's configuration is left untouched
  final Configuration updated = new Configuration(configuration);

  // settings that were persisted for the constraint before this call
  final Configuration previous = readConfiguration(stored.getSecond());

  // keep the earlier enabled/priority values unless the new configuration sets its own
  final String previousEnabled = previous.get(ENABLED_KEY);
  updated.setIfUnset(ENABLED_KEY, previousEnabled);
  final String previousPriority = previous.get(PRIORITY_KEY);
  updated.setIfUnset(PRIORITY_KEY, previousPriority);

  // persist under the original key
  writeConstraint(desc, stored.getFirst(), updated);
}
 
Example 4
/**
 * Applies this test's settings to {@code conf} and then passes it to the superclass.
 * The region-replication and column-family options are only filled in when absent;
 * the remaining settings are always overwritten.
 *
 * @param conf configuration to adjust in place
 */
@Override
public void setConf(Configuration conf) {
  // Options that respect a value already present in conf.
  final String replicationKey =
    String.format("%s.%s", TEST_NAME, LoadTestTool.OPT_REGION_REPLICATION);
  final String replicationValue = String.valueOf(DEFAULT_REGION_REPLICATION);
  conf.setIfUnset(replicationKey, replicationValue);

  final String familiesKey =
    String.format("%s.%s", TEST_NAME, LoadTestTool.OPT_COLUMN_FAMILIES);
  final String familiesValue = StringUtils.join(",", DEFAULT_COLUMN_FAMILIES);
  conf.setIfUnset(familiesKey, familiesValue);

  // Settings that are forced regardless of what conf already holds.
  conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true);
  // enable async wal replication to region replicas for unit tests
  conf.setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY, true);
  // flush every 4 MB
  conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024L * 1024 * 4);
  conf.setInt("hbase.hstore.blockingStoreFiles", 100);

  super.setConf(conf);
}
 
Example 5
Source Project: ambari-metrics   File: DownSamplerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code DownSamplerUtils.getDownSamplers} creates exactly one down-sampler, a
 * {@code TopNDownSampler}, when the configuration holds patterns for "topn" and for the
 * "test" key used here.
 */
@Test
public void testGetDownSamplers() throws Exception {

  Configuration configuration = new Configuration();
  configuration.setIfUnset("timeline.metrics.downsampler.topn.metric.patterns", "pattern1,pattern2");
  configuration.setIfUnset("timeline.metrics.downsampler.test.metric.patterns", "pattern3");

  List<CustomDownSampler> downSamplers = DownSamplerUtils.getDownSamplers(configuration);
  // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
  Assert.assertEquals(1, downSamplers.size());
  Assert.assertTrue(downSamplers.get(0) instanceof TopNDownSampler);
}
 
Example 6
Source Project: ambari-metrics   File: DownSamplerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the SELECT statements produced by {@code TopNDownSampler} in two set-ups: two patterns
 * with a top-N value of 3 against {@code METRIC_RECORD_UUID}, and, after clearing the
 * configuration, one pattern with a top-N value of 4 and the "sum" function against
 * {@code METRIC_AGGREGATE_MINUTE_UUID}. Currently disabled via {@code @Ignore}.
 */
@Ignore
@Test
public void testPrepareTopNDownSamplingStatement() throws Exception {
  Configuration configuration = new Configuration();
  configuration.setIfUnset("timeline.metrics.downsampler.topn.metric.patterns", "pattern1,pattern2");
  configuration.setIfUnset("timeline.metrics.downsampler.topn.value", "3");

  Map<String, String> conf = configuration.getValByRegex(DownSamplerUtils.downSamplerConfigPrefix);

  TopNDownSampler topNDownSampler = TopNDownSampler.fromConfig(conf);
  // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
  List<String> stmts = topNDownSampler.prepareDownSamplingStatement(14000000L, 14100000L, "METRIC_RECORD_UUID");
  // assertEquals takes (expected, actual).
  Assert.assertEquals(2, stmts.size());
  Assert.assertTrue(stmts.contains("SELECT METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, 14100000 AS SERVER_TIME, UNITS, " +
    "MAX(METRIC_MAX), 1, MAX(METRIC_MAX), MAX(METRIC_MAX) FROM METRIC_RECORD_UUID WHERE " +
    "METRIC_NAME LIKE 'pattern1' AND SERVER_TIME > 14000000 AND SERVER_TIME <= 14100000 " +
    "GROUP BY METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, UNITS ORDER BY MAX(METRIC_MAX) DESC LIMIT 3"));

  Assert.assertTrue(stmts.contains("SELECT METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, 14100000 AS SERVER_TIME, UNITS, " +
    "MAX(METRIC_MAX), 1, MAX(METRIC_MAX), MAX(METRIC_MAX) FROM METRIC_RECORD_UUID WHERE " +
    "METRIC_NAME LIKE 'pattern2' AND SERVER_TIME > 14000000 AND SERVER_TIME <= 14100000 " +
    "GROUP BY METRIC_NAME, HOSTNAME, APP_ID, INSTANCE_ID, UNITS ORDER BY MAX(METRIC_MAX) DESC LIMIT 3"));

  // Second set-up: start from an emptied configuration so setIfUnset takes effect again.
  configuration.clear();
  configuration.setIfUnset("timeline.metrics.downsampler.topn.metric.patterns", "pattern1");
  configuration.setIfUnset("timeline.metrics.downsampler.topn.value", "4");
  configuration.setIfUnset("timeline.metrics.downsampler.topn.function", "sum");
  conf = configuration.getValByRegex(DownSamplerUtils.downSamplerConfigPrefix);
  topNDownSampler = TopNDownSampler.fromConfig(conf);
  stmts = topNDownSampler.prepareDownSamplingStatement(14000000L, 14100000L, "METRIC_AGGREGATE_MINUTE_UUID");
  Assert.assertEquals(1, stmts.size());

  Assert.assertTrue(stmts.contains("SELECT METRIC_NAME, APP_ID, INSTANCE_ID, 14100000 AS SERVER_TIME, UNITS, " +
    "SUM(METRIC_SUM), 1, SUM(METRIC_SUM), SUM(METRIC_SUM) FROM METRIC_AGGREGATE_MINUTE_UUID WHERE " +
    "METRIC_NAME LIKE 'pattern1' AND SERVER_TIME > 14000000 AND SERVER_TIME <= 14100000 " +
    "GROUP BY METRIC_NAME, APP_ID, INSTANCE_ID, UNITS ORDER BY SUM(METRIC_SUM) DESC LIMIT 4"));
}
 
Example 7
/**
 * Copies the pg_bulkload-related options into the job's configuration after the superclass
 * has propagated its own. Keys written with {@code setIfUnset} keep any value already
 * present in the job configuration; the remaining keys are always overwritten.
 *
 * @param job job whose configuration is populated
 */
@Override
protected void propagateOptionsToJob(Job job) {
  super.propagateOptionsToJob(job);
  final SqoopOptions options = context.getOptions();
  final Configuration jobConf = job.getConfiguration();

  jobConf.setIfUnset("pgbulkload.bin", "pg_bulkload");

  // The null-string marker is only written when the options define one.
  final String nullString = options.getNullStringValue();
  if (nullString != null) {
    jobConf.set("pgbulkload.null.string", nullString);
  }

  // Input parsing characters.
  setDelimiter("pgbulkload.input.field.delim", options.getInputFieldDelim(), jobConf);
  setDelimiter("pgbulkload.input.record.delim", options.getInputRecordDelim(), jobConf);
  setDelimiter("pgbulkload.input.enclosedby", options.getInputEnclosedBy(), jobConf);
  setDelimiter("pgbulkload.input.escapedby", options.getInputEscapedBy(), jobConf);
  final boolean encloseRequired = options.isInputEncloseRequired();
  jobConf.setBoolean("pgbulkload.input.encloserequired", encloseRequired);

  // Loader defaults, applied only when not configured already.
  jobConf.setIfUnset("pgbulkload.check.constraints", "YES");
  jobConf.setIfUnset("pgbulkload.parse.errors", "INFINITE");
  jobConf.setIfUnset("pgbulkload.duplicate.errors", "INFINITE");

  // MapReduce settings: job jar, no speculative execution, a single attempt per task.
  jobConf.set("mapred.jar", context.getJarFile());
  jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
  jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
}
 
Example 8
Source Project: hadoop   File: MiniDFSCluster.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the datanode, datanode-HTTP and datanode-IPC addresses into {@code conf}.
 *
 * @param conf configuration to update
 * @param setupHostsFile when true, a concrete free port is chosen for the datanode address and
 *        that address is appended to the file named by {@code DFS_HOSTS}; when false the
 *        datanode address is {@code 127.0.0.1:0}
 * @param checkDataNodeAddrConfig when true, addresses already present in {@code conf} are kept
 *        ({@code setIfUnset}); when false they are overwritten
 * @throws IOException if {@code setupHostsFile} is true but {@code DFS_HOSTS} is empty or unset
 */
protected void setupDatanodeAddress(Configuration conf, boolean setupHostsFile,
                         boolean checkDataNodeAddrConfig) throws IOException {
  final String anyPortAddress = "127.0.0.1:0";

  // Decide which datanode address to register.
  String hostsFile = null;
  String datanodeAddress = anyPortAddress;
  if (setupHostsFile) {
    hostsFile = conf.get(DFS_HOSTS, "").trim();
    if (hostsFile.isEmpty()) {
      throw new IOException("Parameter dfs.hosts is not setup in conf");
    }
    // Setup datanode in the include file, if it is defined in the conf
    datanodeAddress = "127.0.0.1:" + NetUtils.getFreeSocketPort();
  }

  if (checkDataNodeAddrConfig) {
    conf.setIfUnset(DFS_DATANODE_ADDRESS_KEY, datanodeAddress);
  } else {
    conf.set(DFS_DATANODE_ADDRESS_KEY, datanodeAddress);
  }

  // The freshly chosen address goes into the hosts file even if conf kept an older value.
  if (setupHostsFile) {
    addToFile(hostsFile, datanodeAddress);
    LOG.info("Adding datanode " + datanodeAddress + " to hosts file " + hostsFile);
  }

  if (checkDataNodeAddrConfig) {
    conf.setIfUnset(DFS_DATANODE_HTTP_ADDRESS_KEY, anyPortAddress);
    conf.setIfUnset(DFS_DATANODE_IPC_ADDRESS_KEY, anyPortAddress);
  } else {
    conf.set(DFS_DATANODE_HTTP_ADDRESS_KEY, anyPortAddress);
    conf.set(DFS_DATANODE_IPC_ADDRESS_KEY, anyPortAddress);
  }
}
 
Example 9
Source Project: dremio-oss   File: DremioFileSystem.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Fills in the {@code dremio.azure.*} settings on {@code conf} for the storage account
 * addressed by {@code uri}: the account name (without any domain suffix), the storage mode
 * derived from the original URI scheme, and the credentials for the detected auth type.
 *
 * <p>The scheme and auth-type comparisons are written constant-first so that a missing
 * {@code old_scheme} property (or a null auth type) does not cause a
 * {@link NullPointerException}; a missing scheme simply leaves {@code dremio.azure.mode}
 * untouched, exactly like an unrecognized scheme, and a null auth type is reported as
 * unsupported.
 *
 * @param conf configuration to read from and update in place
 * @param uri URI from which the account name is resolved
 * @throws UnsupportedOperationException if the auth type is neither shared key nor OAuth
 */
private void updateAzureConfiguration(Configuration conf, URI uri) {
  // default is key based, same as azure sources
  String accountName = getAccountNameFromURI(conf.get("authority"), uri);
  // strip any url information if any
  String accountNameWithoutSuffix = accountName.split("[.]")[0];
  conf.set("dremio.azure.account", accountNameWithoutSuffix);
  String authType = getAuthTypeForAccount(conf, accountName, accountNameWithoutSuffix);

  // conf.get returns null when "old_scheme" was never set; compare constant-first to stay null-safe.
  // NOTE(review): assumes the FileSystemUriSchemes constants are Strings — confirm.
  String oldScheme = conf.get("old_scheme");
  if (FileSystemUriSchemes.WASB_SCHEME.equals(oldScheme) || FileSystemUriSchemes.WASB_SECURE_SCHEME.equals(oldScheme)) {
    conf.setIfUnset("dremio.azure.mode","STORAGE_V1");
  } else if (FileSystemUriSchemes.ABFS_SCHEME.equals(oldScheme) || FileSystemUriSchemes.ABFS_SECURE_SCHEME.equals(oldScheme)) {
    conf.setIfUnset("dremio.azure.mode","STORAGE_V2");
  }

  if (AuthType.SharedKey.name().equals(authType)) {
    String key = getValueForProperty(conf, FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME, accountName,
      accountNameWithoutSuffix, "Account Key not present in the configuration.");
    conf.set("dremio.azure.key", key);
    conf.set("dremio.azure.credentialsType", "ACCESS_KEY");
  } else if (AuthType.OAuth.name().equals(authType)) {
    updateOAuthConfig(conf, accountName, accountNameWithoutSuffix);
    conf.set("dremio.azure.credentialsType", "AZURE_ACTIVE_DIRECTORY");
  } else {
    throw new UnsupportedOperationException("This credentials type is not supported " + authType);
  }

}
 
Example 10
Source Project: big-c   File: MiniDFSCluster.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers the datanode, datanode-HTTP and datanode-IPC addresses in {@code conf}.
 * With {@code setupHostsFile} the datanode gets a concrete free port and that address is
 * appended to the file named by {@code DFS_HOSTS}; otherwise it gets {@code 127.0.0.1:0}.
 *
 * @param conf configuration to update
 * @param setupHostsFile whether to pick a concrete port and record it in the hosts file
 * @param checkDataNodeAddrConfig when true, values already present in {@code conf} are kept
 * @throws IOException if {@code setupHostsFile} is true but {@code DFS_HOSTS} is empty or unset
 */
protected void setupDatanodeAddress(Configuration conf, boolean setupHostsFile,
                         boolean checkDataNodeAddrConfig) throws IOException {
  if (!setupHostsFile) {
    applyDatanodeSetting(conf, DFS_DATANODE_ADDRESS_KEY, "127.0.0.1:0", checkDataNodeAddrConfig);
  } else {
    final String includeFile = conf.get(DFS_HOSTS, "").trim();
    if (includeFile.isEmpty()) {
      throw new IOException("Parameter dfs.hosts is not setup in conf");
    }
    // Setup datanode in the include file, if it is defined in the conf
    final String freePortAddress = "127.0.0.1:" + NetUtils.getFreeSocketPort();
    applyDatanodeSetting(conf, DFS_DATANODE_ADDRESS_KEY, freePortAddress, checkDataNodeAddrConfig);
    addToFile(includeFile, freePortAddress);
    LOG.info("Adding datanode " + freePortAddress + " to hosts file " + includeFile);
  }
  applyDatanodeSetting(conf, DFS_DATANODE_HTTP_ADDRESS_KEY, "127.0.0.1:0", checkDataNodeAddrConfig);
  applyDatanodeSetting(conf, DFS_DATANODE_IPC_ADDRESS_KEY, "127.0.0.1:0", checkDataNodeAddrConfig);
}

/** Stores {@code value} under {@code key}; keeps an existing value when {@code keepExisting} is true. */
private static void applyDatanodeSetting(Configuration conf, String key, String value,
                         boolean keepExisting) {
  if (keepExisting) {
    conf.setIfUnset(key, value);
  } else {
    conf.set(key, value);
  }
}
 
Example 11
Source Project: sqoop-on-spark   File: TestSqoopLoader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixtures before each test method: a configuration whose intermediate data format
 * falls back to {@code CSVIntermediateDataFormat}, and a mocked {@code TaskAttemptContext} that
 * hands out a rows-written counter and a new {@code JobConf}.
 */
@BeforeMethod(alwaysRun = true)
public void setUp() {
  conf = new Configuration();
  final String csvFormatClass = CSVIntermediateDataFormat.class.getName();
  conf.setIfUnset(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT, csvFormatClass);

  jobContextMock = mock(TaskAttemptContext.class);

  // Stub the counter looked up under SqoopCounters.ROWS_WRITTEN.
  final GenericCounter rowsWritten = new GenericCounter("test", "test-me");
  when(((TaskAttemptContext) jobContextMock).getCounter(SqoopCounters.ROWS_WRITTEN)).thenReturn(rowsWritten);

  // Stub the configuration returned by the mocked context.
  final org.apache.hadoop.mapred.JobConf stubbedJobConf = new org.apache.hadoop.mapred.JobConf();
  when(jobContextMock.getConfiguration()).thenReturn(stubbedJobConf);
}
 
Example 12
Source Project: phoenix   File: PhoenixIndexCodec.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores the coprocessor environment, registers {@code PhoenixIndexFailurePolicy} as the index
 * failure policy unless one is already configured, and selects the key-value builder.
 *
 * @param env region coprocessor environment supplying the configuration
 */
@Override
public void initialize(RegionCoprocessorEnvironment env) {
    this.env = env;

    // Install handler that will attempt to disable the index first before killing the
    // region server; an explicitly configured policy is left in place.
    final Configuration regionConf = env.getConfiguration();
    final String failurePolicyClass = PhoenixIndexFailurePolicy.class.getName();
    regionConf.setIfUnset(IndexWriter.INDEX_FAILURE_POLICY_CONF_KEY, failurePolicyClass);

    // Use the GenericKeyValueBuilder, as it's been shown in perf testing that ClientKeyValue
    // doesn't help.
    // TODO: Jesse to investigate more
    this.kvBuilder = GenericKeyValueBuilder.INSTANCE;
}
 
Example 13
Source Project: hbase   File: FSUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Ensures a short circuit read buffer size is present in the configuration. When the DFS key
 * reads back as unset, it is filled from the "hbase."-prefixed key, or from twice
 * {@code HConstants.DEFAULT_BLOCKSIZE} if that is absent too.
 * @param conf configuration to inspect and, if needed, update
 */
public static void checkShortCircuitReadBufferSize(final Configuration conf) {
  // DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY is only defined in h2
  final String dfsKey = "dfs.client.read.shortcircuit.buffer.size";
  final int unsetMarker = -1;
  // Anything other than the marker means a size is already set and will be used as is.
  if (conf.getInt(dfsKey, unsetMarker) == unsetMarker) {
    // Short circuit buffer size is normally not set; put in place the hbase wanted size.
    final int fallbackSize = HConstants.DEFAULT_BLOCKSIZE * 2;
    final int wantedSize = conf.getInt("hbase." + dfsKey, fallbackSize);
    conf.setIfUnset(dfsKey, Integer.toString(wantedSize));
  }
}
 
Example 14
/**
 * Passes {@code conf} to the superclass and then fills in the region-replication option for
 * this test class with {@code DEFAULT_REGION_REPLICATION} when no value is present.
 *
 * @param conf configuration to adjust in place
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  // The option key is prefixed with the simple name of the concrete test class.
  final String testClassName = this.getClass().getSimpleName();
  final String replicationKey =
    String.format("%s.%s", testClassName, LoadTestTool.OPT_REGION_REPLICATION);
  final String replicationDefault = Integer.toString(DEFAULT_REGION_REPLICATION);
  conf.setIfUnset(replicationKey, replicationDefault);
}
 
Example 15
Source Project: hbase   File: CommonFSUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Makes sure the short circuit read buffer size is configured. If the DFS key reads back as
 * unset, it is populated from the "hbase."-prefixed key, falling back to twice
 * {@code HConstants.DEFAULT_BLOCKSIZE}.
 * @param conf must not be null
 */
public static void checkShortCircuitReadBufferSize(final Configuration conf) {
  // DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY is only defined in h2
  final String dfsKey = "dfs.client.read.shortcircuit.buffer.size";
  final String hbaseKey = "hbase." + dfsKey;
  final int sentinel = -1;

  // If a size is set, return -- we will use it.
  final boolean alreadySized = conf.getInt(dfsKey, sentinel) != sentinel;
  if (alreadySized) {
    return;
  }

  // But short circuit buffer size is normally not set.  Put in place the hbase wanted size.
  final int hbaseSize = conf.getInt(hbaseKey, HConstants.DEFAULT_BLOCKSIZE * 2);
  conf.setIfUnset(dfsKey, String.valueOf(hbaseSize));
}
 
Example 16
Source Project: spork   File: MRToTezHelper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Translates MR settings into their Tez runtime equivalents. For every MR key in
 * {@code DeprecatedKeys.getMRToTezRuntimeParamMap()} that has a value in {@code mrConf},
 * the MR key is removed from {@code conf} and the mapped Tez key is set on {@code conf}
 * unless it already has a value.
 *
 * @param conf  Configuration on which MR equivalent Tez settings should be set
 * @param mrConf Configuration that contains MR settings
 */
private static void convertMRToTezRuntimeConf(Configuration conf, Configuration mrConf) {
    for (Entry<String, String> mapping : DeprecatedKeys.getMRToTezRuntimeParamMap().entrySet()) {
        final String mrKey = mapping.getKey();
        if (mrConf.get(mrKey) == null) {
            // nothing configured for this MR key
            continue;
        }
        final String tezKey = mapping.getValue();
        conf.unset(mrKey);
        // Read the MR value only after the unset above, in case both arguments are the same object.
        final String mrValue = mrConf.get(mrKey);
        LOG.info("Setting " + tezKey + " to "
                + mrValue + " from MR setting "
                + mrKey);
        conf.setIfUnset(tezKey, mrValue);
    }
}
 
Example 17
/**
 * Stores the coprocessor environment, registers {@code PhoenixIndexFailurePolicy} as the index
 * failure policy unless one is already configured, and obtains the key-value builder for the
 * environment's HBase version.
 *
 * @param env region coprocessor environment supplying configuration and HBase version
 */
@Override
public void initialize(RegionCoprocessorEnvironment env) {
    this.env = env;

    // Install handler that will attempt to disable the index first before killing the
    // region server; an explicitly configured policy is left in place.
    final Configuration regionConf = env.getConfiguration();
    final String failurePolicyClass = PhoenixIndexFailurePolicy.class.getName();
    regionConf.setIfUnset(IndexWriter.INDEX_FAILURE_POLICY_CONF_KEY, failurePolicyClass);

    final String hbaseVersion = env.getHBaseVersion();
    this.builder = KeyValueBuilder.get(hbaseVersion);
}
 
Example 18
/**
 * Copies every entry of {@code hiveConf} into {@code conf}, leaving properties that
 * {@code conf} already defines unchanged.
 * @param hiveConf source of the entries to copy
 * @param conf configuration that receives the missing entries
 */
public static void addHiveConfigs(Configuration hiveConf, Configuration conf) {
  hiveConf.forEach(property -> conf.setIfUnset(property.getKey(), property.getValue()));
}
 
Example 19
/**
 * Initializes this instance from {@code conf}: resolves the credentials type, account kind,
 * endpoint and account name, builds the matching auth token provider, and selects a container
 * provider.
 *
 * <p>{@code CREDENTIALS_TYPE} falls back to {@code ACCESS_KEY} and {@code AZURE_ENDPOINT} to the
 * prototype's endpoint suffix when they are not already present; both defaults are written
 * back into {@code conf}.
 *
 * @param conf configuration to read from (and to write the defaults into)
 * @throws IOException if the credentials type is not recognized, or if the container provider
 *         for a Storage V1 account with Azure Active Directory cannot be created (the original
 *         failure is attached as the cause)
 * @throws NullPointerException if the account, the key, or one of the OAuth client settings
 *         reads back as null from {@code conf}
 */
@Override
protected void setup(Configuration conf) throws IOException {
  parentConf = conf;
  conf.setIfUnset(CREDENTIALS_TYPE, ACCESS_KEY.name());
  credentialsType = AzureAuthenticationType.valueOf(conf.get(CREDENTIALS_TYPE));

  // TODO: check if following are needed
  accountKind = AccountKind.valueOf(conf.get(MODE));
  secure = conf.getBoolean(SECURE, true);
  proto = accountKind.getPrototype(secure);
  conf.setIfUnset(AZURE_ENDPOINT, proto.getEndpointSuffix());
  azureEndpoint = conf.get(AZURE_ENDPOINT);
  // -- End --

  account = Objects.requireNonNull(conf.get(ACCOUNT));
  setupClient(account, secure);

  // Build the token provider that matches the configured credentials type.
  if (credentialsType == AZURE_ACTIVE_DIRECTORY) {
    clientID = Objects.requireNonNull(conf.get(CLIENT_ID));
    tokenEndpoint = Objects.requireNonNull(conf.get(TOKEN_ENDPOINT));
    clientSecret = Objects.requireNonNull(conf.get(CLIENT_SECRET));
    this.authProvider = new AzureOAuthTokenProvider(tokenEndpoint, clientID, clientSecret);
  } else if (credentialsType == ACCESS_KEY) {
    key = Objects.requireNonNull(conf.get(KEY));
    this.authProvider = new AzureSharedKeyAuthTokenProvider(account, key);
  } else {
    throw new IOException("Unrecognized credential type");
  }

  // An explicit container list takes precedence over discovering containers from the account.
  final String[] containerList = getContainerNames(conf.get(CONTAINER_LIST));
  if (containerList != null) {
    containerProvider = new ProvidedContainerList(this, containerList);
  } else {
    if (accountKind == AccountKind.STORAGE_V2) {
      containerProvider = new AzureAsyncContainerProvider(asyncHttpClient, account, authProvider, this, secure);
    } else {
      final String connection = String.format("%s://%s.%s", proto.getEndpointScheme(), account, azureEndpoint);
      switch (credentialsType) {
        case ACCESS_KEY:
          containerProvider = new BlobContainerProvider(this, connection, account, key);
          break;
        case AZURE_ACTIVE_DIRECTORY:
          try {
            containerProvider = new BlobContainerProviderOAuth(this, connection, account, (AzureOAuthTokenProvider) authProvider);
          } catch (Exception e) {
            // Keep the original exception as the cause so the root failure is not lost.
            throw new IOException("Unable to establish connection to Storage V1 account with Azure Active Directory", e);
          }
          break;
        default:
          break;
      }
    }
  }
}
 
Example 20
Source Project: spork   File: TezDagBuilder.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Return EdgeProperty that connects two vertices.
 *
 * <p>Looks up the edge descriptor for {@code from} in {@code to.inEdges}, builds a
 * {@link Configuration} from the properties of {@code pc}, fills it with the combiner,
 * key/value/comparator/partitioner and sort settings taken from the edge and the operators,
 * and attaches that configuration as the user payload of both the input and the output
 * descriptor of the edge.
 *
 * <p>The order of the {@code conf.set} calls matters: later calls overwrite values written by
 * earlier ones (for example the secondary-key section and {@code edge.partitionerClass} both
 * write the partitioner class attribute).
 *
 * @param from operator at the source of the edge
 * @param to operator at the destination of the edge
 * @return EdgeProperty built from the edge's data source type, scheduling type and the two
 *         descriptors, either with the edge's data movement type or with a null
 *         {@code EdgeManagerPluginDescriptor}
 * @throws IOException declared by this method; presumably raised by serialization or by the
 *         helper calls below — TODO confirm
 */
private EdgeProperty newEdge(TezOperator from, TezOperator to)
        throws IOException {
    TezEdgeDescriptor edge = to.inEdges.get(from.getOperatorKey());
    PhysicalPlan combinePlan = edge.combinePlan;

    InputDescriptor in = InputDescriptor.create(edge.inputClassName);
    OutputDescriptor out = OutputDescriptor.create(edge.outputClassName);

    // Start from the PigContext properties; every setting below is layered on top of them.
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
    if (!combinePlan.isEmpty()) {
        addCombiner(combinePlan, to, conf);
    }

    List<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(from.plan,
            POLocalRearrangeTez.class);

    // Only the first local rearrange whose output key names the destination operator is used.
    for (POLocalRearrangeTez lr : lrs) {
        if (lr.getOutputKey().equals(to.getOperatorKey().toString())) {
            byte keyType = lr.getKeyType();
            setIntermediateOutputKeyValue(keyType, conf, to, lr.isConnectedToPackage());
            // In case of secondary key sort, main key type is the actual key type
            conf.set("pig.reduce.key.type", Byte.toString(lr.getMainKeyType()));
            break;
        }
    }

    // MRPartitioner is only a fallback; a partitioner class already present in conf is kept.
    conf.setIfUnset(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
            MRPartitioner.class.getName());

    // Edge-level key/value/comparator classes are written only when the edge defines them.
    if (edge.getIntermediateOutputKeyClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS,
                edge.getIntermediateOutputKeyClass());
    }

    if (edge.getIntermediateOutputValueClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS,
                edge.getIntermediateOutputValueClass());
    }

    if (edge.getIntermediateOutputKeyComparatorClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                edge.getIntermediateOutputKeyComparatorClass());
    }

    conf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    conf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    conf.set("udf.import.list",
            ObjectSerializer.serialize(PigContext.getPackageImportList()));

    if(to.isGlobalSort() || to.isLimitAfterSort()){
        conf.set("pig.sortOrder",
                ObjectSerializer.serialize(to.getSortOrder()));
    }

    // Secondary-key edges overwrite the key class and key comparator written above.
    if (edge.isUseSecondaryKey()) {
        conf.set("pig.secondarySortOrder",
                ObjectSerializer.serialize(edge.getSecondarySortOrder()));
        conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                SecondaryKeyPartitioner.class.getName());
        // These needs to be on the vertex as well for POShuffleTezLoad to pick it up.
        // Tez framework also expects this to be per vertex and not edge. IFile.java picks
        // up keyClass and valueClass from vertex config. TODO - check with Tez folks
        // In MR - job.setSortComparatorClass() or MRJobConfig.KEY_COMPARATOR
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                PigSecondaryKeyComparator.class.getName());
        // In MR - job.setOutputKeyClass() or MRJobConfig.OUTPUT_KEY_CLASS
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, NullableTuple.class.getName());
        setGroupingComparator(conf, PigSecondaryKeyGroupComparator.class.getName());
    }

    // An explicit partitioner on the edge wins over the secondary-key partitioner set above.
    if (edge.partitionerClass != null) {
        conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                edge.partitionerClass.getName());
    }

    // NOTE(review): same key and same value expression as the "udf.import.list" set earlier in
    // this method; looks redundant unless a helper in between alters it — confirm before removing.
    conf.set("udf.import.list",
            ObjectSerializer.serialize(PigContext.getPackageImportList()));

    MRToTezHelper.processMRSettings(conf, globalConf);

    // Input and output descriptors receive the same configuration payload and history text.
    String historyString = convertToHistoryText("", conf);
    in.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString);
    out.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString);

    // Non-broadcast edges into a global sort or skewed join whose estimated parallelism is
    // not -1 are created with a (null) EdgeManagerPluginDescriptor instead of a movement type.
    if (edge.dataMovementType!=DataMovementType.BROADCAST && to.getEstimatedParallelism()!=-1 && (to.isGlobalSort()||to.isSkewedJoin())) {
        // Use custom edge
        return EdgeProperty.create((EdgeManagerPluginDescriptor)null,
                edge.dataSourceType, edge.schedulingType, out, in);
        }

    return EdgeProperty.create(edge.dataMovementType, edge.dataSourceType,
            edge.schedulingType, out, in);
}