Java Code Examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()

The following examples show how to use org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration(). Each example comes from an open-source project; the source file, project, and license are noted above each snippet.
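Before the individual examples, here is a minimal sketch of the pattern they all share: the TaskAttemptContext handed to a RecordReader, RecordWriter, or OutputCommitter carries the per-task Configuration, and calling getConfiguration() is the usual way job-level settings reach each task. The class name MyRecordReader and the key "my.custom.property" below are hypothetical and used only for illustration; they do not come from any of the projects listed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Hypothetical reader, shown only to illustrate where getConfiguration() is typically called.
public class MyRecordReader extends RecordReader<LongWritable, Text> {

  private Path file;
  private int batchSize;

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    // The per-task Configuration carries job settings into this task attempt.
    Configuration conf = context.getConfiguration();
    // "my.custom.property" is a made-up key; a real job would read its own keys here.
    batchSize = conf.getInt("my.custom.property", 100);
    file = ((FileSplit) split).getPath();
  }

  @Override
  public boolean nextKeyValue() { return false; }

  @Override
  public LongWritable getCurrentKey() { return null; }

  @Override
  public Text getCurrentValue() { return null; }

  @Override
  public float getProgress() { return 0f; }

  @Override
  public void close() { }
}

The same call appears in getRecordWriter(TaskAttemptContext) and in output committers, as the examples below show.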
Example 1
Source File: OraOopOutputFormatBase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
protected void applyMapperJdbcUrl(TaskAttemptContext context, int mapperId) {

    Configuration conf = context.getConfiguration();

    // Retrieve the JDBC URL that should be used by this mapper.
    // We achieve this by modifying the JDBC URL property in the
    // configuration, prior to the OraOopDBRecordWriter's (ancestral)
    // constructor using the configuration to establish a connection
    // to the database - via DBConfiguration.getConnection()...
    String mapperJdbcUrlPropertyName =
        OraOopUtilities.getMapperJdbcUrlPropertyName(mapperId, conf);

    // Get this mapper's JDBC URL
    String mapperJdbcUrl = conf.get(mapperJdbcUrlPropertyName, null);

    LOG.debug(String.format("Mapper %d has a JDBC URL of: %s", mapperId,
        mapperJdbcUrl == null ? "<null>" : mapperJdbcUrl));

    if (mapperJdbcUrl != null) {
      conf.set(DBConfiguration.URL_PROPERTY, mapperJdbcUrl);
    }
}
 
Example 2
Source File: DBOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
  String tableName = dbConf.getOutputTableName();
  String[] fieldNames = dbConf.getOutputFieldNames();

  if (fieldNames == null) {
    fieldNames = new String[dbConf.getOutputFieldCount()];
  }

  try {
    Connection connection = dbConf.getConnection();
    PreparedStatement statement = null;

    statement = connection.prepareStatement(
                  constructQuery(tableName, fieldNames));
    return new com.cloudera.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter(
                   connection, statement);
  } catch (Exception ex) {
    throw new IOException(ex);
  }
}
 
Example 3
Source File: CRAMRecordReader.java    From Hadoop-BAM with MIT License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
  if(isInitialized) {
    close();
  }
  isInitialized = true;

  final Configuration conf = context.getConfiguration();
  final FileSplit fileSplit = (FileSplit) split;
  final Path file  = fileSplit.getPath();

  String refSourcePath = conf.get(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY);
  ReferenceSource refSource = new ReferenceSource(refSourcePath == null ? null :
      NIOFileUtil.asPath(refSourcePath));

  seekableStream = WrapSeekable.openPath(conf, file);
  start = fileSplit.getStart();
  length = fileSplit.getLength();
  long end = start + length;
  // CRAMIterator right shifts boundaries by 16 so we do the reverse here
  // also subtract one from end since CRAMIterator's boundaries are inclusive
  long[] boundaries = new long[] {start << 16, (end - 1) << 16};
  ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf);
  cramIterator = new CRAMIterator(seekableStream, refSource, boundaries, stringency);
}
 
Example 4
Source File: BinaryReader.java    From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<DocumentURI, BytesWritable> getRecordWriter(
        TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new BinaryWriter(getOutputPath(context), 
            context.getConfiguration());
}
 
Example 5
Source File: IndirectBigQueryOutputFormat.java    From hadoop-connectors with Apache License 2.0
/** Wraps the delegate's committer in a {@link IndirectBigQueryOutputCommitter}. */
@Override
public OutputCommitter createCommitter(TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  OutputCommitter delegateCommitter = getDelegate(conf).getOutputCommitter(context);
  OutputCommitter committer = new IndirectBigQueryOutputCommitter(context, delegateCommitter);
  return committer;
}
 
Example 6
Source File: KeyValueOutputFormat.java    From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<KEYOUT, VALUEOUT> getRecordWriter(
        TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    TextArrayWritable hosts = getHosts(conf);
    String host = InternalUtilities.getHost(hosts);
    return new KeyValueWriter<KEYOUT, VALUEOUT>(conf, host);
}
 
Example 7
Source File: MainframeDatasetRecordReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {

  split = (MainframeDatasetInputSplit)inputSplit;
  conf = taskAttemptContext.getConfiguration();
  inputClass = (Class<T>) (conf.getClass(
              DBConfiguration.INPUT_CLASS_PROPERTY, null));
  key = null;
  datasetRecord = null;
  numberRecordRead = 0;
  datasetProcessed = 0;
}
 
Example 8
Source File: DatabaseTransformOutputFormat.java    From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<DocumentURI, DatabaseDocumentWithMeta> getRecordWriter(
    TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    fastLoad = Boolean.valueOf(conf.get(OUTPUT_FAST_LOAD));
    Map<String, ContentSource> sourceMap = getSourceMap(fastLoad, context);
    // construct the DatabaseTransformContentWriter
    return new DatabaseTransformWriter<DatabaseDocumentWithMeta>(conf,
        sourceMap, fastLoad, am);
}
 
Example 9
Source File: IndexedStorage.java    From spork with Apache License 2.0
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
        TaskAttemptContext context) throws IOException,
        InterruptedException {

    Configuration conf = context.getConfiguration();

    FileSystem fs = FileSystem.get(conf);
    Path file = this.getDefaultWorkFile(context, "");
    FSDataOutputStream fileOut = fs.create(file, false);

    IndexManager indexManager = new IndexManager(offsetsToIndexKeys);
    indexManager.createIndexFile(fs, file);
    return new IndexedStorageRecordWriter(fileOut, this.fieldDelimiter, indexManager);
}
 
Example 10
Source File: AtomErrorDataTypeHandler.java    From datawave with Apache License 2.0
@Override
public void setup(TaskAttemptContext context) {
    super.setup(context);
    
    this.errorHelper = (ErrorShardedIngestHelper) (TypeRegistry.getType("error").getIngestHelper(context.getConfiguration()));
    
    this.conf = context.getConfiguration();
    markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
}
 
Example 11
Source File: ArchiveOutputFormat.java    From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<DocumentURI, MarkLogicDocument> getRecordWriter(
    TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Path path = new Path(conf.get(ConfigConstants.CONF_OUTPUT_FILEPATH));
    return new ArchiveWriter(path, context);
}
 
Example 12
Source File: GenerateData.java    From big-c with Apache License 2.0
@Override
public RecordWriter<NullWritable,BytesWritable> getRecordWriter(
    TaskAttemptContext job) throws IOException {

  return new ChunkWriter(getDefaultWorkFile(job, ""),
      job.getConfiguration());
}
 
Example 13
Source File: OraOopOutputFormatBase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
protected void updateBatchSizeInConfigurationToAllowOracleAppendValuesHint(
    TaskAttemptContext context) {

  Configuration conf = context.getConfiguration();

  // If using APPEND_VALUES, check the batch size and commit frequency...
  int originalBatchesPerCommit =
      conf.getInt(AsyncSqlOutputFormat.STATEMENTS_PER_TRANSACTION_KEY, 0);
  if (originalBatchesPerCommit != 1) {
    conf.setInt(AsyncSqlOutputFormat.STATEMENTS_PER_TRANSACTION_KEY, 1);
    LOG.info(String
        .format(
            "The number of batch-inserts to perform per commit has been "
                + "changed from %d to %d. This is in response "
                + "to the Oracle APPEND_VALUES hint being used.",
            originalBatchesPerCommit, 1));
  }

  int originalBatchSize =
      conf.getInt(AsyncSqlOutputFormat.RECORDS_PER_STATEMENT_KEY, 0);
  int minAppendValuesBatchSize =
      OraOopUtilities.getMinAppendValuesBatchSize(conf);
  if (originalBatchSize < minAppendValuesBatchSize) {
    conf.setInt(AsyncSqlOutputFormat.RECORDS_PER_STATEMENT_KEY,
        minAppendValuesBatchSize);
    LOG.info(String
        .format(
            "The number of rows per batch-insert has been changed from %d "
                + "to %d. This is in response "
                + "to the Oracle APPEND_VALUES hint being used.",
            originalBatchSize, minAppendValuesBatchSize));
  }
}
 
Example 14
Source File: Warp10OutputFormat.java    From warp10-platform with Apache License 2.0
@Override
public RecordWriter<Writable, Writable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  
  Properties props = new Properties();
  
  Configuration conf = context.getConfiguration();
  
  props.setProperty(Warp10RecordWriter.WARP10_GZIP, Warp10InputFormat.getProperty(conf, this.suffix, Warp10RecordWriter.WARP10_GZIP, "false"));
  props.setProperty(Warp10RecordWriter.WARP10_ENDPOINT, Warp10InputFormat.getProperty(conf, this.suffix, Warp10RecordWriter.WARP10_ENDPOINT, ""));
  props.setProperty(Warp10RecordWriter.WARP10_TOKEN, Warp10InputFormat.getProperty(conf, this.suffix, Warp10RecordWriter.WARP10_TOKEN, ""));
  props.setProperty(Warp10RecordWriter.WARP10_MAXRATE, Warp10InputFormat.getProperty(conf, this.suffix, Warp10RecordWriter.WARP10_MAXRATE, Long.toString(Long.MAX_VALUE)));
  
  return new Warp10RecordWriter(props);
}
 
Example 15
Source File: AtomDataTypeHandler.java    From datawave with Apache License 2.0
@Override
public void setup(TaskAttemptContext context) {
    conf = context.getConfiguration();
    tableName = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_TABLE_NAME, String.class);
    categoryTableName = tableName + "Categories";
    subCategories = new HashMap<>();
    markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
    
    TypeRegistry.getInstance(context.getConfiguration());
    String[] types = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_TYPES_TO_PROCESS, String[].class);
    // Set up the ingest helpers for the known datatypes.
    
    fieldNames = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_FIELD_NAMES, String[].class);
    // Configuration.getStrings() eats empty values, we don't want to do that. Split it ourselves.
    String aliases = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_FIELD_ALIASES, String.class);
    fieldAliases = StringUtils.split(aliases, ',', true); // keeps empty elements
    String overrides = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_FIELD_VALUE_OVERRIDES, String.class);
    fieldOverrides = StringUtils.split(overrides, ',', true); // keeps empty elements
    
    sCategories = StringUtils.split(ConfigurationHelper.isNull(context.getConfiguration(), ATOM_CATEGORY_SUB_FIELD, String.class), ',', false);
    
    Set<String> tSet;
    for (String s : sCategories) {
        String field_value[] = StringUtils.split(s, ':', false);
        if (field_value.length == 2 && (!Strings.isNullOrEmpty(field_value[0]) && !Strings.isNullOrEmpty(field_value[1]))) {
            
            if (!subCategories.containsKey(field_value[0])) {
                
                tSet = new HashSet<>();
                
            } else {
                
                tSet = subCategories.get(field_value[0]);
                
            }
            
            System.err.println("Value: " + field_value[0] + " " + field_value[1]);
            tSet.add(field_value[1]);
            subCategories.put(field_value[0], tSet);
            
        }
        
    }
    
    // Make sure these 3 arrays are all the same size.
    if (fieldNames.length != fieldAliases.length || fieldNames.length != fieldOverrides.length) {
        throw new IllegalArgumentException("AtomDataTypeHandler, configured fieldNames, fieldAliases, and fieldOverrides are different lengths.  "
                        + "Please fix the configuration. " + fieldNames.length + "," + fieldAliases.length + "," + fieldOverrides.length);
    }
}
 
Example 16
Source File: KeyValueInputFormat.java    From marklogic-contentpump with Apache License 2.0
@Override
public RecordReader<KEYIN, VALUEIN> createRecordReader(InputSplit split,
        TaskAttemptContext context) 
throws IOException, InterruptedException {
    return new KeyValueReader<KEYIN, VALUEIN>(context.getConfiguration());
}
 
Example 17
Source File: GryoOutputFormat.java    From tinkerpop with Apache License 2.0
@Override
public RecordWriter<NullWritable, VertexWritable> getRecordWriter(final TaskAttemptContext job) throws IOException, InterruptedException {
    return new GryoRecordWriter(getDataOutputStream(job), job.getConfiguration());
}
 
Example 18
Source File: RowOutputFormat.java    From gemfirexd-oss with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<Key, VALUE> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  return new GfxdRecordWriter(context.getConfiguration());
}
 
Example 19
Source File: RDFReader.java    From marklogic-contentpump with Apache License 2.0
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (version == null)
        throw new IOException("Server Version is null");
    String majorVersion = version.substring(0, version.indexOf('.'));
    graphSupported = Integer.valueOf(majorVersion) >= 8;
    conf = context.getConfiguration();

    String rdfopt = conf.get(ConfigConstants.RDF_STREAMING_MEMORY_THRESHOLD);
    if (rdfopt != null) {
        INMEMORYTHRESHOLD = Long.parseLong(rdfopt);
    }

    rdfopt = conf.get(ConfigConstants.RDF_TRIPLES_PER_DOCUMENT);
    if (rdfopt != null) {
        MAXTRIPLESPERDOCUMENT = Integer.parseInt(rdfopt);
    }

    String fnAsColl = conf.get(ConfigConstants.CONF_OUTPUT_FILENAME_AS_COLLECTION);
    if (fnAsColl != null) {
        LOG.warn("The -filename_as_collection has no effect with input_type RDF, use -output_collections instead.");
    }

    String[] collections = conf.getStrings(MarkLogicConstants.OUTPUT_COLLECTION);
    outputGraph = conf.get(MarkLogicConstants.OUTPUT_GRAPH);
    outputOverrideGraph = conf.get(MarkLogicConstants.OUTPUT_OVERRIDE_GRAPH);
    // if no default graph is set and output_collections is set
    ignoreCollectionQuad = (outputGraph == null && collections != null)
        || outputOverrideGraph != null;
    hasOutputCol = (collections != null);

    Class<? extends Writable> valueClass = RDFWritable.class;

    @SuppressWarnings("unchecked")
    VALUEIN localValue = (VALUEIN) ReflectionUtils.newInstance(valueClass, 
            conf);

    value = localValue;
    encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
            DEFAULT_ENCODING);

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());
    
    FileStatus status = fs.getFileStatus(file);
    if(status.isDirectory()) {
        iterator = new FileIterator((FileSplit)inSplit, context);
        inSplit = iterator.next();
    }

    try {
        initStream(inSplit);
    } catch (IOException e) {
        LOG.error("Invalid input: " + file.getName() + ": " + e.getMessage());
        throw e;
    }
    String[] perms = conf.getStrings(MarkLogicConstants.OUTPUT_PERMISSION);
    if(perms!=null) {
        defaultPerms = PermissionUtil.getPermissions(perms).toArray(
            new ContentPermission[perms.length>>1]);
    } else {
        List<ContentPermission> tmp = PermissionUtil.getDefaultPermissions(conf,roleMap);
        if(tmp!=null)
            defaultPerms = tmp.toArray(new ContentPermission[tmp.size()]);
    }
        
    if (roleMapExists) 
        initExistingMapPerms();
}
 
Example 20
Source File: TreeMergeOutputFormat.java    From examples with Apache License 2.0
@Override
    public void close(TaskAttemptContext context) throws IOException {
      LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
      writeShardNumberFile(context);      
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
        
        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
            .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;
          
        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
//        writerConfig.setRAMBufferSizeMB(100); // improve performance
//        writerConfig.setMaxThreadStates(1);
        
        // disable compound file to improve performance
        // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);          
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);       
//          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);
        
        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
        
        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        long start = System.nanoTime();
        
        writer.addIndexes(indexes); 
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) 
        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746
        
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1000000);
        }
        float secs = (System.nanoTime() - start) / 1e9f;
        LOG.info("Logical merge took {} secs", secs);        
        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        start = System.nanoTime();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments); 
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data 
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1000000);
        }
        secs = (System.nanoTime() - start) / 1e9f;
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
        
        start = System.nanoTime();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        secs = (System.nanoTime() - start) / 1e9f;
        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }