Java Code Examples for org.apache.tez.runtime.library.api.KeyValueWriter#write()

The following examples show how to use org.apache.tez.runtime.library.api.KeyValueWriter#write(). Each example is taken from the project and source file noted above it.
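Before the individual examples, here is a minimal, hypothetical sketch of the pattern they all share: a processor obtains a KeyValueWriter from one of its LogicalOutputs and emits key/value pairs with write(). The class name MyProcessor and the output name "output" are illustrative assumptions, not taken from any of the examples below.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.processor.SimpleProcessor;

public class MyProcessor extends SimpleProcessor {

  public MyProcessor(ProcessorContext context) {
    super(context);
  }

  @Override
  public void run() throws Exception {
    // Cast the writer rather than the output, so the output implementation can be
    // swapped without changing the key/value contract seen by the processor.
    KeyValueWriter writer = (KeyValueWriter) getOutputs().get("output").getWriter();
    // write() accepts any key/value objects supported by the configured serialization;
    // Writable types are used here to match the examples that follow.
    writer.write(new Text("word"), new IntWritable(1));
  }
}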
Example 1
Source File: BroadcastAndOneToOneExample.java    From incubator-tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getOutputs().size() == 1);
  OnFileUnorderedKVOutput output = (OnFileUnorderedKVOutput) getOutputs().values().iterator()
      .next();
  KeyValueWriter kvWriter = (KeyValueWriter) output.getWriter();
  kvWriter.write(word, new IntWritable(getContext().getTaskIndex()));
  byte[] userPayload = getContext().getUserPayload();
  if (userPayload != null) {
    boolean doLocalityCheck = userPayload[0] > 0;
    if (doLocalityCheck) {
      ObjectRegistry objectRegistry = ObjectRegistryFactory.getObjectRegistry();
      String entry = String.valueOf(getContext().getTaskIndex());
      objectRegistry.add(ObjectLifeCycle.DAG, entry, entry);
    }
  }
}
 
Example 2
Source File: HashJoinExample.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkState(getInputs().size() == 1);
  Preconditions.checkState(getOutputs().size() == 1);
  // not looking up inputs and outputs by name because there is just one
  // instance and this processor is used in many different DAGs with
  // different names for inputs and outputs
  LogicalInput input = getInputs().values().iterator().next();
  Reader rawReader = input.getReader();
  Preconditions.checkState(rawReader instanceof KeyValueReader);
  LogicalOutput output = getOutputs().values().iterator().next();

  KeyValueReader reader = (KeyValueReader) rawReader;
  KeyValueWriter writer = (KeyValueWriter) output.getWriter();

  while (reader.next()) {
    Object val = reader.getCurrentValue();
    // The data value itself is the join key. Simply write it out as the
    // key.
    // The output value is null.
    writer.write(val, NullWritable.get());
  }
}
 
Example 3
Source File: BroadcastAndOneToOneExample.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getOutputs().size() == 1);
  UnorderedKVOutput output = (UnorderedKVOutput) getOutputs().values().iterator()
      .next();
  KeyValueWriter kvWriter = (KeyValueWriter) output.getWriter();
  kvWriter.write(word, new IntWritable(getContext().getTaskIndex()));
  ByteBuffer userPayload =
      getContext().getUserPayload() == null ? null : getContext().getUserPayload().getPayload();
  if (userPayload != null) {
    boolean doLocalityCheck = userPayload.get(0) > 0;
    if (doLocalityCheck) {
      ObjectRegistry objectRegistry = getContext().getObjectRegistry();
      String entry = String.valueOf(getContext().getTaskIndex());
      objectRegistry.cacheForDAG(entry, entry);
    }
  }
}
 
Example 4
Source File: WordCount.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 1);
  Preconditions.checkArgument(getOutputs().size() == 1);
  // the recommended approach is to cast the reader/writer to a specific type instead
  // of casting the input/output. This allows the actual input/output type to be replaced
  // without affecting the semantic guarantees of the data type that are represented by
  // the reader and writer.
  // The inputs/outputs are referenced via the names assigned in the DAG.
  KeyValueReader kvReader = (KeyValueReader) getInputs().get(INPUT).getReader();
  KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(SUMMATION).getWriter();
  while (kvReader.next()) {
    StringTokenizer itr = new StringTokenizer(kvReader.getCurrentValue().toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      // Count 1 every time a word is observed. The word is the key and 1 is the value.
      kvWriter.write(word, one);
    }
  }
}
 
Example 5
Source File: CartesianProduct.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter();
  KeyValueReader kvReader1 = (KeyValueReader) getInputs().get(VERTEX1).getReader();
  KeyValueReader kvReader2 = (KeyValueReader) getInputs().get(VERTEX2).getReader();
  KeyValueReader kvReader3 = (KeyValueReader) getInputs().get(VERTEX3).getReader();
  Set<String> v2TokenSet = new HashSet<>();
  Set<String> v3TokenSet = new HashSet<>();

  while (kvReader2.next()) {
    v2TokenSet.add(kvReader2.getCurrentKey().toString());
  }
  while (kvReader3.next()) {
    v3TokenSet.add(kvReader3.getCurrentKey().toString());
  }

  while (kvReader1.next()) {
    String left = kvReader1.getCurrentKey().toString();
    if (v3TokenSet.contains(left)) {
      for (String right : v2TokenSet) {
        kvWriter.write(left, right);
      }
    }
  }
}
 
Example 6
Source File: IntersectExample.java    From incubator-tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkState(getInputs().size() == 1);
  Preconditions.checkState(getOutputs().size() == 1);
  LogicalInput input = getInputs().values().iterator().next();
  Reader rawReader = input.getReader();
  Preconditions.checkState(rawReader instanceof KeyValueReader);
  LogicalOutput output = getOutputs().values().iterator().next();

  KeyValueReader reader = (KeyValueReader) rawReader;
  KeyValueWriter writer = (KeyValueWriter) output.getWriter();

  while (reader.next()) {
    Object val = reader.getCurrentValue();
    writer.write(val, NullWritable.get());
  }
}
 
Example 7
Source File: TopK.java    From sequenceiq-samples with Apache License 2.0
@Override
public void run() throws Exception {
    Preconditions.checkArgument(getInputs().size() == 1);
    Preconditions.checkArgument(getOutputs().size() == 1);
    KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter();
    UnorderedKVReader kvReader = (UnorderedKVReader) getInputs().get(SUM).getReader();
    while (kvReader.next()) {
        localTop.store(
                Integer.valueOf(kvReader.getCurrentKey().toString()),
                kvReader.getCurrentValue().toString()
        );
    }
    Map<Integer, List<String>> result = localTop.getTopKSorted();
    for (int top : result.keySet()) {
        kvWriter.write(new Text(join(result.get(top), ",")), new IntWritable(top));
    }
}
 
Example 8
Source File: CartesianProduct.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 1);
  Preconditions.checkArgument(getOutputs().size() == 1);
  KeyValueReader kvReader = (KeyValueReader) getInputs().get(INPUT).getReader();
  KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(VERTEX3).getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object value = kvReader.getCurrentValue();
    kvWriter.write(new Text((String)key), new IntWritable(1));
  }
}
 
Example 9
Source File: FilterByWordOutputProcessor.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  
  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
  }

  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : outputs.values()) {
    output.start();
  }

  LogicalInput li = inputs.values().iterator().next();
  if (! (li instanceof UnorderedKVInput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with ShuffledUnorderedKVInput");
  }

  LogicalOutput lo = outputs.values().iterator().next();
  if (! (lo instanceof MROutput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
  }

  UnorderedKVInput kvInput = (UnorderedKVInput) li;
  MROutput mrOutput = (MROutput) lo;

  KeyValueReader kvReader = kvInput.getReader();
  KeyValueWriter kvWriter = mrOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object value = kvReader.getCurrentValue();

    kvWriter.write(key, value);
  }
}
 
Example 10
Source File: OrderedWordCount.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 1);
  Preconditions.checkArgument(getOutputs().size() == 1);
  KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter();
  KeyValuesReader kvReader = (KeyValuesReader) getInputs().get(SUMMATION).getReader();
  while (kvReader.next()) {
    Object sum = kvReader.getCurrentKey();
    for (Object word : kvReader.getCurrentValues()) {
      kvWriter.write(word, sum);
    }
  }
  // deriving from SimpleMRProcessor takes care of committing the output
}
 
Example 11
Source File: TestMROutput.java    From tez with Apache License 2.0
@Override
public void run() throws Exception {
  KeyValueWriter writer = (KeyValueWriter) getOutputs().values().iterator().next().getWriter();
  for (int i=0; i<1000000; ++i) {
    writer.write("key", "value");
  }
}
 
Example 12
Source File: TestPipelinedShuffle.java    From tez with Apache License 2.0
@Override public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 0);
  Preconditions.checkArgument(getOutputs().size() == 1);
  KeyValueWriter writer = (KeyValueWriter) getOutputs().get("reducer").getWriter();

  for (int i = 0; i < KEYS_PER_MAPPER; i++) {
    writer.write(new Text(RandomStringUtils.randomAlphanumeric(1000)),
        new Text(RandomStringUtils.randomAlphanumeric(1000)));
  }
}
 
Example 13
Source File: TopKDataGen.java    From sequenceiq-samples with Apache License 2.0
@Override
public void run() throws Exception {
    KeyValueWriter streamOutputWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter();
    long sizeLarge = 0;
    while (sizeLarge < streamOutputFileSize) {
        String str = createRowString();
        Text text = new Text(str);
        int size = text.getLength();
        streamOutputWriter.write(text, NullWritable.get());
        sizeLarge += size;
    }
}
 
Example 14
Source File: TopK.java    From sequenceiq-samples with Apache License 2.0
@Override
public void run() throws Exception {
    Preconditions.checkArgument(getInputs().size() == 1);
    Preconditions.checkArgument(getOutputs().size() == 1);
    // The KeyValues reader provides all values for a given key. The aggregation of values per key
    // is done by the LogicalInput. Since the key is the word and the values are its counts in
    // the different TokenProcessors, summing all values per key provides the sum for that word.
    KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(WRITER).getWriter();
    KeyValuesReader kvReader = (KeyValuesReader) getInputs().get(TOKENIZER).getReader();
    while (kvReader.next()) {
        Text currentWord = (Text) kvReader.getCurrentKey();
        int sum = 0;
        for (Object val : kvReader.getCurrentValues()) {
            sum += ((IntWritable) val).get();
        }
        localTop.store(sum, currentWord.toString());
    }

    // write to the output only the local top results
    Map<Integer, List<String>> result = localTop.getTopK();
    for (int top : result.keySet()) {
        IntWritable topWritable = new IntWritable(top);
        for (String string : result.get(top)) {
            word.set(string);
            kvWriter.write(topWritable, word);
        }
    }
}
 
Example 15
Source File: FilterByWordInputProcessor.java    From incubator-tez with Apache License 2.0
@Override
public void run(Map<String, LogicalInput> inputs,
    Map<String, LogicalOutput> outputs) throws Exception {
  
  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }

  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }
  
  for (LogicalInput input : inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : outputs.values()) {
    output.start();
  }

  LogicalInput li = inputs.values().iterator().next();
  if (! (li instanceof MRInput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
  }

  LogicalOutput lo = outputs.values().iterator().next();
  if (! (lo instanceof OnFileUnorderedKVOutput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
  }

  
  
  
  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  OnFileUnorderedKVOutput kvOutput = (OnFileUnorderedKVOutput) lo;

  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }

  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();

  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object val = kvReader.getCurrentValue();

    Text valText = (Text) val;
    String readVal = valText.toString();
    if (readVal.contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      TextLongPair outVal = new TextLongPair(srcFile, lineNum);
      kvWriter.write(valText, outVal);
    }
  }
}
 
Example 16
Source File: FilterByWordOutputProcessor.java    From incubator-tez with Apache License 2.0
@Override
public void run(Map<String, LogicalInput> inputs,
    Map<String, LogicalOutput> outputs) throws Exception {
  
  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
  }

  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : outputs.values()) {
    output.start();
  }

  LogicalInput li = inputs.values().iterator().next();
  if (! (li instanceof ShuffledUnorderedKVInput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with ShuffledUnorderedKVInput");
  }

  LogicalOutput lo = outputs.values().iterator().next();
  if (! (lo instanceof MROutput)) {
    throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
  }

  ShuffledUnorderedKVInput kvInput = (ShuffledUnorderedKVInput) li;
  MROutput mrOutput = (MROutput) lo;

  KeyValueReader kvReader = kvInput.getReader();
  KeyValueWriter kvWriter = mrOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object value = kvReader.getCurrentValue();

    kvWriter.write(key, value);
  }
  if (processorContext.canCommit()) {
    mrOutput.commit();
  } else {
    mrOutput.abort();
  }
}
 
Example 17
Source File: FilterByWordInputProcessor.java    From tez with Apache License 2.0
@Override
public void run(Map<String, LogicalInput> _inputs,
    Map<String, LogicalOutput> _outputs) throws Exception {
  this.inputs = _inputs;
  this.outputs = _outputs;
  this.progressHelper = new ProgressHelper(this.inputs, getContext(),this.getClass().getSimpleName());
  if (_inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }

  if (_outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }
  
  for (LogicalInput input : _inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : _outputs.values()) {
    output.start();
  }

  LogicalInput li = _inputs.values().iterator().next();
  if (! (li instanceof MRInput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
  }

  LogicalOutput lo = _outputs.values().iterator().next();
  if (! (lo instanceof UnorderedKVOutput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
  }
  progressHelper.scheduleProgressTaskService(0, 100);
  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }

  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();

  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object val = kvReader.getCurrentValue();

    Text valText = (Text) val;
    String readVal = valText.toString();
    if (readVal.contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      TextLongPair outVal = new TextLongPair(srcFile, lineNum);
      kvWriter.write(valText, outVal);
    }
  }
}
 
Example 18
Source File: ReduceProcessor.java    From tez with Apache License 2.0
void runOldReducer(JobConf job,
    final MRTaskReporter reporter,
    KeyValuesReader input,
    RawComparator comparator,
    Class keyClass,
    Class valueClass,
    final KeyValueWriter output) throws IOException, InterruptedException {

  Reducer reducer =
      ReflectionUtils.newInstance(job.getReducerClass(), job);

  // make output collector

  OutputCollector collector =
      new OutputCollector() {
    public void collect(Object key, Object value)
        throws IOException {
      output.write(key, value);
    }
  };

  // apply reduce function
  try {
    ReduceValuesIterator values =
        new ReduceValuesIterator(
            input, reporter, reduceInputValueCounter);

    values.informReduceProgress();
    while (values.more()) {
      reduceInputKeyCounter.increment(1);
      reducer.reduce(values.getKey(), values, collector, reporter);
      values.informReduceProgress();
    }

    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);
    
    //Clean up: repeated in catch block below
    reducer.close();
    //End of clean up.
  } catch (IOException ioe) {
    try {
      reducer.close();
    } catch (IOException ignored) {
    }

    throw ioe;
  }
}
 
Example 19
Source File: ReduceProcessor.java    From incubator-tez with Apache License 2.0
void runOldReducer(JobConf job,
    final MRTaskReporter reporter,
    KeyValuesReader input,
    RawComparator comparator,
    Class keyClass,
    Class valueClass,
    final KeyValueWriter output) throws IOException, InterruptedException {

  Reducer reducer =
      ReflectionUtils.newInstance(job.getReducerClass(), job);

  // make output collector

  OutputCollector collector =
      new OutputCollector() {
    public void collect(Object key, Object value)
        throws IOException {
      output.write(key, value);
    }
  };

  // apply reduce function
  try {
    ReduceValuesIterator values =
        new ReduceValuesIterator(
            input, reporter, reduceInputValueCounter);

    values.informReduceProgress();
    while (values.more()) {
      reduceInputKeyCounter.increment(1);
      reducer.reduce(values.getKey(), values, collector, reporter);
      values.informReduceProgress();
    }

    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);
    
    //Clean up: repeated in catch block below
    reducer.close();
    //End of clean up.
  } catch (IOException ioe) {
    try {
      reducer.close();
    } catch (IOException ignored) {
    }

    throw ioe;
  }
}
 
Example 20
Source File: ReduceProcessor.java    From incubator-tez with Apache License 2.0
void runNewReducer(JobConf job,
    final MRTaskReporter reporter,
    ShuffledMergedInputLegacy input,
    RawComparator comparator,
    Class keyClass,
    Class valueClass,
    final KeyValueWriter out
    ) throws IOException,InterruptedException,
    ClassNotFoundException {

  // make a task context so we can get the classes
  org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();

  // make a reducer
  org.apache.hadoop.mapreduce.Reducer reducer =
      (org.apache.hadoop.mapreduce.Reducer)
      ReflectionUtils.newInstance(taskContext.getReducerClass(), job);

  // wrap value iterator to report progress.
  final TezRawKeyValueIterator rawIter = input.getIterator();
  TezRawKeyValueIterator rIter = new TezRawKeyValueIterator() {
    public void close() throws IOException {
      rawIter.close();
    }
    public DataInputBuffer getKey() throws IOException {
      return rawIter.getKey();
    }
    public Progress getProgress() {
      return rawIter.getProgress();
    }
    public DataInputBuffer getValue() throws IOException {
      return rawIter.getValue();
    }
    public boolean next() throws IOException {
      boolean ret = rawIter.next();
      reporter.setProgress(rawIter.getProgress().getProgress());
      return ret;
    }
  };

  org.apache.hadoop.mapreduce.RecordWriter trackedRW =
      new org.apache.hadoop.mapreduce.RecordWriter() {

    @Override
    public void write(Object key, Object value) throws IOException,
    InterruptedException {
      out.write(key, value);
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException,
    InterruptedException {
    }
  };

  org.apache.hadoop.mapreduce.Reducer.Context reducerContext =
      createReduceContext(
          reducer, job, taskAttemptId,
          rIter, reduceInputKeyCounter,
          reduceInputValueCounter,
          trackedRW,
          committer,
          reporter, comparator, keyClass,
          valueClass);

  reducer.run(reducerContext);

  // Set progress to 1.0f if there was no exception,
  reporter.setProgress(1.0f);

  trackedRW.close(reducerContext);
}