org.apache.hadoop.io.WritableComparable Java Examples

The following examples show how to use org.apache.hadoop.io.WritableComparable. Each example is drawn from an open-source project; the source file and its license are noted above the code.
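
WritableComparable combines Hadoop's Writable serialization contract with java.lang.Comparable; a type must implement it to serve as a MapReduce key, since keys are both serialized across the shuffle and sorted. Before the project examples, here is a minimal sketch of a custom key type; the class name and fields are illustrative and not taken from any project below.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

// Hypothetical composite key: orders by year, then month.
public class YearMonthKey implements WritableComparable<YearMonthKey> {
    private int year;
    private int month;

    public YearMonthKey() {}  // Hadoop needs a no-arg constructor for reflection

    public YearMonthKey(int year, int month) {
        this.year = year;
        this.month = month;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);   // serialize fields in a fixed order
        out.writeInt(month);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        year = in.readInt();  // deserialize in the same order as write()
        month = in.readInt();
    }

    @Override
    public int compareTo(YearMonthKey other) {
        int c = Integer.compare(year, other.year);
        return c != 0 ? c : Integer.compare(month, other.month);
    }

    @Override
    public int hashCode() {          // keep consistent with equals() so the
        return 31 * year + month;    // default HashPartitioner groups keys correctly
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof YearMonthKey)) return false;
        YearMonthKey k = (YearMonthKey) o;
        return year == k.year && month == k.month;
    }
}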
Example #1
Source File: RandomWriter.java    From big-c with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key, 
                Writable value,
                Context context) throws IOException,InterruptedException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    int keyLength = minKeySize + 
      (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
    randomKey.setSize(keyLength);
    randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
    int valueLength = minValueSize +
      (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
    randomValue.setSize(valueLength);
    randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
    context.write(randomKey, randomValue);
    numBytesToWrite -= keyLength + valueLength;
    context.getCounter(Counters.BYTES_WRITTEN).increment(keyLength + valueLength);
    context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
    if (++itemCount % 200 == 0) {
      context.setStatus("wrote record " + itemCount + ". " + 
                         numBytesToWrite + " bytes left.");
    }
  }
  context.setStatus("done with " + itemCount + " records.");
}
 
Example #2
Source File: TSortComparator.java    From BigDataArchitect with Apache License 2.0
@Override
public int compare(WritableComparable a, WritableComparable b) {
    TKey k1 = (TKey)a;
    TKey k2 = (TKey)b;
    // Sort by year, then month, then temperature (wd) in descending order:
    int c1 = Integer.compare(k1.getYear(), k2.getYear());
    if (c1 == 0) {
        int c2 = Integer.compare(k1.getMonth(), k2.getMonth());
        if (c2 == 0) {
            return -Integer.compare(k1.getWd(), k2.getWd());
        }
        return c2;
    }
    return c1;
}
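
A comparator like this only takes effect once it is registered on the job. Below is a minimal wiring sketch against the new MapReduce API, assuming TSortComparator extends WritableComparator (which its overridden compare(WritableComparable, WritableComparable) signature suggests); the job name and driver class are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class TemperatureSortDriver {
    // Sketch only: mapper, reducer, and input/output paths are omitted.
    public static Job configureJob(Configuration conf) throws Exception {
        Job job = Job.getInstance(conf, "temperature-sort");
        job.setMapOutputKeyClass(TKey.class);              // key type from this example
        job.setSortComparatorClass(TSortComparator.class); // shuffle sorts via compare()
        return job;
    }
}

WritableComparator subclasses conventionally also declare a no-arg constructor that calls super(TKey.class, true), so the framework can instantiate them and deserialize keys before comparing.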
 
Example #3
Source File: StreamXmlRecordReader.java    From RDFS with Apache License 2.0
public synchronized boolean next(WritableComparable key, Writable value) throws IOException {
  numNext++;
  if (pos_ >= end_) {
    return false;
  }

  DataOutputBuffer buf = new DataOutputBuffer();
  if (!readUntilMatchBegin()) {
    return false;
  }
  if (!readUntilMatchEnd(buf)) {
    return false;
  }

  // There is only one element; key/value splitting is not done here.
  byte[] record = new byte[buf.getLength()];
  System.arraycopy(buf.getData(), 0, record, 0, record.length);

  numRecStats(record, 0, record.length);

  ((Text) key).set(record);
  ((Text) value).set("");

  return true;
}
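
For context, the old-API RecordReader contract implemented here is pull-style: the caller allocates reusable key/value objects and next(...) overwrites them on every call. A consumption loop might look like this sketch:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

public final class XmlRecordDump {
    // Sketch of a typical read loop; next() mutates the caller's objects.
    static void dump(RecordReader<Text, Text> reader) throws IOException {
        Text key = new Text();
        Text value = new Text();
        while (reader.next(key, value)) {
            System.out.println(key);  // this reader leaves the value empty
        }
        reader.close();
    }
}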
 
Example #4
Source File: RandomWriter.java    From tez with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key, 
                Writable value,
                Context context) throws IOException,InterruptedException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    int keyLength = minKeySize + 
      (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
    randomKey.setSize(keyLength);
    randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
    int valueLength = minValueSize +
      (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
    randomValue.setSize(valueLength);
    randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
    context.write(randomKey, randomValue);
    numBytesToWrite -= keyLength + valueLength;
    context.getCounter(Counters.BYTES_WRITTEN).increment(keyLength + valueLength);
    context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
    if (++itemCount % 200 == 0) {
      context.setStatus("wrote record " + itemCount + ". " + 
                         numBytesToWrite + " bytes left.");
    }
  }
  context.setStatus("done with " + itemCount + " records.");
}
 
Example #5
Source File: SortValidator.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition = Integer.valueOf(
                                  inputFile.substring(inputFile.lastIndexOf("part")+5)
                                  ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example #6
Source File: SegmentReader.java    From anthelion with Apache License 2.0
public RecordWriter<WritableComparable, Writable> getRecordWriter(
    final FileSystem fs, JobConf job,
    String name, final Progressable progress) throws IOException {

  final Path segmentDumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

  // Get the old copy out of the way
  if (fs.exists(segmentDumpFile)) fs.delete(segmentDumpFile, true);

  final PrintStream printStream = new PrintStream(fs.create(segmentDumpFile));
  return new RecordWriter<WritableComparable, Writable>() {
    public synchronized void write(WritableComparable key, Writable value) throws IOException {
      printStream.println(value);
    }

    public synchronized void close(Reporter reporter) throws IOException {
      printStream.close();
    }
  };
}
 
Example #7
Source File: AggregationStorage.java    From Arabesque with Apache License 2.0
public String toOutputString() {
    StringBuilder strBuilder = new StringBuilder();

    ArrayList<K> keys = new ArrayList<>(keyValueMap.keySet());

    if (WritableComparable.class.isAssignableFrom(keyClass)) {
        ArrayList<? extends WritableComparable> orderedKeys = (ArrayList<? extends WritableComparable>) keys;
        Collections.sort(orderedKeys);
    }

    for (K key : keys) {
        strBuilder.append(key.toString());
        strBuilder.append(": ");
        strBuilder.append(keyValueMap.get(key));
        strBuilder.append('\n');
    }

    return strBuilder.toString();
}
 
Example #8
Source File: PigOutputFormat.java    From spork with Apache License 2.0
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext taskattemptcontext)
            throws IOException, InterruptedException {
    setupUdfEnvAndStores(taskattemptcontext);
    if(mapStores.size() + reduceStores.size() == 1) {
        // single store case
        POStore store;
        if(mapStores.size() == 1) {
            store = mapStores.get(0);
        } else {
            store = reduceStores.get(0);
        }
        StoreFuncInterface sFunc = store.getStoreFunc();
        // set output location
        PigOutputFormat.setLocation(taskattemptcontext, store);
        // The above call should have updated the conf in the JobContext
        // to have the output location - now call checkOutputSpecs()
        RecordWriter writer = sFunc.getOutputFormat().getRecordWriter(
                taskattemptcontext);
        return new PigRecordWriter(writer, sFunc, Mode.SINGLE_STORE);
    } else {
       // multi store case - in this case, all writing is done through
       // MapReducePOStoreImpl - set up a dummy RecordWriter
       return new PigRecordWriter(null, null, Mode.MULTI_STORE);
    }
}
 
Example #9
Source File: DistCh.java    From hadoop with Apache License 2.0
/** Run a FileOperation */
public void map(Text key, FileOperation value,
    OutputCollector<WritableComparable<?>, Text> out, Reporter reporter
    ) throws IOException {
  try {
    value.run(jobconf);
    ++succeedcount;
    reporter.incrCounter(Counter.SUCCEED, 1);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FAIL, 1);

    String s = "FAIL: " + value + ", " + StringUtils.stringifyException(e);
    out.collect(null, new Text(s));
    LOG.info(s);
  } finally {
    reporter.setStatus(getCountString());
  }
}
 
Example #10
Source File: TFileStorage.java    From spork with Apache License 2.0
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
                TaskAttemptContext job) throws IOException,
                InterruptedException {
    Configuration conf = job.getConfiguration();
    String codec = conf.get(PigConfiguration.PIG_TEMP_FILE_COMPRESSION_CODEC, "");
    if (!codec.equals("lzo") && !codec.equals("gz") && !codec.equals("gzip"))
        throw new IOException(
                        "Invalid temporary file compression codec [" + codec + "]. Expected compression codecs are gz(gzip) and lzo");
    if (codec.equals("gzip")) {
        codec = "gz";
    }
    mLog.info(codec + " compression codec in use");
    Path file = getDefaultWorkFile(job, "");
    return new TFileRecordWriter(file, codec, conf);
}
 
Example #11
Source File: TestMapRed.java    From RDFS with Apache License 2.0
public void reduce(WritableComparable key, Iterator values,
                   OutputCollector output, Reporter reporter
                   ) throws IOException {
  if (first) {
    first = false;
    MapOutputFile mapOutputFile = new MapOutputFile(taskId.getJobID());
    mapOutputFile.setConf(conf);
    Path input = mapOutputFile.getInputFile(0, taskId);
    FileSystem fs = FileSystem.get(conf);
    assertTrue("reduce input exists " + input, fs.exists(input));
    SequenceFile.Reader rdr = 
      new SequenceFile.Reader(fs, input, conf);
    assertEquals("is reduce input compressed " + input, 
                 compressInput, 
                 rdr.isCompressed());
    rdr.close();          
  }
}
 
Example #12
Source File: TFileRecordWriter.java    From spork with Apache License 2.0
@Override
public void write(WritableComparable wc, Tuple t) throws IOException,
                InterruptedException {
    DataOutputStream outputKey = writer.prepareAppendKey(KEY0.getLength());
    try {
        outputKey.write(KEY0.getBytes(), 0, KEY0.getLength());
    }
    finally {
        outputKey.close();
    }
    // we really only want to write the tuple (value) out here
    DataOutputStream outputValue = writer.prepareAppendValue(-1);

    try {
        sedes.writeDatum(outputValue, t);
    }
    finally {
        outputValue.close();
    }
}
 
Example #13
Source File: UtilsForTests.java    From big-c with Apache License 2.0
/** The waiting function.  The reduce exits once it gets a signal. Here the
 * signal is the file existence. 
 */
public void reduce(WritableComparable key, Iterator<Writable> val, 
                   OutputCollector<WritableComparable, Writable> output,
                   Reporter reporter)
throws IOException {
  if (fs != null) {
    while (!fs.exists(signal)) {
      try {
        reporter.progress();
        synchronized (this) {
          this.wait(1000); // wait for 1 sec
        }
      } catch (InterruptedException ie) {
        System.out.println("Interrupted while the map was waiting for the"
                           + " signal.");
        break;
      }
    }
  } else {
    throw new IOException("Could not get the DFS!!");
  }
}
 
Example #14
Source File: RandomWriter.java    From hadoop-book with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key,
        Writable value,
        OutputCollector<BytesWritable, BytesWritable> output,
        Reporter reporter) throws IOException {
    int itemCount = 0;
    while (numBytesToWrite > 0) {
        int keyLength = minKeySize
                + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
        randomKey.setSize(keyLength);
        randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
        int valueLength = minValueSize
                + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
        randomValue.setSize(valueLength);
        randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
        output.collect(randomKey, randomValue);
        numBytesToWrite -= keyLength + valueLength;
        reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
        reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
        if (++itemCount % 200 == 0) {
            reporter.setStatus("wrote record " + itemCount + ". "
                    + numBytesToWrite + " bytes left.");
        }
    }
    reporter.setStatus("done with " + itemCount + " records.");
}
 
Example #15
Source File: UtilsForTests.java    From hadoop with Apache License 2.0
/** The waiting function.  The map exits once it gets a signal. Here the 
 * signal is the file existence. 
 */
public void map(WritableComparable key, Writable val, 
                OutputCollector<WritableComparable, Writable> output,
                Reporter reporter)
throws IOException {
  if (shouldWait(id)) {
    if (fs != null) {
      while (!fs.exists(getSignalFile(id))) {
        try {
          reporter.progress();
          synchronized (this) {
            this.wait(1000); // wait for 1 sec
          }
        } catch (InterruptedException ie) {
          System.out.println("Interrupted while the map was waiting for "
                             + " the signal.");
          break;
        }
      }
    } else {
      throw new IOException("Could not get the DFS!!");
    }
  }
}
 
Example #16
Source File: RandomWriter.java    From incubator-tez with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key, 
                Writable value,
                Context context) throws IOException,InterruptedException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    int keyLength = minKeySize + 
      (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
    randomKey.setSize(keyLength);
    randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
    int valueLength = minValueSize +
      (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
    randomValue.setSize(valueLength);
    randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
    context.write(randomKey, randomValue);
    numBytesToWrite -= keyLength + valueLength;
    context.getCounter(Counters.BYTES_WRITTEN).increment(keyLength + valueLength);
    context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
    if (++itemCount % 200 == 0) {
      context.setStatus("wrote record " + itemCount + ". " + 
                         numBytesToWrite + " bytes left.");
    }
  }
  context.setStatus("done with " + itemCount + " records.");
}
 
Example #17
Source File: SortValidator.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
        inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example #18
Source File: MapFileReader.java    From DataVec with Apache License 2.0
/**
 * Get a single record from the map file for the given index
 *
 * @param index Index, between 0 and numRecords()-1
 * @return Value from the MapFile
 * @throws IOException If an error occurs during reading
 */
public V getRecord(long index) throws IOException {
    //First: determine which reader to read from...
    int readerIdx = -1;
    for (int i = 0; i < recordIndexesEachReader.size(); i++) {
        Pair<Long, Long> p = recordIndexesEachReader.get(i);
        if (index >= p.getFirst() && index <= p.getSecond()) {
            readerIdx = i;
            break;
        }
    }
    if (readerIdx == -1) {
        throw new IllegalStateException("Index not found in any reader: " + index);
    }

    WritableComparable key = indexToKey.getKeyForIndex(index);
    Writable value = ReflectionUtils.newInstance(recordClass, null);

    V v = (V) readers[readerIdx].get(key, value);
    return v;
}
 
Example #19
Source File: SortValidator.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition = Integer.valueOf(
                                  inputFile.substring(inputFile.lastIndexOf("part")+5)
                                  ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example #20
Source File: CompactWritablesDeserializer.java    From Cubert with Apache License 2.0
private static final WritableComparable<?> createWritable(DataType type)
{
    switch (type)
    {
    case BOOLEAN:
        return new BooleanWritable();
    case BYTE:
        return new ByteWritable();
    case INT:
        return new IntWritable();
    case LONG:
        return new LongWritable();
    case FLOAT:
        return new FloatWritable();
    case DOUBLE:
        return new DoubleWritable();
    case STRING:
        return new Text();
    default:
        return null;
    }
}
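
One plausible call site for such a factory (a sketch, not Cubert's actual code; it assumes access to createWritable and Cubert's DataType enum) allocates one Writable per column and lets each deserialize itself:

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

final class ColumnRowReader {
    // Sketch: read one value per schema column from a serialized row.
    static WritableComparable<?>[] readRow(DataType[] schema, byte[] raw) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(raw));
        WritableComparable<?>[] row = new WritableComparable<?>[schema.length];
        for (int i = 0; i < schema.length; i++) {
            WritableComparable<?> w = createWritable(schema[i]);
            if (w == null) {  // the factory returns null for unsupported types
                throw new IOException("Unsupported column type: " + schema[i]);
            }
            w.readFields(in); // each Writable parses its own byte layout
            row[i] = w;
        }
        return row;
    }
}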
 
Example #21
Source File: ExternalIdentityReducer.java    From big-c with Apache License 2.0
public void reduce(WritableComparable key, Iterator<Writable> values,
                   OutputCollector<WritableComparable, Writable> output,
                   Reporter reporter)
  throws IOException {
  
  while (values.hasNext()) {
    output.collect(key, values.next());
  }
}
 
Example #22
Source File: DistCpV1.java    From big-c with Apache License 2.0
/**
 * Copies a file and validates the copy by checking the checksums.
 * If validation fails, retries (max number of tries is distcp.file.retries)
 * to copy the file.
 */
void copyWithRetries(FileStatus srcstat, Path relativedst,
                     OutputCollector<WritableComparable<?>, Text> out,
                     Reporter reporter) throws IOException {

  // max tries to copy when validation of copy fails
  final int maxRetries = job.getInt(FILE_RETRIES_LABEL, DEFAULT_FILE_RETRIES);
  // save update flag for later copies within the same map task
  final boolean saveUpdate = update;
  
  int retryCnt = 1;
  for (; retryCnt <= maxRetries; retryCnt++) {
    try {
      //copy the file and validate copy
      copy(srcstat, relativedst, out, reporter);
      break;// copy successful
    } catch (IOException e) {
      LOG.warn("Copy of " + srcstat.getPath() + " failed.", e);
      if (retryCnt < maxRetries) {// copy failed and need to retry
        LOG.info("Retrying copy of file " + srcstat.getPath());
        update = true; // set update flag for retries
      }
      else {// no more retries... Give up
        update = saveUpdate;
        throw new IOException("Copy of file failed even with " + retryCnt
                              + " tries.", e);
      }
    }
  }
}
 
Example #23
Source File: FailMapper.java    From big-c with Apache License 2.0
public void map(WritableComparable key, Writable value,
    OutputCollector<WritableComparable, Writable> out, Reporter reporter)
    throws IOException {
  // NOTE- the next line is required for the TestDebugScript test to succeed
  System.err.println("failing map");
  throw new RuntimeException("failing map");
}
 
Example #24
Source File: TransformBaseRunner.java    From BigDataPlatform with GNU General Public License v3.0
public void setupRunner(String jobName, Class<?> runnerClass,
    Class<? extends TableMapper<?, ?>> mapperClass, Class<? extends Reducer<?, ?, ?, ?>> reducerClass,
    Class<? extends WritableComparable<?>> mapOutputKeyClass,
    Class<? extends Writable> mapOutputValueClass,
    Class<? extends WritableComparable<?>> outputKeyClass,
    Class<? extends Writable> outputValueClass) {
  this.setupRunner(jobName, runnerClass, mapperClass, reducerClass,
      mapOutputKeyClass, mapOutputValueClass,
      outputKeyClass, outputValueClass, TransformerOutputFormat.class);
}
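
A call to this overload might look like the sketch below. The job name and the runner, mapper, and reducer classes are placeholders; Text and LongWritable are stock Hadoop types satisfying the WritableComparable and Writable bounds.

// Hypothetical invocation; MyRunner, MyMapper, and MyReducer are placeholders.
setupRunner("active-user-stats", MyRunner.class, MyMapper.class, MyReducer.class,
    Text.class, LongWritable.class,   // map output key/value classes
    Text.class, LongWritable.class);  // final output key/value classes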
 
Example #25
Source File: UtilsForTests.java    From hadoop with Apache License 2.0
public void map(WritableComparable key, Writable value,
    OutputCollector<WritableComparable, Writable> out, Reporter reporter)
    throws IOException {
  //NOTE- the next line is required for the TestDebugScript test to succeed
  System.err.println("failing map");
  throw new RuntimeException("failing map");
}
 
Example #26
Source File: ConfigUtils.java    From tez with Apache License 2.0
public static <K> RawComparator<K> getIntermediateOutputKeyComparator(Configuration conf) {
  Class<? extends RawComparator> theClass = conf.getClass(
      TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, null,
      RawComparator.class);
  if (theClass != null)
    return ReflectionUtils.newInstance(theClass, conf);
  return WritableComparator.get(getIntermediateOutputKeyClass(conf).asSubclass(
      WritableComparable.class));
}
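
The fallback on the last line is Hadoop's standard mechanism: WritableComparator.get(...) returns the comparator registered for a WritableComparable type, or a generic one that deserializes both keys and delegates to compareTo. A small, self-contained illustration:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparator;

public class DefaultComparatorDemo {
    public static void main(String[] args) {
        // IntWritable registers an optimized byte-level comparator.
        WritableComparator cmp = WritableComparator.get(IntWritable.class);
        int result = cmp.compare(new IntWritable(3), new IntWritable(7));
        System.out.println(result);  // negative: 3 sorts before 7
    }
}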
 
Example #27
Source File: Display.java    From hadoop with Apache License 2.0
public TextRecordInputStream(FileStatus f) throws IOException {
  final Path fpath = f.getPath();
  final Configuration lconf = getConf();
  r = new SequenceFile.Reader(lconf, 
      SequenceFile.Reader.file(fpath));
  key = ReflectionUtils.newInstance(
      r.getKeyClass().asSubclass(WritableComparable.class), lconf);
  val = ReflectionUtils.newInstance(
      r.getValueClass().asSubclass(Writable.class), lconf);
  inbuf = new DataInputBuffer();
  outbuf = new DataOutputBuffer();
}
 
Example #28
Source File: PersonComparator.java    From hiped2 with Apache License 2.0
@Override
public int compare(WritableComparable w1, WritableComparable w2) {

	Person p1 = (Person) w1;
	Person p2 = (Person) w2;

	int cmp = p1.getName().compareTo(p2.getName());
	if (cmp != 0) {
		return cmp;
	}

	return p1.getCommonFriends() == p2.getCommonFriends() ? 0
			: (p1.getCommonFriends() > p2.getCommonFriends() ? -1 : 1);
}
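
The hand-rolled ternary at the end implements a descending order on the friend count; an equivalent and more compact tail (Java 7+) swaps the arguments to Integer.compare:

// Equivalent descending tail for the comparator above:
return Integer.compare(p2.getCommonFriends(), p1.getCommonFriends());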
 
Example #29
Source File: TestJobCounters.java    From hadoop with Apache License 2.0
public void reduce(WritableComparable key, Iterator<Writable> val, 
                   OutputCollector<WritableComparable, Writable> output,
                   Reporter reporter)
throws IOException {
  assertNotNull("Reducer not configured!", loader);
  
  // load the memory
  loader.load();
  
  // work as identity reducer
  output.collect(key, key);
}
 
Example #30
Source File: MapFileOutputFormat.java    From big-c with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable, V extends Writable>
Writable getEntry(MapFile.Reader[] readers,
                                Partitioner<K, V> partitioner,
                                K key,
                                V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
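
For completeness, a lookup against MapFile job output typically pairs getEntry with getReaders. The sketch below assumes the new-API org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat and a job that wrote Text keys and values with the default HashPartitioner:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class MapFileLookup {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        MapFile.Reader[] readers = MapFileOutputFormat.getReaders(new Path(args[0]), conf);
        Text value = new Text();
        // Use the job's partitioner so the key hashes to the correct part file.
        Writable found = MapFileOutputFormat.getEntry(
            readers, new HashPartitioner<Text, Text>(), new Text(args[1]), value);
        System.out.println(found == null ? "<not found>" : value.toString());
    }
}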