org.apache.hadoop.io.UTF8 Java Examples

The following examples show how to use org.apache.hadoop.io.UTF8, a Writable for UTF-8 encoded strings. Note that UTF8 has long been deprecated in favor of org.apache.hadoop.io.Text; it survives mainly in older code such as the examples below. Each example is taken from an open-source project, identified in the Source File line above the code.
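Since UTF8 is a Writable wrapper around a UTF-8 encoded string, a minimal round-trip through its static helpers shows the serialization format the examples below rely on. This is a sketch, assuming only hadoop-core on the classpath; the class name Utf8RoundTrip is illustrative.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.UTF8;

public class Utf8RoundTrip {
  public static void main(String[] args) throws Exception {
    // writeString prefixes the UTF-8 bytes with a two-byte length.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buffer);
    UTF8.writeString(out, "hello, hadoop");

    // readString reads the length prefix, then decodes the bytes.
    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
    System.out.println(UTF8.readString(in));   // prints "hello, hadoop"
  }
}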
Example #1
Source File: TestFileSystem.java    From RDFS with Apache License 2.0
public static void readTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);


  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example #2
Source File: MRBench.java    From hadoop with Apache License 2.0
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);
  FileInputFormat.addInputPath(jobConf, INPUT_DIR);
  
  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  
  jobConf.setOutputValueClass(UTF8.class);
  
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);
  
  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);
  
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  jobConf
      .setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  return jobConf; 
}
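setupJob only builds the configuration; MRBench's driver code is not shown here. A plausible caller (a sketch, not the project's verbatim code; numRuns is hypothetical) would submit the same JobConf repeatedly:

JobConf jobConf = setupJob(numMaps, numReduces, jarFile);
for (int i = 0; i < numRuns; i++) {
  // runJob() submits the job and blocks until it finishes.
  JobClient.runJob(jobConf);
}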
 
Example #3
Source File: TestFileSystem.java    From RDFS with Apache License 2.0
public static void seekTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example #4
Source File: TestFileSystem.java    From RDFS with Apache License 2.0
public static void writeTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);
  
  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example #5
Source File: MRBench.java    From big-c with Apache License 2.0
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);
  FileInputFormat.addInputPath(jobConf, INPUT_DIR);
  
  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  
  jobConf.setOutputValueClass(UTF8.class);
  
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);
  
  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);
  
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  jobConf
      .setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  return jobConf; 
}
 
Example #6
Source File: TestFileSystem.java    From hadoop-gpu with Apache License 2.0
public static void writeTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);
  
  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example #7
Source File: MRBench.java    From hadoop-gpu with Apache License 2.0
/**
 * Create the job configuration.
 */
private static JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(MRBench.class);
  FileInputFormat.addInputPath(jobConf, INPUT_DIR);
  
  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  
  jobConf.setOutputValueClass(UTF8.class);
  
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);
  
  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);
  
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  
  return jobConf; 
}
 
Example #8
Source File: KafkaKey.java    From HiveKa with Apache License 2.0
@Override
public void readFields(DataInput in) throws IOException {
  this.leaderId = UTF8.readString(in);
  this.partition = in.readInt();
  this.beginOffset = in.readLong();
  this.offset = in.readLong();
  this.checksum = in.readLong();
  this.topic = in.readUTF();
  this.time = in.readLong();
  this.server = in.readUTF(); // left for legacy
  this.service = in.readUTF(); // left for legacy
  this.partitionMap = new MapWritable();
  try {
    this.partitionMap.readFields(in);
  } catch (IOException e) {
    this.setServer(this.server);
    this.setService(this.service);
  }
}
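Note the defensive deserialization at the end: partitionMap was added after the legacy server/service fields, so records written by an older producer end mid-stream and the readFields call throws. The catch block falls back to rebuilding the state from the legacy fields (presumably setServer and setService fold them into partitionMap), keeping old records readable.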
 
Example #9
Source File: MRBench.java    From RDFS with Apache License 2.0
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);
  FileInputFormat.addInputPath(jobConf, INPUT_DIR);
  
  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  
  jobConf.setOutputValueClass(UTF8.class);
  
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);
  
  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);
  
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  
  return jobConf; 
}
 
Example #10
Source File: TestFileSystem.java    From hadoop-gpu with Apache License 2.0
public static void readTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);


  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example #11
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("z"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not include min/max: " + convertedStats, convertedStats.hasNonNullValue());
  Assert.assertTrue("Stats should have null count: " + convertedStats, convertedStats.isNumNullsSet());
  Assert.assertEquals("Stats should have 3 nulls: " + convertedStats, 3L, convertedStats.getNumNulls());
}
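The min/max values are dropped here because early Parquet writers computed binary statistics with signed byte-wise comparison, which disagrees with the unsigned lexicographic ordering that UTF8 columns require (see PARQUET-686); trusting such stats could let predicate push-down skip row groups that actually contain matches. Example #12 below covers the one safe case: when min equals max, the comparator cannot have mis-ordered anything, so the value is kept.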
 
Example #12
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      ParquetMetadataConverter.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes());
}
 
Example #13
Source File: IOMapperBase.java    From hadoop-gpu with Apache License 2.0
/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task gets the 
 * <tt>key</tt>, which contains the file name, and the 
 * <tt>value</tt>, which is the offset within the file.
 * 
 * The parameters are passed to the abstract method 
 * {@link #doIO(Reporter,String,long)}, which performs the I/O operation 
 * (usually reading or writing data); then 
 * {@link #collectStats(OutputCollector,String,long,Object)} 
 * is called to prepare stat data for a subsequent reducer.
 */
public void map(UTF8 key, 
                LongWritable value,
                OutputCollector<UTF8, UTF8> output, 
                Reporter reporter) throws IOException {
  String name = key.toString();
  long longValue = value.get();
  
  reporter.setStatus("starting " + name + " ::host = " + hostName);
  
  long tStart = System.currentTimeMillis();
  Object statValue = doIO(reporter, name, longValue);
  long tEnd = System.currentTimeMillis();
  long execTime = tEnd - tStart;
  collectStats(output, name, execTime, statValue);
  
  reporter.setStatus("finished " + name + " ::host = " + hostName);
}
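collectStats itself is abstract in IOMapperBase and not shown here. A minimal sketch of an implementation, with hypothetical statistic names, emitting the "s:"/"f:"/"l:" key prefixes that AccumulatingReducer (Example #17) dispatches on:

void collectStats(OutputCollector<UTF8, UTF8> output, String name,
                  long execTime, Object statValue) throws IOException {
  long totalSize = ((Long) statValue).longValue();
  // "l:" keys are summed as longs, "f:" as floats, "s:" concatenated.
  output.collect(new UTF8("l:totalSize"), new UTF8(String.valueOf(totalSize)));
  output.collect(new UTF8("l:execTime"), new UTF8(String.valueOf(execTime)));
  output.collect(new UTF8("f:rate"),
      new UTF8(String.valueOf(totalSize * 1000.0f / execTime)));
  output.collect(new UTF8("s:host"), new UTF8(hostName));
}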
 
Example #14
Source File: TestFileSystem.java    From hadoop-gpu with Apache License 2.0
public static void seekTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example #15
Source File: NamespaceInfo.java    From RDFS with Apache License 2.0
public void readFields(DataInput in) throws IOException {
  buildVersion = UTF8.readString(in);
  layoutVersion = in.readInt();
  namespaceID = in.readInt();
  cTime = in.readLong();
  distributedUpgradeVersion = in.readInt();
}
 
Example #16
Source File: TestFileSystem.java    From hadoop-gpu with Apache License 2.0
public static void createControlFile(FileSystem fs,
                                     long megaBytes, int numFiles,
                                     long seed) throws Exception {

  LOG.info("creating control file: "+megaBytes+" bytes, "+numFiles+" files");

  Path controlFile = new Path(CONTROL_DIR, "files");
  fs.delete(controlFile, true);
  Random random = new Random(seed);

  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, conf, controlFile, 
                              UTF8.class, LongWritable.class, CompressionType.NONE);

  long totalSize = 0;
  long maxSize = ((megaBytes / numFiles) * 2) + 1;
  try {
    while (totalSize < megaBytes) {
      UTF8 name = new UTF8(Long.toString(random.nextLong()));

      long size = random.nextLong();
      if (size < 0)
        size = -size;
      size = size % maxSize;

      //LOG.info(" adding: name="+name+" size="+size);

      writer.append(name, new LongWritable(size));

      totalSize += size;
    }
  } finally {
    writer.close();
  }
  LOG.info("created control file for: "+totalSize+" bytes");
}
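To consume the control file, the matching SequenceFile.Reader API iterates the (name, size) pairs; a sketch, assuming the same fs, conf, CONTROL_DIR and LOG as above:

SequenceFile.Reader reader =
    new SequenceFile.Reader(fs, new Path(CONTROL_DIR, "files"), conf);
try {
  UTF8 name = new UTF8();
  LongWritable size = new LongWritable();
  while (reader.next(name, size)) {   // next() returns false at EOF
    LOG.info("name=" + name + " size=" + size.get());
  }
} finally {
  reader.close();
}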
 
Example #17
Source File: AccumulatingReducer.java    From hadoop-gpu with Apache License 2.0
public void reduce(UTF8 key, 
                   Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output, 
                   Reporter reporter
                   ) throws IOException {
  String field = key.toString();

  reporter.setStatus("starting " + field + " ::host = " + hostName);

  // concatenate strings
  if (field.startsWith("s:")) {
    StringBuilder sSum = new StringBuilder();
    while (values.hasNext())
      sSum.append(values.next().toString()).append(';');
    output.collect(key, new UTF8(sSum.toString()));
    reporter.setStatus("finished " + field + " ::host = " + hostName);
    return;
  }
  // sum float values
  if (field.startsWith("f:")) {
    float fSum = 0;
    while (values.hasNext())
      fSum += Float.parseFloat(values.next().toString());
    output.collect(key, new UTF8(String.valueOf(fSum)));
    reporter.setStatus("finished " + field + " ::host = " + hostName);
    return;
  }
  // sum long values
  if (field.startsWith("l:")) {
    long lSum = 0;
    while (values.hasNext()) {
      lSum += Long.parseLong(values.next().toString());
    }
    output.collect(key, new UTF8(String.valueOf(lSum)));
  }
  reporter.setStatus("finished " + field + " ::host = " + hostName);
}
 
Example #18
Source File: NamespaceInfo.java    From RDFS with Apache License 2.0
public void write(DataOutput out) throws IOException {
  UTF8.writeString(out, getBuildVersion());
  out.writeInt(getLayoutVersion());
  out.writeInt(getNamespaceID());
  out.writeLong(getCTime());
  out.writeInt(getDistributedUpgradeVersion());
}
 
Example #19
Source File: DatanodeDescriptor.java    From RDFS with Apache License 2.0
/** Serialization for FSEditLog */
void readFieldsFromFSEditLog(DataInput in) throws IOException {
  this.name = UTF8.readString(in);
  this.storageID = UTF8.readString(in);
  this.infoPort = in.readShort() & 0x0000ffff;

  this.capacity = in.readLong();
  this.dfsUsed = in.readLong();
  this.remaining = in.readLong();
  this.lastUpdate = in.readLong();
  this.xceiverCount = in.readInt();
  this.location = Text.readString(in);
  this.hostName = Text.readString(in);
  setAdminState(WritableUtils.readEnum(in, AdminStates.class));
}
 
Example #20
Source File: FSImageSerialization.java    From RDFS with Apache License 2.0
/**
 * Reads the path from the image and converts it to byte[][] directly;
 * this saves us an array copy and the conversions to and from String.
 * @param in the input stream to read from
 * @return the array, each element of which is the byte[] representation 
 *            of a path component
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static byte[][] readPathComponents(DataInputStream in)
    throws IOException {
  UTF8 ustr = TL_DATA.get().U_STR;
  
  ustr.readFields(in);
  return DFSUtil.bytes2byteArray(ustr.getBytes(),
    ustr.getLength(), (byte) Path.SEPARATOR_CHAR);
}
 
Example #21
Source File: FSImageSerialization.java    From RDFS with Apache License 2.0
@SuppressWarnings("deprecation")
public static byte[] readBytes(DataInputStream in) throws IOException {
  UTF8 ustr = TL_DATA.get().U_STR;
  ustr.readFields(in);
  int len = ustr.getLength();
  byte[] bytes = new byte[len];
  System.arraycopy(ustr.getBytes(), 0, bytes, 0, len);
  return bytes;
}
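Both helpers above read into a thread-local scratch UTF8 (TL_DATA.get().U_STR) to avoid allocating a new Writable per call; the System.arraycopy then detaches the result from that shared buffer, so callers can safely hold on to the returned bytes.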
 
Example #22
Source File: TestFileSystem.java    From hadoop-gpu with Apache License 2.0
public void map(UTF8 key, LongWritable value,
                OutputCollector<UTF8, LongWritable> collector,
                Reporter reporter)
  throws IOException {
  
  String name = key.toString();
  long size = value.get();
  long seed = Long.parseLong(name);

  random.setSeed(seed);
  reporter.setStatus("creating " + name);

  // write to temp file initially to permit parallel execution
  Path tempFile = new Path(DATA_DIR, name+suffix);
  OutputStream out = fs.create(tempFile);

  long written = 0;
  try {
    while (written < size) {
      if (fastCheck) {
        Arrays.fill(buffer, (byte)random.nextInt(Byte.MAX_VALUE));
      } else {
        random.nextBytes(buffer);
      }
      long remains = size - written;
      int length = (remains<=buffer.length) ? (int)remains : buffer.length;
      out.write(buffer, 0, length);
      written += length;
      reporter.setStatus("writing "+name+"@"+written+"/"+size);
    }
  } finally {
    out.close();
  }
  // rename to final location
  fs.rename(tempFile, new Path(DATA_DIR, name));

  collector.collect(new UTF8("bytes"), new LongWritable(written));

  reporter.setStatus("wrote " + name);
}
 
Example #23
Source File: MRBench.java    From hadoop with Apache License 2.0
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output, Reporter reporter) throws IOException 
{
  while(values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}
 
Example #24
Source File: MRBench.java    From hadoop-gpu with Apache License 2.0
public void map(WritableComparable key, Text value,
                OutputCollector<UTF8, UTF8> output,
                Reporter reporter) throws IOException 
{
  String line = value.toString();
  output.collect(new UTF8(process(line)), new UTF8(""));
}
 
Example #25
Source File: MRBench.java    From hadoop-gpu with Apache License 2.0
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output, Reporter reporter) throws IOException 
{
  while(values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}
 
Example #26
Source File: DatanodeID.java    From hadoop-gpu with Apache License 2.0
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
  name = UTF8.readString(in);
  storageID = UTF8.readString(in);
  // The infoPort read could be negative if the port is a large number
  // (more than 15 bits, but less than 16 bits, in storage size), because
  // readShort() sign-extends the value. So mask off the upper two bytes
  // (and hence the sign bits) before setting the field.
  this.infoPort = in.readShort() & 0x0000ffff;
}
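A worked illustration of the masking (values are illustrative, not from the source): the HDFS info port 50070 needs all 16 bits, so readShort() hands it back negative until the mask restores the unsigned value.

short raw = (short) 50070;      // stored on the wire as 0xC396
int signExtended = raw;         // -15466: upper bytes become all ones
int port = raw & 0x0000ffff;    // 50070: upper bytes cleared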
 
Example #27
Source File: DatanodeDescriptor.java    From hadoop-gpu with Apache License 2.0
/** Serialization for FSEditLog */
void readFieldsFromFSEditLog(DataInput in) throws IOException {
  this.name = UTF8.readString(in);
  this.storageID = UTF8.readString(in);
  this.infoPort = in.readShort() & 0x0000ffff;

  this.capacity = in.readLong();
  this.dfsUsed = in.readLong();
  this.remaining = in.readLong();
  this.lastUpdate = in.readLong();
  this.xceiverCount = in.readInt();
  this.location = Text.readString(in);
  this.hostName = Text.readString(in);
  setAdminState(WritableUtils.readEnum(in, AdminStates.class));
}
 
Example #28
Source File: NamespaceInfo.java    From hadoop-gpu with Apache License 2.0
public void write(DataOutput out) throws IOException {
  UTF8.writeString(out, getBuildVersion());
  out.writeInt(getLayoutVersion());
  out.writeInt(getNamespaceID());
  out.writeLong(getCTime());
  out.writeInt(getDistributedUpgradeVersion());
}
 
Example #29
Source File: NamespaceInfo.java    From hadoop-gpu with Apache License 2.0
public void readFields(DataInput in) throws IOException {
  buildVersion = UTF8.readString(in);
  layoutVersion = in.readInt();
  namespaceID = in.readInt();
  cTime = in.readLong();
  distributedUpgradeVersion = in.readInt();
}
 
Example #30
Source File: MRBench.java    From RDFS with Apache License 2.0
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output, Reporter reporter) throws IOException 
{
  while(values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}