Java Code Examples for org.apache.hadoop.io.Text#write()

The following examples show how to use org.apache.hadoop.io.Text#write() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MarkLogicInputSplit.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
@Override
public void write(DataOutput out) throws IOException {
    out.writeLong(start);
    out.writeLong(length);
    Text forestIdText = new Text(forestId.toByteArray());
    forestIdText.write(out);
    if (hostName != null && hostName.length > 0) {
        Text.writeString(out, hostName[0]);
    }
    out.writeBoolean(isLastSplit);
    int replicaSize = (replicas != null) ? replicas.size() : 0;
    out.writeInt(replicaSize);
    for (int i=0; i < replicaSize; i++) {
        Text.writeString(out, replicas.get(i).getForest());
        Text.writeString(out, replicas.get(i).getHostName());
    } 
}
 
Example 2
Source File: TestIndexedSort.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
public WritableSortable(int j) throws IOException {
  seed = r.nextLong();
  r.setSeed(seed);
  Text t = new Text();
  StringBuffer sb = new StringBuffer();
  indices = new int[j];
  offsets = new int[j];
  check = new String[j];
  DataOutputBuffer dob = new DataOutputBuffer();
  for (int i = 0; i < j; ++i) {
    indices[i] = i;
    offsets[i] = dob.getLength();
    genRandom(t, r.nextInt(15) + 1, sb);
    t.write(dob);
    check[i] = t.toString();
  }
  eob = dob.getLength();
  bytes = dob.getData();
  comparator = WritableComparator.get(Text.class);
}
 
Example 3
Source File: TestIndexedSort.java    From RDFS with Apache License 2.0 6 votes vote down vote up
public WritableSortable(int j) throws IOException {
  seed = r.nextLong();
  r.setSeed(seed);
  Text t = new Text();
  StringBuffer sb = new StringBuffer();
  indices = new int[j];
  offsets = new int[j];
  check = new String[j];
  DataOutputBuffer dob = new DataOutputBuffer();
  for (int i = 0; i < j; ++i) {
    indices[i] = i;
    offsets[i] = dob.getLength();
    genRandom(t, r.nextInt(15) + 1, sb);
    t.write(dob);
    check[i] = t.toString();
  }
  eob = dob.getLength();
  bytes = dob.getData();
  comparator = WritableComparator.get(Text.class);
}
 
Example 4
Source File: TextSerializerTest.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
@Test
public void testDeserialize() throws IOException {
  // Use Hadoop's serializer, verify our deserializer can read the string back
  for (String textToSerialize : textsToSerialize) {
    ByteArrayOutputStream bOs = new ByteArrayOutputStream();
    DataOutputStream dataOutputStream = new DataOutputStream(bOs);

    Text hadoopText = new Text();
    hadoopText.set(textToSerialize);
    hadoopText.write(dataOutputStream);
    dataOutputStream.close();

    ByteArrayInputStream bIn = new ByteArrayInputStream(bOs.toByteArray());
    DataInputStream dataInputStream = new DataInputStream(bIn);

    String deserializedString = TextSerializer.readTextAsString(dataInputStream);

    Assert.assertEquals(deserializedString, textToSerialize);
  }
}
 
Example 5
Source File: TestIndexedSort.java    From big-c with Apache License 2.0 6 votes vote down vote up
public WritableSortable(int j) throws IOException {
  seed = r.nextLong();
  r.setSeed(seed);
  Text t = new Text();
  StringBuilder sb = new StringBuilder();
  indices = new int[j];
  offsets = new int[j];
  check = new String[j];
  DataOutputBuffer dob = new DataOutputBuffer();
  for (int i = 0; i < j; ++i) {
    indices[i] = i;
    offsets[i] = dob.getLength();
    genRandom(t, r.nextInt(15) + 1, sb);
    t.write(dob);
    check[i] = t.toString();
  }
  eob = dob.getLength();
  bytes = dob.getData();
  comparator = WritableComparator.get(Text.class);
}
 
Example 6
Source File: TestIndexedSort.java    From hadoop with Apache License 2.0 6 votes vote down vote up
public WritableSortable(int j) throws IOException {
  seed = r.nextLong();
  r.setSeed(seed);
  Text t = new Text();
  StringBuilder sb = new StringBuilder();
  indices = new int[j];
  offsets = new int[j];
  check = new String[j];
  DataOutputBuffer dob = new DataOutputBuffer();
  for (int i = 0; i < j; ++i) {
    indices[i] = i;
    offsets[i] = dob.getLength();
    genRandom(t, r.nextInt(15) + 1, sb);
    t.write(dob);
    check[i] = t.toString();
  }
  eob = dob.getLength();
  bytes = dob.getData();
  comparator = WritableComparator.get(Text.class);
}
 
Example 7
Source File: TextSerializer.java    From pravega-samples with Apache License 2.0 5 votes vote down vote up
@Override
public ByteBuffer serialize(Text value) {
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    ObjectOutputStream oout;
    try {
        oout = new ObjectOutputStream(bout);
        value.write(oout);
        oout.close();
        bout.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return ByteBuffer.wrap(bout.toByteArray());
}
 
Example 8
Source File: RDFWritable.java    From marklogic-contentpump with Apache License 2.0 5 votes vote down vote up
@Override
public void write(DataOutput out) throws IOException {
    if (graphUri == null) {
        out.writeByte(0);
    } else {
        out.writeByte(1);
        Text t = new Text(graphUri);
        t.write(out);
    }
    out.writeByte(type);
    if (value instanceof Text) {
        ((Text) value).write(out);
    } else if (value instanceof MarkLogicNode) {
        ((MarkLogicNode) value).write(out);
    } else if (value instanceof BytesWritable) {
        ((BytesWritable) value).write(out);
    }
    //serialize permissions
    if (permissions == null) {
        out.writeByte(0);
    } else {
        out.writeByte(permissions.length);
        for(int i=0; i<permissions.length; i++) {
            Text role = new Text(permissions[i].getRole());
            Text cap = new Text(permissions[i].getCapability().toString());
            role.write(out);
            cap.write(out);
        }
    }
}
 
Example 9
Source File: IEX2LevAMAZON.java    From Clusion with GNU General Public License v3.0 5 votes vote down vote up
@Override
public void write(DataOutput dataOutput) throws IOException {
	dataOutput.writeInt(this.values.size());

	for (Text text : values) {
		text.write(dataOutput);
	}
}
 
Example 10
Source File: CustomerFlowElement.java    From WIFIProbe with Apache License 2.0 5 votes vote down vote up
public void write(DataOutput dataOutput) throws IOException {
    Text text = new Text(wifiProb==null?"":wifiProb);
    text.write(dataOutput);

    IntWritable intWritable = new IntWritable();

    intWritable.set(inNoOutWifi);
    intWritable.write(dataOutput);
    intWritable.set(inNoOutStore);
    intWritable.write(dataOutput);

    intWritable.set(outNoInWifi);
    intWritable.write(dataOutput);
    intWritable.set(outNoInStore);
    intWritable.write(dataOutput);

    intWritable.set(inAndOutWifi);
    intWritable.write(dataOutput);
    intWritable.set(inAndOutStore);
    intWritable.write(dataOutput);

    intWritable.set(stayInWifi);
    intWritable.write(dataOutput);
    intWritable.set(stayInStore);
    intWritable.write(dataOutput);

    DoubleWritable doubleWritable = new DoubleWritable();
    doubleWritable.set(jumpRate);
    doubleWritable.write(dataOutput);
    doubleWritable.set(deepVisit);
    doubleWritable.write(dataOutput);
    doubleWritable.set(inStoreRate);
    doubleWritable.write(dataOutput);
}
 
Example 11
Source File: TaskState.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
@Override
public void write(DataOutput out) throws IOException {
  Text text = new Text();
  text.set(this.jobId);
  text.write(out);
  text.set(this.taskId);
  text.write(out);
  out.writeLong(this.startTime);
  out.writeLong(this.endTime);
  out.writeLong(this.duration);
  super.write(out);
}
 
Example 12
Source File: NewOldCustomElement.java    From WIFIProbe with Apache License 2.0 5 votes vote down vote up
public void write(DataOutput dataOutput) throws IOException {
    Text text = new Text(wifiProb==null?"":wifiProb);
    text.write(dataOutput);

    LongWritable longWritable = new LongWritable();
    longWritable.set(hour);
    longWritable.write(dataOutput);
    longWritable.set(newCustomer);
    longWritable.write(dataOutput);
    longWritable.set(oldCustomer);
    longWritable.write(dataOutput);

}
 
Example 13
Source File: TestMRCombiner.java    From tez with Apache License 2.0 5 votes vote down vote up
@Override
public DataInputBuffer getKey() throws IOException {
  DataInputBuffer key = new DataInputBuffer();
  Text text = new Text(keys[i]);
  DataOutputBuffer out = new DataOutputBuffer();
  text.write(out);
  key.reset(out.getData(), out.getLength());
  return key;
}
 
Example 14
Source File: CustomWritableWithCircle.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Override
public void write(DataOutput paramDataOutput)
    throws IOException
  {
    IntWritable localIntWritable = new IntWritable();

    localIntWritable.set(this.int1);
    localIntWritable.write(paramDataOutput);

    Text localText = new Text();
    localText.set(this.circle);
    localText.write(paramDataOutput);
  }
 
Example 15
Source File: TeraInputFormat.java    From pravega-samples with Apache License 2.0 4 votes vote down vote up
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job,
    Path partFile) throws Throwable  {
  long t1 = System.currentTimeMillis();
  Configuration conf = job.getConfiguration();
  //Instead of reading from hdfs, now the input is from Pravega stream
  final PravegaInputFormat inFormat = new PravegaInputFormat();
  final TextSampler sampler = new TextSampler();
  int partitions = job.getNumReduceTasks();
  long sampleSize =
      conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(),
          TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE);
  final List<InputSplit> splits = inFormat.getSplits(job);
  long t2 = System.currentTimeMillis();
  System.out.println("Computing input splits took " + (t2 - t1) + "ms");
  int samples =
      Math.min(conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(),
                           TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS),
          splits.size());
  System.out.println("Sampling " + samples + " splits of " + splits.size());
  final long recordsPerSample = sampleSize / samples;
  final int sampleStep = splits.size() / samples;
  Thread[] samplerReader = new Thread[samples];
  SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
  // take N samples from different parts of the input
  for(int i=0; i < samples; ++i) {
    final int idx = i;
    samplerReader[i] = 
      new Thread (threadGroup,"Sampler Reader " + idx) {
      {
        setDaemon(true);
      }
      public void run() {
        long records = 0;
        try {
          TaskAttemptContext context = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
          RecordReader<Text, Text> reader =
            inFormat.createRecordReader(splits.get(sampleStep * idx),
            context);
          reader.initialize(splits.get(sampleStep * idx), context);
          while (reader.nextKeyValue()) {
            sampler.addKey(new Text(reader.getCurrentValue().toString().substring(0, 10)));
            records += 1;
            if (recordsPerSample <= records) {
              break;
            }
          }
        } catch (IOException ie){
          System.err.println("Got an exception while reading splits " +
              StringUtils.stringifyException(ie));
          throw new RuntimeException(ie);
        } catch (InterruptedException e) {
      	  
        }
      }
    };
    samplerReader[i].start();
  }
  FileSystem outFs = partFile.getFileSystem(conf);
  DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, 
                                         outFs.getDefaultBlockSize(partFile));
  for (int i = 0; i < samples; i++) {
    try {
      samplerReader[i].join();
      if(threadGroup.getThrowable() != null){
        throw threadGroup.getThrowable();
      }
    } catch (InterruptedException e) {
    }
  }
  for(Text split : sampler.createPartitions(partitions)) {
    split.write(writer);
  }
  writer.close();
  long t3 = System.currentTimeMillis();
  System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
 
Example 16
Source File: CustomWritable.java    From pxf with Apache License 2.0 4 votes vote down vote up
@Override
public void write(DataOutput out) throws IOException {
    // 0. Timestamp
    Text tms_text = new Text(tms);
    tms_text.write(out);

    // 1. num, int1, int2
    IntWritable intw = new IntWritable();

    for (int i = 0; i < num.length; i++) {
        intw.set(num[i]);
        intw.write(out);
    }

    intw.set(int1);
    intw.write(out);

    intw.set(int2);
    intw.write(out);

    // 2. st1
    Text txt = new Text();

    for (int i = 0; i < strings.length; i++) {
        txt.set(strings[i]);
        txt.write(out);
    }

    txt.set(st1);
    txt.write(out);

    // 3. doubles
    DoubleWritable dw = new DoubleWritable();
    for (int i = 0; i < dubs.length; i++) {
        dw.set(dubs[i]);
        dw.write(out);
    }

    dw.set(db);
    dw.write(out);

    // 4. floats
    FloatWritable fw = new FloatWritable();
    for (int i = 0; i < fts.length; i++) {
        fw.set(fts[i]);
        fw.write(out);
    }

    fw.set(ft);
    fw.write(out);

    // 5. longs
    LongWritable lw = new LongWritable();
    for (int i = 0; i < lngs.length; i++) {
        lw.set(lngs[i]);
        lw.write(out);
    }
    lw.set(lng);
    lw.write(out);

    // 6. booleans
    BooleanWritable bw = new BooleanWritable();
    for (int i = 0; i < bools.length; ++i) {
        bw.set(bools[i]);
        bw.write(out);
    }
    bw.set(bool);
    bw.write(out);

    // 7. shorts
    ShortWritable sw = new ShortWritable();
    for (int i = 0; i < shrts.length; ++i) {
        sw.set(shrts[i]);
        sw.write(out);
    }
    sw.set(shrt);
    sw.write(out);

    // 8. bytes
    // BytesWritable btsw = new BytesWritable(bts);
    // btsw.write(out);
    BytesWritable btsw = new BytesWritable();
    btsw.setCapacity(bts.length);
    btsw.setSize(bts.length);
    btsw.set(bts, 0, bts.length);
    btsw.write(out);
}
 
Example 17
Source File: TeraInputFormat.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, 
    Path partFile) throws Throwable  {
  long t1 = System.currentTimeMillis();
  Configuration conf = job.getConfiguration();
  final TeraInputFormat inFormat = new TeraInputFormat();
  final TextSampler sampler = new TextSampler();
  int partitions = job.getNumReduceTasks();
  long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
  final List<InputSplit> splits = inFormat.getSplits(job);
  long t2 = System.currentTimeMillis();
  System.out.println("Computing input splits took " + (t2 - t1) + "ms");
  int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
  System.out.println("Sampling " + samples + " splits of " + splits.size());
  final long recordsPerSample = sampleSize / samples;
  final int sampleStep = splits.size() / samples;
  Thread[] samplerReader = new Thread[samples];
  SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
  // take N samples from different parts of the input
  for(int i=0; i < samples; ++i) {
    final int idx = i;
    samplerReader[i] = 
      new Thread (threadGroup,"Sampler Reader " + idx) {
      {
        setDaemon(true);
      }
      public void run() {
        long records = 0;
        try {
          TaskAttemptContext context = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
          RecordReader<Text, Text> reader = 
            inFormat.createRecordReader(splits.get(sampleStep * idx),
            context);
          reader.initialize(splits.get(sampleStep * idx), context);
          while (reader.nextKeyValue()) {
            sampler.addKey(new Text(reader.getCurrentKey()));
            records += 1;
            if (recordsPerSample <= records) {
              break;
            }
          }
        } catch (IOException ie){
          System.err.println("Got an exception while reading splits " +
              StringUtils.stringifyException(ie));
          throw new RuntimeException(ie);
        } catch (InterruptedException e) {
      	  
        }
      }
    };
    samplerReader[i].start();
  }
  FileSystem outFs = partFile.getFileSystem(conf);
  DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, 
                                         outFs.getDefaultBlockSize(partFile));
  for (int i = 0; i < samples; i++) {
    try {
      samplerReader[i].join();
      if(threadGroup.getThrowable() != null){
        throw threadGroup.getThrowable();
      }
    } catch (InterruptedException e) {
    }
  }
  for(Text split : sampler.createPartitions(partitions)) {
    split.write(writer);
  }
  writer.close();
  long t3 = System.currentTimeMillis();
  System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
 
Example 18
Source File: TeraInputFormat.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, 
    Path partFile) throws Throwable  {
  long t1 = System.currentTimeMillis();
  Configuration conf = job.getConfiguration();
  final TeraInputFormat inFormat = new TeraInputFormat();
  final TextSampler sampler = new TextSampler();
  int partitions = job.getNumReduceTasks();
  long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
  final List<InputSplit> splits = inFormat.getSplits(job);
  long t2 = System.currentTimeMillis();
  System.out.println("Computing input splits took " + (t2 - t1) + "ms");
  int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
  System.out.println("Sampling " + samples + " splits of " + splits.size());
  final long recordsPerSample = sampleSize / samples;
  final int sampleStep = splits.size() / samples;
  Thread[] samplerReader = new Thread[samples];
  SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
  // take N samples from different parts of the input
  for(int i=0; i < samples; ++i) {
    final int idx = i;
    samplerReader[i] = 
      new Thread (threadGroup,"Sampler Reader " + idx) {
      {
        setDaemon(true);
      }
      public void run() {
        long records = 0;
        try {
          TaskAttemptContext context = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
          RecordReader<Text, Text> reader = 
            inFormat.createRecordReader(splits.get(sampleStep * idx),
            context);
          reader.initialize(splits.get(sampleStep * idx), context);
          while (reader.nextKeyValue()) {
            sampler.addKey(new Text(reader.getCurrentKey()));
            records += 1;
            if (recordsPerSample <= records) {
              break;
            }
          }
        } catch (IOException ie){
          System.err.println("Got an exception while reading splits " +
              StringUtils.stringifyException(ie));
          throw new RuntimeException(ie);
        } catch (InterruptedException e) {
      	  
        }
      }
    };
    samplerReader[i].start();
  }
  FileSystem outFs = partFile.getFileSystem(conf);
  DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, 
                                         outFs.getDefaultBlockSize(partFile));
  for (int i = 0; i < samples; i++) {
    try {
      samplerReader[i].join();
      if(threadGroup.getThrowable() != null){
        throw threadGroup.getThrowable();
      }
    } catch (InterruptedException e) {
    }
  }
  for(Text split : sampler.createPartitions(partitions)) {
    split.write(writer);
  }
  writer.close();
  long t3 = System.currentTimeMillis();
  System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
 
Example 19
Source File: TeraInputFormat.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, 
    Path partFile) throws Throwable  {
  long t1 = System.currentTimeMillis();
  Configuration conf = job.getConfiguration();
  final TeraInputFormat inFormat = new TeraInputFormat();
  final TextSampler sampler = new TextSampler();
  int partitions = job.getNumReduceTasks();
  long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
  final List<InputSplit> splits = inFormat.getSplits(job);
  long t2 = System.currentTimeMillis();
  System.out.println("Computing input splits took " + (t2 - t1) + "ms");
  int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
  System.out.println("Sampling " + samples + " splits of " + splits.size());
  final long recordsPerSample = sampleSize / samples;
  final int sampleStep = splits.size() / samples;
  Thread[] samplerReader = new Thread[samples];
  SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
  // take N samples from different parts of the input
  for(int i=0; i < samples; ++i) {
    final int idx = i;
    samplerReader[i] = 
      new Thread (threadGroup,"Sampler Reader " + idx) {
      {
        setDaemon(true);
      }
      public void run() {
        long records = 0;
        try {
          TaskAttemptContext context = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
          RecordReader<Text, Text> reader = 
            inFormat.createRecordReader(splits.get(sampleStep * idx),
            context);
          reader.initialize(splits.get(sampleStep * idx), context);
          while (reader.nextKeyValue()) {
            sampler.addKey(new Text(reader.getCurrentKey()));
            records += 1;
            if (recordsPerSample <= records) {
              break;
            }
          }
        } catch (IOException ie){
          System.err.println("Got an exception while reading splits " +
              StringUtils.stringifyException(ie));
          throw new RuntimeException(ie);
        } catch (InterruptedException e) {
      	  
        }
      }
    };
    samplerReader[i].start();
  }
  FileSystem outFs = partFile.getFileSystem(conf);
  DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, 
                                         outFs.getDefaultBlockSize(partFile));
  for (int i = 0; i < samples; i++) {
    try {
      samplerReader[i].join();
      if(threadGroup.getThrowable() != null){
        throw threadGroup.getThrowable();
      }
    } catch (InterruptedException e) {
    }
  }
  for(Text split : sampler.createPartitions(partitions)) {
    split.write(writer);
  }
  writer.close();
  long t3 = System.currentTimeMillis();
  System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}