org.apache.hadoop.io.NullWritable Java Examples

The following examples show how to use org.apache.hadoop.io.NullWritable, drawn from a range of open-source projects. Each example notes its source file, originating project, and license.
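
NullWritable is a zero-length Writable singleton: it reads and writes no bytes at all, which makes it the idiomatic placeholder wherever a MapReduce key or value slot must be filled but carries no data. A minimal sketch of the pattern follows (the LineKeyMapper class is illustrative only, not taken from any of the projects below):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Emits each input line as the key with NullWritable as the value,
// a common shape for sort or deduplication jobs where only keys matter.
public class LineKeyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    // NullWritable.get() returns the shared singleton; there is nothing to construct.
    context.write(line, NullWritable.get());
  }
}
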
Example #1
Source File: CredentialsTestJob.java    From hadoop with Apache License 2.0
public Job createJob() 
throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
 
Example #2
Source File: LoadJob.java    From hadoop with Apache License 2.0
@Override
public void map(NullWritable ignored, GridmixRecord rec,
                Context context) throws IOException, InterruptedException {
  acc += ratio;
  while (acc >= 1.0 && !reduces.isEmpty()) {
    key.setSeed(r.nextLong());
    val.setSeed(r.nextLong());
    final int idx = r.nextInt(reduces.size());
    final RecordFactory f = reduces.get(idx);
    if (!f.next(key, val)) {
      reduces.remove(idx);
      continue;
    }
    context.write(key, val);
    acc -= 1.0;
    
    // match inline
    try {
      matcher.match();
    } catch (Exception e) {
      LOG.debug("Error in resource usage emulation! Message: ", e);
    }
  }
}
 
Example #3
Source File: IdentifierResolver.java    From hadoop with Apache License 2.0
/**
 * Resolves a given identifier. This method has to be called before calling
 * any of the getters.
 */
public void resolve(String identifier) {
  if (identifier.equalsIgnoreCase(RAW_BYTES_ID)) {
    setInputWriterClass(RawBytesInputWriter.class);
    setOutputReaderClass(RawBytesOutputReader.class);
    setOutputKeyClass(BytesWritable.class);
    setOutputValueClass(BytesWritable.class);
  } else if (identifier.equalsIgnoreCase(TYPED_BYTES_ID)) {
    setInputWriterClass(TypedBytesInputWriter.class);
    setOutputReaderClass(TypedBytesOutputReader.class);
    setOutputKeyClass(TypedBytesWritable.class);
    setOutputValueClass(TypedBytesWritable.class);
  } else if (identifier.equalsIgnoreCase(KEY_ONLY_TEXT_ID)) {
    setInputWriterClass(KeyOnlyTextInputWriter.class);
    setOutputReaderClass(KeyOnlyTextOutputReader.class);
    setOutputKeyClass(Text.class);
    setOutputValueClass(NullWritable.class);
  } else { // assume TEXT_ID
    setInputWriterClass(TextInputWriter.class);
    setOutputReaderClass(TextOutputReader.class);
    setOutputKeyClass(Text.class);
    setOutputValueClass(Text.class);
  }
}
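
Note the KEY_ONLY_TEXT_ID branch: by pairing Text output keys with NullWritable values, output formats that suppress null values (such as TextOutputFormat in Example #4) emit only the key text, with no separator or value appended.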
 
Example #4
Source File: TextOutputFormat.java    From hadoop with Apache License 2.0
public synchronized void write(K key, V value)
  throws IOException {

  boolean nullKey = key == null || key instanceof NullWritable;
  boolean nullValue = value == null || value instanceof NullWritable;
  if (nullKey && nullValue) {
    return;
  }
  if (!nullKey) {
    writeObject(key);
  }
  if (!(nullKey || nullValue)) {
    out.write(keyValueSeparator);
  }
  if (!nullValue) {
    writeObject(value);
  }
  out.write(newline);
}
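
This write method is why NullWritable pairs so well with TextOutputFormat: a NullWritable key or value is treated exactly like a null, so the separator is suppressed and only the non-null side (plus the newline) reaches the output file.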
 
Example #5
Source File: PostgreSQLCopyExportJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void configureMapper(Job job, String tableName,
    String tableClassName) throws ClassNotFoundException, IOException {
  if (isHCatJob) {
    throw new IOException("Sqoop-HCatalog Integration is not supported.");
  }
  switch (getInputFileType()) {
    case AVRO_DATA_FILE:
      throw new IOException("Avro data file is not supported.");
    case SEQUENCE_FILE:
    case UNKNOWN:
    default:
      job.setMapperClass(getMapperClass());
  }

  // Concurrent writes of the same records would be problematic.
  ConfigurationHelper.setJobMapSpeculativeExecution(job, false);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
}
 
Example #6
Source File: TestWrappedRRClassloader.java    From hadoop with Apache License 2.0
/**
 * Tests that the class loader set by
 * {@link Configuration#setClassLoader(ClassLoader)}
 * is inherited by any {@link WrappedRecordReader}s created by
 * {@link CompositeRecordReader}.
 */
public void testClassLoader() throws Exception {
  Configuration conf = new Configuration();
  Fake_ClassLoader classLoader = new Fake_ClassLoader();
  conf.setClassLoader(classLoader);
  assertTrue(conf.getClassLoader() instanceof Fake_ClassLoader);

  FileSystem fs = FileSystem.get(conf);
  Path testdir = new Path(System.getProperty("test.build.data", "/tmp"))
      .makeQualified(fs);

  Path base = new Path(testdir, "/empty");
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  conf.set(CompositeInputFormat.JOIN_EXPR, 
    CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));

  CompositeInputFormat<NullWritable> inputFormat = 
    new CompositeInputFormat<NullWritable>();
  // create dummy TaskAttemptID
  TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, tid.toString());
  inputFormat.createRecordReader
    (inputFormat.getSplits(Job.getInstance(conf)).get(0), 
     new TaskAttemptContextImpl(conf, tid));
}
 
Example #7
Source File: TestTotalOrderPartitioner.java    From hadoop with Apache License 2.0
public void testTotalOrderBinarySearch() throws Exception {
  TotalOrderPartitioner<Text,NullWritable> partitioner =
    new TotalOrderPartitioner<Text,NullWritable>();
  Configuration conf = new Configuration();
  Path p = TestTotalOrderPartitioner.<Text>writePartitionFile(
      "totalorderbinarysearch", conf, splitStrings);
  conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
  conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
  try {
    partitioner.setConf(conf);
    NullWritable nw = NullWritable.get();
    for (Check<Text> chk : testStrings) {
      assertEquals(chk.data.toString(), chk.part,
          partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
    }
  } finally {
    p.getFileSystem(conf).delete(p, true);
  }
}
 
Example #8
Source File: AnalyserLogDataMapper.java    From BigDataArchitect with Apache License 2.0
/**
 * Processes a single parsed event and writes it to HBase.
 * 
 * @param clientInfo
 * @param event
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
private void handleData(Map<String, String> clientInfo, EventEnum event, Context context) throws IOException, InterruptedException {
    String uuid = clientInfo.get(EventLogConstants.LOG_COLUMN_NAME_UUID);
    String memberId = clientInfo.get(EventLogConstants.LOG_COLUMN_NAME_MEMBER_ID);
    String serverTime = clientInfo.get(EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME);
    if (StringUtils.isNotBlank(serverTime)) {
        // The server time is required; records without it are filtered out.
        clientInfo.remove(EventLogConstants.LOG_COLUMN_NAME_USER_AGENT); // drop the browser user-agent column
        // rowkey = timestamp + (uuid+memberid+event).crc
        String rowkey = this.generateRowKey(uuid, memberId, event.alias, serverTime);
        Put put = new Put(Bytes.toBytes(rowkey));
        for (Map.Entry<String, String> entry : clientInfo.entrySet()) {
            if (StringUtils.isNotBlank(entry.getKey()) && StringUtils.isNotBlank(entry.getValue())) {
                put.addColumn(family, Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
            }
        }
        context.write(NullWritable.get(), put);
        this.outputRecords++;
    } else {
        this.filterRecords++;
    }
}
 
Example #9
Source File: LoadJob.java    From hadoop with Apache License 2.0
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
  val.setSeed(r.nextLong());
  while (factory.next(null, val)) {
    context.write(NullWritable.get(), val);
    val.setSeed(r.nextLong());
    
    // match inline
    try {
      matcher.match();
    } catch (Exception e) {
      LOG.debug("Error in resource usage emulation! Message: ", e);
    }
  }
}
 
Example #10
Source File: TeraChecksum.java    From hadoop with Apache License 2.0
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
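
Using NullWritable as the output key means every map output lands in the same reduce group; combined with the forced single reducer, the job ends with one global checksum.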
 
Example #11
Source File: SleepJob.java    From hadoop with Apache License 2.0
public Job createJob(int numMapper, int numReducer, 
                     long mapSleepTime, int mapSleepCount, 
                     long reduceSleepTime, int reduceSleepCount) 
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(SleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(SleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
 
Example #12
Source File: HdfsUtil.java    From ES-Fastloader with Apache License 2.0
public static Job getHdfsJob(Configuration conf, TaskConfig taskConfig, IndexInfo indexInfo) throws Exception {
    Job job = Job.getInstance(conf, MAIN_CLASS);
    job.setJobName("DidiFastIndex_" + taskConfig.getEsTemplate());
    job.setJarByClass(FastIndex.class);
    job.setMapperClass(FastIndexMapper.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    HCatInputFormat.setInput(job, taskConfig.getHiveDB(), taskConfig.getHiveTable(), taskConfig.getFilterStr());

    job.setReducerClass(FastIndexReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(indexInfo.getReducerNum());
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(taskConfig.getHdfsMROutputPath()));

    return job;
}
 
Example #13
Source File: AvroRowsFunction.java    From tablasco with Apache License 2.0
@Override
public List<Object> call(Tuple2<AvroWrapper, NullWritable> avroTuple)
{
    final GenericData.Record datum = (GenericData.Record) avroTuple._1().datum();
    List<Object> row = new ArrayList<>(this.headers.size());
    for (String header : this.headers)
    {
        Object value = datum.get(header);
        if (value instanceof CharSequence) // Avro Utf8 type
        {
            value = value.toString();
        }
        row.add(value);
    }
    return row;
}
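
Here NullWritable appears on the Hadoop-input side of a Spark job: the Avro input format delivers each record in the AvroWrapper key and uses NullWritable as a placeholder value, so the function only ever unpacks the first element of the tuple.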
 
Example #14
Source File: Step1.java    From MapReduce-Demo with MIT License
public static boolean run(Configuration config, Map<String, String> paths)
		throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
	String jobName = "step1";
	Job job = Job.getInstance(config, jobName);
	job.setJarByClass(Step1.class);
	job.setJar("export\\ItemCF.jar");
	job.setMapperClass(Step1_Mapper.class);
	job.setReducerClass(Step1_Reducer.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(NullWritable.class);

	Path inPath = new Path(paths.get("Step1Input"));
	Path outpath = new Path(paths.get("Step1Output"));
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outpath);		
	FileSystem fs = FileSystem.get(config);
	if (fs.exists(outpath)) {
		fs.delete(outpath, true);
	}
	
	return job.waitForCompletion(true);
}
 
Example #15
Source File: MDSHiveLineReader.java    From multiple-dimension-spread with Apache License 2.0
@Override
public boolean next( final NullWritable key, final ColumnAndIndex value ) throws IOException {
  if( currentSpread == null || currentIndex == currentIndexList.size() ){
    if( ! nextReader() ){
      updateCounter( reader.getReadStats() );
      isEnd = true;
      return false;
    }
  }

  spreadColumn.setSpread( currentSpread );
  value.column = spreadColumn;
  value.index =  currentIndexList.get( currentIndex );
  value.columnIndex = spreadCounter.get();
  currentIndex++;
  return true;
}
 
Example #16
Source File: MDSParserOutputFormat.java    From multiple-dimension-spread with Apache License 2.0
@Override
public RecordWriter<NullWritable,IParser> getRecordWriter( final TaskAttemptContext taskAttemptContext ) throws IOException,InterruptedException{
  Configuration config = taskAttemptContext.getConfiguration(); 

  String extension = ".mds";
  Path file = getDefaultWorkFile( taskAttemptContext, extension );

  FileSystem fs = file.getFileSystem( config );
  long dfsBlockSize = Math.max( fs.getDefaultBlockSize( file ) , 1024 * 1024 * 256 );

  OutputStream out = fs.create( file , true , 4096 , fs.getDefaultReplication(file) , dfsBlockSize );

  return new MDSParserRecordWriter( out , new jp.co.yahoo.dataplatform.config.Configuration() );
}
 
Example #17
Source File: TSFHiveOutputFormat.java    From incubator-iotdb with Apache License 2.0
@Override
public RecordWriter<NullWritable, HDFSTSRecord> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) {
  return new RecordWriter<NullWritable, HDFSTSRecord>() {
    @Override
    public void write(NullWritable key, HDFSTSRecord value) {
      throw new RuntimeException("Should not be called");
    }
    @Override
    public void close(Reporter reporter) {
    }
  };
}
 
Example #18
Source File: MapReduceTestUtil.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
public void initialize(InputSplit split, TaskAttemptContext context) 
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  keyclass = (Class<? extends K>) conf.getClass("test.fakeif.keyclass",
    NullWritable.class, WritableComparable.class);
  valclass = (Class<? extends V>) conf.getClass("test.fakeif.valclass",
    NullWritable.class, WritableComparable.class);
}
 
Example #19
Source File: ExportJobBase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void configureMapper(Job job, String tableName,
    String tableClassName) throws ClassNotFoundException, IOException {

  job.setMapperClass(getMapperClass());

  // Concurrent writes of the same records would be problematic.
  ConfigurationHelper.setJobMapSpeculativeExecution(job, false);

  job.setMapOutputKeyClass(SqoopRecord.class);
  job.setMapOutputValueClass(NullWritable.class);
}
 
Example #20
Source File: FlowSortReducer.java    From xxhadoop with Apache License 2.0
@Override
protected void reduce(FlowBean key, Iterable<NullWritable> values,
		Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
		throws IOException, InterruptedException {

	// The shuffle has already sorted the FlowBeans; emit each key once per group.
	context.write(key, NullWritable.get());
}
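
This is the classic key-only sort pattern: the FlowBean key does the ordering during the shuffle, and NullWritable stands in for the value on both sides of the reducer, so each distinct bean is written out exactly once.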
 
Example #21
Source File: TestWrappedRecordReaderClassloader.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
public void configure(JobConf job) {
  keyclass = (Class<? extends K>) job.getClass("test.fakeif.keyclass",
      NullWritable.class, WritableComparable.class);
  valclass = (Class<? extends V>) job.getClass("test.fakeif.valclass",
      NullWritable.class, WritableComparable.class);
}
 
Example #22
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0
@Override
public DataInputBuffer getValue() throws IOException {
  ByteArrayOutputStream dt = new ByteArrayOutputStream();
  NullWritable key = NullWritable.get();
  key.write(new DataOutputStream(dt));
  DataInputBuffer result = new DataInputBuffer();
  byte[] b = dt.toByteArray();
  result.reset(b, 0, b.length);
  return result;
}
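
Since NullWritable.write() emits no bytes, the ByteArrayOutputStream stays empty and the returned DataInputBuffer wraps a zero-length array, which is exactly the serialized form a real shuffle would see for a NullWritable key.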
 
Example #23
Source File: TsFileRecordWriter.java    From incubator-iotdb with Apache License 2.0
@Override
public synchronized void write(NullWritable arg0, TSRecord tsRecord) throws IOException {
  try {
    tsFileWriter.write(tsRecord);
  } catch (WriteProcessException e) {
    // Propagate the failure rather than swallowing it; write() already declares IOException.
    throw new IOException(e);
  }
}
 
Example #24
Source File: TSFHiveInputFormatTest.java    From incubator-iotdb with Apache License 2.0
@Test
public void testGetRecordReader() {
  try {
    RecordReader<NullWritable, MapWritable> recordReader = inputFormat.getRecordReader(inputSplit, job, null);
    assertTrue(recordReader instanceof TSFHiveRecordReader);
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  }
}
 
Example #25
Source File: TestMultipleInputs.java    From hadoop with Apache License 2.0
@Override
public void reduce(Text key, Iterable<Text> values, Context ctx)
    throws IOException, InterruptedException {
  count = 0;
  for (Text value : values)
    count++;
  ctx.write(NullWritable.get(), new Text(key.toString() + " " + count));
}
 
Example #26
Source File: MergeMapper.java    From MapReduce-Demo with MIT License
@Override
protected void setup(Mapper<NullWritable, BytesWritable, Text, BytesWritable>.Context context)
		throws IOException, InterruptedException {
	InputSplit split = context.getInputSplit();
	Path path = ((FileSplit) split).getPath(); // the file this mapper is reading
	fileNameKey = new Text(path.toString());
}
 
Example #27
Source File: OdpsImportJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void configureMapper(Job job, String tableName,
                               String tableClassName) {
  job.setOutputKeyClass(SqoopRecord.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapperClass(getMapperClass());
}
 
Example #28
Source File: TSMRWriteExample.java    From incubator-iotdb with Apache License 2.0
@Override
protected void map(NullWritable key, MapWritable value,
    Mapper<NullWritable, MapWritable, Text, MapWritable>.Context context)
    throws IOException, InterruptedException {

  Text deltaObjectId = (Text) value.get(new Text("device_id"));
  long timestamp = ((LongWritable) value.get(new Text("time_stamp"))).get();
  if (timestamp % 100000 == 0) {
    context.write(deltaObjectId, new MapWritable(value));
  }
}
 
Example #29
Source File: JavaObjectFileSource.java    From rheem with Apache License 2.0
SequenceFileIterator(String path) throws IOException {
    final SequenceFile.Reader.Option fileOption = SequenceFile.Reader.file(new Path(path));
    this.sequenceFileReader = new SequenceFile.Reader(new Configuration(true), fileOption);
    Validate.isTrue(this.sequenceFileReader.getKeyClass().equals(NullWritable.class));
    Validate.isTrue(this.sequenceFileReader.getValueClass().equals(BytesWritable.class));
    this.tryAdvance();
}