org.apache.hadoop.io.NullWritable Java Examples
The following examples show how to use org.apache.hadoop.io.NullWritable.
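NullWritable is a singleton (obtained via NullWritable.get()) that serializes to zero bytes; it is the standard placeholder wherever MapReduce requires a key or value type but there is nothing useful to store. As a minimal sketch of the idea (the output path is a placeholder, not taken from any example below), a SequenceFile can be written with NullWritable keys so that only the values occupy space:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Minimal sketch: NullWritable as a placeholder key in a SequenceFile.
public class NullWritableDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(new Path("/tmp/nullkey.seq")),
        SequenceFile.Writer.keyClass(NullWritable.class),
        SequenceFile.Writer.valueClass(Text.class))) {
      // The NullWritable key adds no bytes; only the Text value is stored.
      writer.append(NullWritable.get(), new Text("value-only record"));
    }
  }
}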
Example #1
Source File: CredentialsTestJob.java From hadoop with Apache License 2.0

public Job createJob() throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setNumReduceTasks(1);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Example #2
Source File: LoadJob.java From hadoop with Apache License 2.0

@Override
public void map(NullWritable ignored, GridmixRecord rec, Context context)
    throws IOException, InterruptedException {
  acc += ratio;
  while (acc >= 1.0 && !reduces.isEmpty()) {
    key.setSeed(r.nextLong());
    val.setSeed(r.nextLong());
    final int idx = r.nextInt(reduces.size());
    final RecordFactory f = reduces.get(idx);
    if (!f.next(key, val)) {
      reduces.remove(idx);
      continue;
    }
    context.write(key, val);
    acc -= 1.0;

    // match inline
    try {
      matcher.match();
    } catch (Exception e) {
      LOG.debug("Error in resource usage emulation! Message: ", e);
    }
  }
}
Example #3
Source File: IdentifierResolver.java From hadoop with Apache License 2.0

/**
 * Resolves a given identifier. This method has to be called before calling
 * any of the getters.
 */
public void resolve(String identifier) {
  if (identifier.equalsIgnoreCase(RAW_BYTES_ID)) {
    setInputWriterClass(RawBytesInputWriter.class);
    setOutputReaderClass(RawBytesOutputReader.class);
    setOutputKeyClass(BytesWritable.class);
    setOutputValueClass(BytesWritable.class);
  } else if (identifier.equalsIgnoreCase(TYPED_BYTES_ID)) {
    setInputWriterClass(TypedBytesInputWriter.class);
    setOutputReaderClass(TypedBytesOutputReader.class);
    setOutputKeyClass(TypedBytesWritable.class);
    setOutputValueClass(TypedBytesWritable.class);
  } else if (identifier.equalsIgnoreCase(KEY_ONLY_TEXT_ID)) {
    setInputWriterClass(KeyOnlyTextInputWriter.class);
    setOutputReaderClass(KeyOnlyTextOutputReader.class);
    setOutputKeyClass(Text.class);
    setOutputValueClass(NullWritable.class);
  } else { // assume TEXT_ID
    setInputWriterClass(TextInputWriter.class);
    setOutputReaderClass(TextOutputReader.class);
    setOutputKeyClass(Text.class);
    setOutputValueClass(Text.class);
  }
}
Example #4
Source File: TextOutputFormat.java From hadoop with Apache License 2.0

public synchronized void write(K key, V value) throws IOException {
  boolean nullKey = key == null || key instanceof NullWritable;
  boolean nullValue = value == null || value instanceof NullWritable;
  if (nullKey && nullValue) {
    return;
  }
  if (!nullKey) {
    writeObject(key);
  }
  if (!(nullKey || nullValue)) {
    out.write(keyValueSeparator);
  }
  if (!nullValue) {
    writeObject(value);
  }
  out.write(newline);
}
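TextOutputFormat's write() above treats a NullWritable key or value exactly like null: the field and the key-value separator are skipped. So a mapper declared with NullWritable keys, as in this hypothetical sketch, produces plain value-only output lines:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: with NullWritable as the output key class,
// TextOutputFormat writes only the value and a newline, with no key or separator.
public class ValueOnlyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    context.write(NullWritable.get(), line);
  }
}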
Example #5
Source File: PostgreSQLCopyExportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0

@Override
protected void configureMapper(Job job, String tableName,
    String tableClassName) throws ClassNotFoundException, IOException {
  if (isHCatJob) {
    throw new IOException("Sqoop-HCatalog Integration is not supported.");
  }
  switch (getInputFileType()) {
    case AVRO_DATA_FILE:
      throw new IOException("Avro data file is not supported.");
    case SEQUENCE_FILE:
    case UNKNOWN:
    default:
      job.setMapperClass(getMapperClass());
  }

  // Concurrent writes of the same records would be problematic.
  ConfigurationHelper.setJobMapSpeculativeExecution(job, false);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
}
Example #6
Source File: TestWrappedRRClassloader.java From hadoop with Apache License 2.0

/**
 * Tests that the class loader set by
 * {@link Configuration#setClassLoader(ClassLoader)}
 * is inherited by any {@link WrappedRecordReader}s created by
 * {@link CompositeRecordReader}.
 */
public void testClassLoader() throws Exception {
  Configuration conf = new Configuration();
  Fake_ClassLoader classLoader = new Fake_ClassLoader();
  conf.setClassLoader(classLoader);
  assertTrue(conf.getClassLoader() instanceof Fake_ClassLoader);

  FileSystem fs = FileSystem.get(conf);
  Path testdir = new Path(System.getProperty("test.build.data", "/tmp"))
      .makeQualified(fs);
  Path base = new Path(testdir, "/empty");
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  conf.set(CompositeInputFormat.JOIN_EXPR,
      CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));

  CompositeInputFormat<NullWritable> inputFormat =
      new CompositeInputFormat<NullWritable>();

  // create a dummy TaskAttemptID
  TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, tid.toString());
  inputFormat.createRecordReader(
      inputFormat.getSplits(Job.getInstance(conf)).get(0),
      new TaskAttemptContextImpl(conf, tid));
}
Example #7
Source File: TestTotalOrderPartitioner.java From hadoop with Apache License 2.0

public void testTotalOrderBinarySearch() throws Exception {
  TotalOrderPartitioner<Text, NullWritable> partitioner =
      new TotalOrderPartitioner<Text, NullWritable>();
  Configuration conf = new Configuration();
  Path p = TestTotalOrderPartitioner.<Text>writePartitionFile(
      "totalorderbinarysearch", conf, splitStrings);
  conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
  conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
  try {
    partitioner.setConf(conf);
    NullWritable nw = NullWritable.get();
    for (Check<Text> chk : testStrings) {
      assertEquals(chk.data.toString(), chk.part,
          partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
    }
  } finally {
    p.getFileSystem(conf).delete(p, true);
  }
}
Example #8
Source File: AnalyserLogDataMapper.java From BigDataArchitect with Apache License 2.0

/**
 * Does the actual processing of a single log record.
 *
 * @param clientInfo
 * @param context
 * @param event
 * @throws InterruptedException
 * @throws IOException
 */
private void handleData(Map<String, String> clientInfo, EventEnum event,
    Context context) throws IOException, InterruptedException {
  String uuid = clientInfo.get(EventLogConstants.LOG_COLUMN_NAME_UUID);
  String memberId = clientInfo.get(EventLogConstants.LOG_COLUMN_NAME_MEMBER_ID);
  String serverTime = clientInfo.get(EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME);
  if (StringUtils.isNotBlank(serverTime)) { // the server time must be present
    // drop the user-agent (browser) info
    clientInfo.remove(EventLogConstants.LOG_COLUMN_NAME_USER_AGENT);
    // row key: timestamp + crc of (uuid + memberId + event)
    String rowkey = this.generateRowKey(uuid, memberId, event.alias, serverTime);
    Put put = new Put(Bytes.toBytes(rowkey));
    for (Map.Entry<String, String> entry : clientInfo.entrySet()) {
      if (StringUtils.isNotBlank(entry.getKey()) && StringUtils.isNotBlank(entry.getValue())) {
        put.addColumn(family, Bytes.toBytes(entry.getKey()),
            Bytes.toBytes(entry.getValue()));
      }
    }
    context.write(NullWritable.get(), put);
    this.outputRecords++;
  } else {
    this.filterRecords++;
  }
}
Example #9
Source File: LoadJob.java From hadoop with Apache License 2.0

@Override
protected void cleanup(Context context)
    throws IOException, InterruptedException {
  val.setSeed(r.nextLong());
  while (factory.next(null, val)) {
    context.write(NullWritable.get(), val);
    val.setSeed(r.nextLong());

    // match inline
    try {
      matcher.match();
    } catch (Exception e) {
      LOG.debug("Error in resource usage emulation! Message: ", e);
    }
  }
}
Example #10
Source File: TeraChecksum.java From hadoop with Apache License 2.0

public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example #11
Source File: SleepJob.java From hadoop with Apache License 2.0

public Job createJob(int numMapper, int numReducer, long mapSleepTime,
    int mapSleepCount, long reduceSleepTime, int reduceSleepCount)
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(SleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(SleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Example #12
Source File: HdfsUtil.java From ES-Fastloader with Apache License 2.0

public static Job getHdfsJob(Configuration conf, TaskConfig taskConfig,
    IndexInfo indexInfo) throws Exception {
  Job job = Job.getInstance(conf, MAIN_CLASS);
  job.setJobName("DidiFastIndex_" + taskConfig.getEsTemplate());
  job.setJarByClass(FastIndex.class);
  job.setMapperClass(FastIndexMapper.class);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);
  HCatInputFormat.setInput(job, taskConfig.getHiveDB(),
      taskConfig.getHiveTable(), taskConfig.getFilterStr());
  job.setReducerClass(FastIndexReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(indexInfo.getReducerNum());
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(taskConfig.getHdfsMROutputPath()));
  return job;
}
Example #13
Source File: AvroRowsFunction.java From tablasco with Apache License 2.0

@Override
public List<Object> call(Tuple2<AvroWrapper, NullWritable> avroTuple) {
  final GenericData.Record datum = (GenericData.Record) avroTuple._1().datum();
  List<Object> row = new ArrayList<>(this.headers.size());
  for (String header : this.headers) {
    Object value = datum.get(header);
    if (value instanceof CharSequence) { // Avro Utf8 type
      value = value.toString();
    }
    row.add(value);
  }
  return row;
}
Example #14
Source File: Step1.java From MapReduce-Demo with MIT License

public static boolean run(Configuration config, Map<String, String> paths)
    throws IllegalArgumentException, IOException, ClassNotFoundException,
    InterruptedException {
  String jobName = "step1";
  Job job = Job.getInstance(config, jobName);
  job.setJarByClass(Step1.class);
  job.setJar("export\\ItemCF.jar");
  job.setMapperClass(Step1_Mapper.class);
  job.setReducerClass(Step1_Reducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);

  Path inPath = new Path(paths.get("Step1Input"));
  Path outpath = new Path(paths.get("Step1Output"));
  FileInputFormat.addInputPath(job, inPath);
  FileOutputFormat.setOutputPath(job, outpath);
  FileSystem fs = FileSystem.get(config);
  if (fs.exists(outpath)) {
    fs.delete(outpath, true);
  }
  return job.waitForCompletion(true);
}
Example #15
Source File: MDSHiveLineReader.java From multiple-dimension-spread with Apache License 2.0

@Override
public boolean next(final NullWritable key, final ColumnAndIndex value) throws IOException {
  if (currentSpread == null || currentIndex == currentIndexList.size()) {
    if (!nextReader()) {
      updateCounter(reader.getReadStats());
      isEnd = true;
      return false;
    }
  }
  spreadColumn.setSpread(currentSpread);
  value.column = spreadColumn;
  value.index = currentIndexList.get(currentIndex);
  value.columnIndex = spreadCounter.get();
  currentIndex++;
  return true;
}
Example #16
Source File: MDSParserOutputFormat.java From multiple-dimension-spread with Apache License 2.0

@Override
public RecordWriter<NullWritable, IParser> getRecordWriter(final TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  Configuration config = taskAttemptContext.getConfiguration();

  String extension = ".mds";
  Path file = getDefaultWorkFile(taskAttemptContext, extension);

  FileSystem fs = file.getFileSystem(config);
  long dfsBlockSize = Math.max(fs.getDefaultBlockSize(file), 1024 * 1024 * 256);

  OutputStream out = fs.create(file, true, 4096, fs.getDefaultReplication(file), dfsBlockSize);

  return new MDSParserRecordWriter(out, new jp.co.yahoo.dataplatform.config.Configuration());
}
Example #17
Source File: TSFHiveOutputFormat.java From incubator-iotdb with Apache License 2.0

@Override
public RecordWriter<NullWritable, HDFSTSRecord> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) {
  return new RecordWriter<NullWritable, HDFSTSRecord>() {

    @Override
    public void write(NullWritable key, HDFSTSRecord value) {
      throw new RuntimeException("Should not be called");
    }

    @Override
    public void close(Reporter reporter) {
    }
  };
}
Example #18
Source File: MapReduceTestUtil.java From hadoop with Apache License 2.0

@SuppressWarnings("unchecked")
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  keyclass = (Class<? extends K>) conf.getClass("test.fakeif.keyclass",
      NullWritable.class, WritableComparable.class);
  valclass = (Class<? extends V>) conf.getClass("test.fakeif.valclass",
      NullWritable.class, WritableComparable.class);
}
Example #19
Source File: ExportJobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0

@Override
protected void configureMapper(Job job, String tableName,
    String tableClassName) throws ClassNotFoundException, IOException {
  job.setMapperClass(getMapperClass());

  // Concurrent writes of the same records would be problematic.
  ConfigurationHelper.setJobMapSpeculativeExecution(job, false);

  job.setMapOutputKeyClass(SqoopRecord.class);
  job.setMapOutputValueClass(NullWritable.class);
}
Example #20
Source File: FlowSortReducer.java From xxhadoop with Apache License 2.0

@Override
protected void reduce(FlowBean key, Iterable<NullWritable> values,
    Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
    throws IOException, InterruptedException {
  // keys arrive from the shuffle already sorted; just emit each one
  context.write(key, NullWritable.get());
}
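Example #20 is the reduce half of a common sort-only pattern: a WritableComparable bean is the key, NullWritable stands in for the value, and the shuffle does all the sorting. The matching map side could look like the following sketch; FlowBean's set() method and the tab-separated input layout are assumptions, not taken from the xxhadoop project:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical map side of the sort-only job: the WritableComparable bean
// is the key (so the shuffle sorts by it) and NullWritable carries no payload.
public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {
  private final FlowBean bean = new FlowBean(); // FlowBean comes from the project

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    String[] fields = line.toString().split("\t");
    // set(...) is an assumed setter for phone, upFlow, downFlow
    bean.set(fields[0], Long.parseLong(fields[1]), Long.parseLong(fields[2]));
    context.write(bean, NullWritable.get());
  }
}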
Example #21
Source File: TestWrappedRecordReaderClassloader.java From hadoop with Apache License 2.0

@SuppressWarnings("unchecked")
public void configure(JobConf job) {
  keyclass = (Class<? extends K>) job.getClass("test.fakeif.keyclass",
      NullWritable.class, WritableComparable.class);
  valclass = (Class<? extends V>) job.getClass("test.fakeif.valclass",
      NullWritable.class, WritableComparable.class);
}
Example #22
Source File: TestGridMixClasses.java From hadoop with Apache License 2.0

@Override
public DataInputBuffer getValue() throws IOException {
  ByteArrayOutputStream dt = new ByteArrayOutputStream();
  NullWritable key = NullWritable.get();
  key.write(new DataOutputStream(dt));
  DataInputBuffer result = new DataInputBuffer();
  byte[] b = dt.toByteArray();
  result.reset(b, 0, b.length);
  return result;
}
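The getValue() above returns an empty buffer: NullWritable.write() is a no-op, so serializing the singleton contributes zero bytes. A quick standalone check:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;

// NullWritable.write() is a no-op, so serializing it produces zero bytes.
public class NullWritableSize {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    NullWritable.get().write(new DataOutputStream(bytes));
    System.out.println(bytes.size()); // prints 0
  }
}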
Example #23
Source File: TsFileRecordWriter.java From incubator-iotdb with Apache License 2.0

@Override
public synchronized void write(NullWritable arg0, TSRecord tsRecord) throws IOException {
  try {
    tsFileWriter.write(tsRecord);
  } catch (WriteProcessException e) {
    e.printStackTrace();
  }
}
Example #24
Source File: TSFHiveInputFormatTest.java From incubator-iotdb with Apache License 2.0

@Test
public void testGetRecordReader() {
  try {
    RecordReader<NullWritable, MapWritable> recordReader =
        inputFormat.getRecordReader(inputSplit, job, null);
    assertTrue(recordReader instanceof TSFHiveRecordReader);
  } catch (IOException e) {
    e.printStackTrace();
    fail();
  }
}
Example #25
Source File: TestMultipleInputs.java From hadoop with Apache License 2.0

@Override
public void reduce(Text key, Iterable<Text> values, Context ctx)
    throws IOException, InterruptedException {
  count = 0;
  for (Text value : values) {
    count++;
  }
  ctx.write(NullWritable.get(), new Text(key.toString() + " " + count));
}
Example #26
Source File: MergeMapper.java From MapReduce-Demo with MIT License

@Override
protected void setup(Mapper<NullWritable, BytesWritable, Text, BytesWritable>.Context context)
    throws IOException, InterruptedException {
  InputSplit split = context.getInputSplit();
  // use the path of the file backing this split as the output key
  Path path = ((FileSplit) split).getPath();
  fileNameKey = new Text(path.toString());
}
Example #27
Source File: OdpsImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0

@Override
protected void configureMapper(Job job, String tableName,
    String tableClassName) {
  job.setOutputKeyClass(SqoopRecord.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapperClass(getMapperClass());
}
Example #28
Source File: TSMRWriteExample.java From incubator-iotdb with Apache License 2.0

@Override
protected void map(NullWritable key, MapWritable value,
    Mapper<NullWritable, MapWritable, Text, MapWritable>.Context context)
    throws IOException, InterruptedException {
  Text deltaObjectId = (Text) value.get(new Text("device_id"));
  long timestamp = ((LongWritable) value.get(new Text("time_stamp"))).get();
  if (timestamp % 100000 == 0) {
    context.write(deltaObjectId, new MapWritable(value));
  }
}
Example #29
Source File: JavaObjectFileSource.java From rheem with Apache License 2.0

SequenceFileIterator(String path) throws IOException {
  final SequenceFile.Reader.Option fileOption = SequenceFile.Reader.file(new Path(path));
  this.sequenceFileReader = new SequenceFile.Reader(new Configuration(true), fileOption);
  Validate.isTrue(this.sequenceFileReader.getKeyClass().equals(NullWritable.class));
  Validate.isTrue(this.sequenceFileReader.getValueClass().equals(BytesWritable.class));
  this.tryAdvance();
}
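The iterator above only accepts sequence files keyed by NullWritable with BytesWritable values. A minimal sketch of reading such a file directly with the Hadoop API (the path is a placeholder):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

// Minimal sketch: iterate a (NullWritable, BytesWritable) sequence file
// like the one the constructor above validates.
public class ReadNullKeyedSequenceFile {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration(true);
    try (SequenceFile.Reader reader = new SequenceFile.Reader(
        conf, SequenceFile.Reader.file(new Path("hdfs:///tmp/example.seq")))) {
      NullWritable key = NullWritable.get(); // singleton, nothing to read into
      BytesWritable value = new BytesWritable();
      while (reader.next(key, value)) {
        System.out.println(value.getLength() + " bytes");
      }
    }
  }
}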