org.apache.hadoop.mapreduce.RecordWriter Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.RecordWriter. They are taken from open-source projects; each example lists its source file together with the project and license it was extracted from.
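Before the project examples, here is the general shape they all share: an OutputFormat subclass hands the framework a RecordWriter whose write() receives each output key/value pair and whose close() releases resources when the task finishes. The sketch below is illustrative only — TabSeparatedOutputFormat and its tab-separated format are invented for this page, not taken from any of the projects that follow.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// hypothetical output format writing "key<TAB>value" lines
public class TabSeparatedOutputFormat extends FileOutputFormat<Text, Text> {

  @Override
  public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    Path file = getDefaultWorkFile(context, ".txt");
    FSDataOutputStream out =
        file.getFileSystem(context.getConfiguration()).create(file, false);
    return new RecordWriter<Text, Text>() {
      @Override
      public void write(Text key, Text value) throws IOException {
        out.write(key.copyBytes());   // key bytes
        out.writeByte('\t');
        out.write(value.copyBytes()); // value bytes
        out.writeByte('\n');
      }

      @Override
      public void close(TaskAttemptContext ctx) throws IOException {
        out.close(); // flush and release the task's output stream
      }
    };
  }
}

The project examples below vary mainly in where getRecordWriter() sends the data: local files, Excel workbooks, JDBC connections, Pravega streams, or in-memory queues.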
Example #1
Source File: ExcelRowFileOutputFormat.java    From hadoopoffice with Apache License 2.0
@Override
public RecordWriter<NullWritable, ArrayWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    // check if the MIME type is set; if not, assume the new Excel format (.xlsx)
    Configuration conf = context.getConfiguration();
    String defaultConf = conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE, ExcelFileOutputFormat.DEFAULT_MIMETYPE);
    conf.set(HadoopOfficeWriteConfiguration.CONF_MIMETYPE, defaultConf);
    // add the file suffix that matches the configured MIME type
    Path file = getDefaultWorkFile(context, ExcelFileOutputFormat.getSuffix(conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE)));
    try {
        return new ExcelRowRecordWriter<>(HadoopUtil.getDataOutputStream(conf, file, context, getCompressOutput(context), getOutputCompressorClass(context, ExcelFileOutputFormat.defaultCompressorClass)), file.getName(), conf);
    } catch (InvalidWriterConfigurationException | InvalidCellSpecificationException | FormatNotUnderstoodException
            | GeneralSecurityException | OfficeWriterException e) {
        LOG.error(e);
    }
    // the writer could not be created; the failure has been logged above
    return null;
}
 
Example #2
Source File: TestFileOutputCommitter.java    From hadoop with Apache License 2.0
private void writeOutput(RecordWriter theRecordWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();

  try {
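    // write every combination of real, null, and NullWritable keys and values;
    // the surrounding committer tests then validate the committed file contents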
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(context);
  }
}
 
Example #3
Source File: MneMapreducePersonDataTest.java    From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWritePersonData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Person<Long>> sess =
      new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Person<Long>> mdvalue =
      new MneDurableOutputValue<Person<Long>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Person<Long> person = null;
  for (int i = 0; i < m_reccnt; ++i) {
    person = sess.newDurableObjectRecord();
    person.setAge((short) m_rand.nextInt(50));
    person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
    m_sumage += person.getAge();
    writer.write(nada, mdvalue.of(person));
  }
  writer.close(m_tacontext);
  sess.close();
}
 
Example #4
Source File: TestMRCJCFileOutputCommitter.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
private void writeOutput(RecordWriter theRecordWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();

  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(context);
  }
}
 
Example #5
Source File: Chain.java    From big-c with Apache License 2.0
/**
 * Add a mapper that reads from and writes to the chain's blocking queues.
 */
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue<KeyValuePair<?, ?>> input,
    ChainBlockingQueue<KeyValuePair<?, ?>> output,
    TaskInputOutputContext context, int index) throws IOException,
    InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
  Class<?> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
      Object.class);
  RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
      conf);
  MapRunner runner = new MapRunner(mappers.get(index), createMapContext(rr,
      rw, context, getConf(index)), rr, rw);
  threads.add(runner);
}
 
Example #6
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context) throws IOException, InterruptedException {
    NullWritable nullWritable = NullWritable.get();
    
    try {
        theRecordWriter.write(key1, val1);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val1);
        theRecordWriter.write(nullWritable, val2);
        theRecordWriter.write(key2, nullWritable);
        theRecordWriter.write(key1, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key2, val2);
    } finally {
        theRecordWriter.close(context);
    }
}
 
Example #7
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0
private void testCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    
    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #8
Source File: ChainReduceContextImpl.java    From hadoop with Apache License 2.0
public ChainReduceContextImpl(
    ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> base,
    RecordWriter<KEYOUT, VALUEOUT> output, Configuration conf) {
  this.base = base;
  this.rw = output;
  this.conf = conf;
}
 
Example #9
Source File: ExportOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
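    // any failure while constructing the writer (e.g. JDBC setup) is wrapped
    // in an IOException, the only checked exception declared by this override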
    return new ExportRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
 
Example #10
Source File: SqlServerExportBatchOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new SqlServerExportBatchRecordWriter<K, V>(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
 
Example #11
Source File: HDFSWriter.java    From ViraPipe with MIT License
@Override
public RecordWriter<NullWritable, SAMRecordWritable> getRecordWriter(TaskAttemptContext ctx,
                                                                     Path outputPath) throws IOException {
    // the writers require a header in order to create a codec, even if
    // the header isn't being written out
    setSAMHeader(samheader);
    setWriteHeader(writeHeader);

    return super.getRecordWriter(ctx, outputPath);
}
 
Example #12
Source File: Chain.java    From big-c with Apache License 2.0
public MapRunner(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapper,
    Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context mapperContext,
    RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw)
    throws IOException, InterruptedException {
  this.mapper = mapper;
  this.rr = rr;
  this.rw = rw;
  this.chainContext = mapperContext;
}
 
Example #13
Source File: SqlServerUpsertOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new SqlServerUpsertRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
 
Example #14
Source File: PravegaFixedSegmentsOutputFormat.java    From pravega-samples with Apache License 2.0
@Override
public RecordWriter<String, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    final String scopeName = Optional.ofNullable(conf.get(OUTPUT_SCOPE_NAME)).orElseThrow(() ->
            new IOException("The output scope name must be configured (" + OUTPUT_SCOPE_NAME + ")"));
    final String streamName = Optional.ofNullable(conf.get(OUTPUT_STREAM_NAME)).orElseThrow(() ->
            new IOException("The output stream name must be configured (" + OUTPUT_STREAM_NAME + ")"));
    final URI controllerURI = Optional.ofNullable(conf.get(OUTPUT_URI_STRING)).map(URI::create).orElseThrow(() ->
            new IOException("The Pravega controller URI must be configured (" + OUTPUT_URI_STRING + ")"));
    final String deserializerClassName = Optional.ofNullable(conf.get(OUTPUT_DESERIALIZER)).orElseThrow(() ->
            new IOException("The event deserializer must be configured (" + OUTPUT_DESERIALIZER + ")"));
    final int segments = Integer.parseInt(conf.get(OUTPUT_STREAM_SEGMENTS, "3"));

    StreamManager streamManager = StreamManager.create(controllerURI);
    streamManager.createScope(scopeName);

    StreamConfiguration streamConfig = StreamConfiguration.builder()
            .scalingPolicy(ScalingPolicy.fixed(segments))
            .build();

    streamManager.createStream(scopeName, streamName, streamConfig);
    EventStreamClientFactory clientFactory = (externalClientFactory != null) ? externalClientFactory :
            EventStreamClientFactory.withScope(scopeName, ClientConfig.builder().controllerURI(controllerURI).build());

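    // note: despite its name (and the OUTPUT_DESERIALIZER config key), this
    // Serializer is used by the event writer below to serialize outgoing values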
    Serializer<V> deserializer;
    try {
        Class<?> deserializerClass = Class.forName(deserializerClassName);
        deserializer = (Serializer<V>) deserializerClass.newInstance();
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
        log.error("Exception when creating deserializer", e);
        throw new IOException(
                "Unable to create the event deserializer (" + deserializerClassName + ")", e);
    }

    EventStreamWriter<V> writer = clientFactory.createEventWriter(streamName, deserializer, EventWriterConfig.builder().build());

    return new PravegaOutputRecordWriter<V>(writer);
}
 
Example #15
Source File: TaskInputOutputContextImpl.java    From big-c with Apache License 2.0
public TaskInputOutputContextImpl(Configuration conf, TaskAttemptID taskid,
                                  RecordWriter<KEYOUT,VALUEOUT> output,
                                  OutputCommitter committer,
                                  StatusReporter reporter) {
  super(conf, taskid, reporter);
  this.output = output;
  this.committer = committer;
}
 
Example #16
Source File: Chain.java    From big-c with Apache License 2.0
/**
 * Create a map context that is based on ChainMapContext and the given record
 * reader and record writer
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT> 
Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createMapContext(
    RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw,
    TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
    Configuration conf) {
  MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapContext = 
    new ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
      context, rr, rw, conf);
  Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context mapperContext = 
    new WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
      .getMapContext(mapContext);
  return mapperContext;
}
 
Example #17
Source File: ExportCallOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new ExportCallRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
 
Example #18
Source File: LogOutputFormat.java    From 163-bigdate-note with GNU General Public License v3.0
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Iterator<RecordWriter<K, V>> values = this.recordWriter.values().iterator();
    while (values.hasNext()) {
        values.next().close(context);
    }
    this.recordWriter.clear();
}
 
Example #19
Source File: ContentReader.java    From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<DocumentURI, DatabaseDocument> getRecordWriter(
        TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new CustomWriter(getOutputPath(context), 
            context.getConfiguration());
}
 
Example #20
Source File: Chain.java    From big-c with Apache License 2.0
/**
 * Add the last mapper in the chain: it reads its input from a queue and
 * writes its output to the output context.
 */
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue<KeyValuePair<?, ?>> input,
    TaskInputOutputContext outputContext, int index) throws IOException,
    InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
  Class<?> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
  RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
  RecordWriter rw = new ChainRecordWriter(outputContext);
  MapRunner runner = new MapRunner(mappers.get(index), createMapContext(rr,
      rw, outputContext, getConf(index)), rr, rw);
  threads.add(runner);
}
 
Example #21
Source File: LogOutputFormat.java    From 163-bigdate-note with GNU General Public License v3.0
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
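    // lazily create a single MultiRecordWriter and reuse it for every call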
    if (writer == null) {
        writer = new MultiRecordWriter(job, getTaskOutputPath(job));
    }
    return writer;
}
 
Example #22
Source File: GenerateData.java    From hadoop with Apache License 2.0
@Override
public RecordWriter<NullWritable,BytesWritable> getRecordWriter(
    TaskAttemptContext job) throws IOException {

  return new ChunkWriter(getDefaultWorkFile(job, ""),
      job.getConfiguration());
}
 
Example #23
Source File: TeraOutputFormat.java    From pravega-samples with Apache License 2.0
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
    throws IOException {
  Path file = getDefaultWorkFile(job, "");
  FileSystem fs = file.getFileSystem(job.getConfiguration());
  FSDataOutputStream fileOut = fs.create(file);
  return new TeraRecordWriter(fileOut, job);
}
 
Example #24
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0
private void testMapFileOutputCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    
    // write output
    MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeMapFileOutput(theRecordWriter, tContext);
    
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    
    // validate output
    validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #25
Source File: TransformerOutputFormat.java    From BigDataPlatform with GNU General Public License v3.0
@Override
public RecordWriter<BaseDimension, BaseStatsValueWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Connection conn = null;
    IDimensionConverter converter = DimensionConverterClient.createDimensionConverter(conf);
    try {
        conn = JdbcManager.getConnection(conf, GlobalConstants.WAREHOUSE_OF_WEBSITE);
        conn.setAutoCommit(false);
    } catch (SQLException e) {
        logger.error("Failed to get a database connection", e);
        throw new IOException("Failed to get a database connection", e);
    }
    return new TransformerRecordWriter(conn, conf, converter);
}
 
Example #26
Source File: SingleDocumentOutputFormat.java    From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<DocumentURI, MarkLogicDocument> getRecordWriter(
    TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String p = conf.get(ConfigConstants.CONF_OUTPUT_FILEPATH);
    Path path = new Path(p);
    return new SingleDocumentWriter(path, conf);
}