Java Code Examples for org.apache.hadoop.mapreduce.Mapper.Context#write()

The following examples show how to use org.apache.hadoop.mapreduce.Mapper.Context#write(). Each example is drawn from an open-source project; the source file and license are noted above the code.
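Before the project-specific examples, here is a minimal, self-contained sketch of the basic pattern: a Mapper subclass whose map() method emits key/value pairs through Context#write(). The class and field names (TokenMapper, word, one) are illustrative placeholders, not taken from any of the projects below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

  private final Text word = new Text();
  private final IntWritable one = new IntWritable(1);

  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      // Each call to write() emits one (word, 1) pair; the argument types must
      // match the Mapper's declared KEYOUT/VALUEOUT type parameters.
      context.write(word, one);
    }
  }
}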
Example 1
Source File: PopulateTable.java    From HBase-ToHDFS with Apache License 2.0
@Override
public void map(NullWritable key, NullWritable value, Context context) throws IOException, InterruptedException {

  int counter = 0;
  
  System.out.println("starting mapper");
  System.out.println();
  for (int i = 0; i < numberOfRecords; i++) {
    String keyRoot = StringUtils.leftPad(Integer.toString(r.nextInt(Short.MAX_VALUE)), 5, '0');

    if (i % 1000 == 0) {
      System.out.print(".");
    }

    for (int j = 0; j < 10; j++) {
      hKey.set(Bytes.toBytes(keyRoot + "|" + runID + "|" + taskId));
      kv = new KeyValue(hKey.get(), columnFamily, Bytes.toBytes("C" + j), Bytes.toBytes("counter:" + counter++));
      context.write(hKey, kv);
    }
  }

  System.out.println("finished mapper");
}
 
Example 2
Source File: WordCount.java    From stratio-cassandra with Apache License 2.0
public void map(Map<String, ByteBuffer> keys, Map<String, ByteBuffer> columns, Context context) throws IOException, InterruptedException
{
    for (Entry<String, ByteBuffer> column : columns.entrySet())
    {
        if (!"line".equalsIgnoreCase(column.getKey()))
            continue;

        String value = ByteBufferUtil.string(column.getValue());

        StringTokenizer itr = new StringTokenizer(value);
        while (itr.hasMoreTokens())
        {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}
 
Example 3
Source File: DateSortDesc.java    From MapReduce-Demo with MIT License
public void reduce(IntWritable key, Iterable<Text> values, Context context)
		throws IOException, InterruptedException {
	for (Text value : values) {
		// After sorting, swap the key and value again so that the date becomes the key
		System.out.println(value.toString() + ":" + key.get());
		context.write(value, key);
	}
}
 
Example 4
Source File: DateSortDesc.java    From MapReduce-Demo with MIT License
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
	String[] strs = value.toString().split("\t");
	num.set(Integer.parseInt(strs[1]));
	// Use the count as the key so the records are sorted in ascending order
	context.write(num, new Text(strs[0]));
	System.out.println(num.get() + "," + strs[0]);
}
 
Example 5
Source File: HalyardPreSplit.java    From Halyard with Apache License 2.0
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    decimationFactor = conf.getInt(DECIMATION_FACTOR_PROPERTY, DEFAULT_DECIMATION_FACTOR);
    for (byte b = 1; b < 6; b++) {
        context.write(new ImmutableBytesWritable(new byte[] {b}), new LongWritable(1));
    }
    timestamp = conf.getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
}
 
Example 6
Source File: HalyardPreSplit.java    From Halyard with Apache License 2.0
@Override
protected void map(LongWritable key, Statement value, final Context context) throws IOException, InterruptedException {
    if (counter++ == next) {
        next = counter + random.nextInt(decimationFactor);
        for (KeyValue keyValue: HalyardTableUtils.toKeyValues(value.getSubject(), value.getPredicate(), value.getObject(), value.getContext(), false, timestamp)) {
            context.write(new ImmutableBytesWritable(keyValue.getRowArray(), keyValue.getRowOffset(), keyValue.getRowLength()), new LongWritable(keyValue.getLength()));
        }
    }
}
 
Example 7
Source File: WordCount.java    From stratio-cassandra with Apache License 2.0
public void reduce(Text word, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
{
    int sum = 0;
    for (IntWritable val : values)
        sum += val.get();
    keys.put("word", ByteBufferUtil.bytes(word.toString()));
    context.write(keys, getBindVariables(word, sum));
}
 
Example 8
Source File: WordCount.java    From stratio-cassandra with Apache License 2.0
public void map(Long key, Row row, Context context) throws IOException, InterruptedException
{
    String value = row.getString("line");
    logger.debug("read {}:{}={} from {}", new Object[] {key, "line", value, context.getInputSplit()});
    StringTokenizer itr = new StringTokenizer(value);
    while (itr.hasMoreTokens())
    {
        word.set(itr.nextToken());
        context.write(word, one);
    }
}
 
Example 9
Source File: WordCount.java    From stratio-cassandra with Apache License 2.0
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
{
    int sum = 0;
    for (IntWritable val : values)
        sum += val.get();
    context.write(key, new IntWritable(sum));
}
 
Example 10
Source File: TestReadWriteParquet.java    From parquet-examples with Apache License 2.0
@Override
public void map(LongWritable key, Group value, Context context) throws IOException, InterruptedException {
    context.write(null, value);
}
 
Example 11
Source File: TestReadParquet.java    From parquet-examples with Apache License 2.0
@Override
public void map(LongWritable key, Group value, Context context) throws IOException, InterruptedException {
    NullWritable outKey = NullWritable.get();
    if (expectedFields == null) {
        // Get the file schema (which may be different from the fields in a particular record) from the input split
        String fileSchema = ((ParquetInputSplit) context.getInputSplit()).getFileSchema();
        // System.err.println("file schema from context: " + fileSchema);
        RecordSchema schema = new RecordSchema(fileSchema);
        expectedFields = schema.getFields();
        // System.err.println("inferred schema: " + expectedFields.toString());
    }

    // No public accessor to the column values in a Group, so extract them from the string representation
    String line = value.toString();
    String[] fields = line.split("\n");

    StringBuilder csv = new StringBuilder();
    boolean hasContent = false;
    int i = 0;
    // Look for each expected column
    Iterator<FieldDescription> it = expectedFields.iterator();
    while (it.hasNext()) {
        if (hasContent) {
            csv.append(',');
        }
        String name = it.next().name;
        if (fields.length > i) {
            String[] parts = fields[i].split(": ");
            // We assume proper order, but there may be fields missing
            if (parts[0].equals(name)) {
                boolean mustQuote = (parts[1].contains(",") || parts[1].contains("'"));
                if (mustQuote) {
                    csv.append('"');
                }
                csv.append(parts[1]);
                if (mustQuote) {
                    csv.append('"');
                }
                hasContent = true;
                i++;
            }
        }
    }
    context.write(outKey, new Text(csv.toString()));
}
 
Example 12
Source File: Decoder.java    From RDFS with Apache License 2.0
/**
 * Having buffers of the right size is extremely important. If the
 * buffer size is not a divisor of the block size, we may end up reading
 * across block boundaries.
 * 
 * If codec's simulateBlockFix is true, we use the old code to fix blocks
 * and verify the new code's result is the same as the old one.
 */
void fixErasedBlock(FileSystem srcFs, Path srcFile, FileSystem parityFs,
		Path parityFile, boolean fixSource, long blockSize,
		long errorOffset, long limit, boolean partial, OutputStream out,
		Context context, boolean skipVerify) throws IOException,
		InterruptedException {
	configureBuffers(blockSize);
	Progressable reporter = context;
	if (reporter == null) {
		reporter = RaidUtils.NULL_PROGRESSABLE;
	}

	LOG.info("Code: " + this.codec.id + " simulation: "
			+ this.codec.simulateBlockFix);
	if (this.codec.simulateBlockFix) {
		String oldId = getOldCodeId(srcFile);
		if (oldId == null) {
			// Couldn't find old codec for block fixing, throw exception
			// instead
			throw new IOException("Couldn't find old parity files for "
					+ srcFile + ". Won't reconstruct the block since code "
					+ this.codec.id + " is still under test");
		}
		if (partial) {
			throw new IOException(
					"Couldn't reconstruct the partial data because "
							+ "old decoders don't support it");
		}
		Decoder decoder = (oldId.equals("xor")) ? new XORDecoder(conf)
				: new ReedSolomonDecoder(conf);
		CRC32 newCRC = null;
		long newLen = 0;
		if (!skipVerify) {
			newCRC = new CRC32();
			newLen = this.fixErasedBlockImpl(srcFs, srcFile, parityFs,
					parityFile, fixSource, blockSize, errorOffset, limit,
					partial, null, reporter, newCRC);
		}
		CRC32 oldCRC = skipVerify ? null : new CRC32();
		long oldLen = decoder.fixErasedBlockImpl(srcFs, srcFile, parityFs,
				parityFile, fixSource, blockSize, errorOffset, limit,
				partial, out, reporter, oldCRC);

		if (!skipVerify) {
			if (newCRC.getValue() != oldCRC.getValue() || newLen != oldLen) {
				LOG.error(" New code "
						+ codec.id
						+ " produces different data from old code "
						+ oldId
						+ " during fixing "
						+ (fixSource ? srcFile.toString() : parityFile
								.toString()) + " (offset=" + errorOffset
						+ ", limit=" + limit + ")" + " checksum:"
						+ newCRC.getValue() + ", " + oldCRC.getValue()
						+ " len:" + newLen + ", " + oldLen);
				if (context != null) {
					context.getCounter(Counter.BLOCK_FIX_SIMULATION_FAILED)
							.increment(1L);
					String outkey;
					if (fixSource) {
						outkey = srcFile.toString();
					} else {
						outkey = parityFile.toString();
					}
					String outval = "simulation_failed";
					context.write(new Text(outkey), new Text(outval));
				}
			} else {
				LOG.info(" New code "
						+ codec.id
						+ " produces the same data with old code "
						+ oldId
						+ " during fixing "
						+ (fixSource ? srcFile.toString() : parityFile
								.toString()) + " (offset=" + errorOffset
						+ ", limit=" + limit + ")");
				if (context != null) {
					context.getCounter(
							Counter.BLOCK_FIX_SIMULATION_SUCCEEDED)
							.increment(1L);
				}
			}
		}
	} else {
		fixErasedBlockImpl(srcFs, srcFile, parityFs, parityFile, fixSource,
				blockSize, errorOffset, limit, partial, out, reporter, null);
	}
}
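The Javadoc at the top of this example notes that the read buffer must evenly divide the block size so that reads never straddle block boundaries. As a standalone illustration of that constraint (not part of Decoder.java; the method name and preferred-size parameter are assumptions for illustration only), a helper could pick such a buffer size like this:

// Illustrative only: walk down from a preferred buffer size until we find one
// that evenly divides the block size, so a full block is read in whole buffers.
static int chooseBufferSize(long blockSize, int preferredSize) {
    int size = (int) Math.min((long) preferredSize, blockSize);
    while (size > 1 && blockSize % size != 0) {
        size--;
    }
    return size; // always a divisor of blockSize (worst case: 1)
}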
 
Example 13
Source File: CouchbaseIterationMapper.java    From laser with Apache License 2.0
protected void map(BytesWritable key, BytesWritable value, Context context)
		throws IOException, InterruptedException {
	context.write(new Text(key.get()), new Text(value.get()));
}
 
Example 14
Source File: Sort.java    From wifi with Apache License 2.0
public void map(Object key, Text value, Context context) throws IOException, InterruptedException
{
	String line = value.toString();
	data.set(Integer.parseInt(line));
	context.write(data, new IntWritable(1));
}
 
Example 15
Source File: ParquetExportMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void map(GenericRecord key, NullWritable val,
    Context context) throws IOException, InterruptedException {
  context.write(toSqoopRecord(key), NullWritable.get());
}
 
Example 16
Source File: PGBulkloadExportMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
protected void cleanup(Context context)
  throws IOException, InterruptedException {
  LongWritable taskid =
      new LongWritable(context.getTaskAttemptID().getTaskID().getId());
  context.write(taskid, new Text(tmpTableName));

  if (writer != null) {
    writer.close();
  }
  if (out != null) {
    out.close();
  }
  try {
    if (thread != null) {
      thread.join();
    }
  } finally {
    // block until the process is done.
    if (null != process) {
      while (true) {
        try {
          int returnValue = process.waitFor();

          // Check pg_bulkload's process return value
          if (returnValue != 0) {
            throw new RuntimeException(
              "Unexpected return value from pg_bulkload: "+ returnValue);
          }
        } catch (InterruptedException ie) {
          // interrupted; loop around.
          LOG.debug("Caught interrupted exception waiting for process "
              + "pg_bulkload.bin to exit");
          // Clear the interrupted flag. We have to call Thread.interrupted()
          // to clear the flag for InterruptedExceptions thrown by process.waitFor().
          // See http://bugs.sun.com/view_bug.do?bug_id=6420270 for more info.
          Thread.interrupted();
          continue;
        }
        break;
      }
    }
  }
  if (null != passwordFilename) {
    if (!new File(passwordFilename).delete()) {
      LOG.error("Could not remove postgresql password file "
                + passwordFilename);
      LOG.error("You should remove this file to protect your credentials.");
    }
  }
}
 
Example 17
Source File: MonthAscTempDescSort.java    From MapReduce-Demo with MIT License
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
	String[] strings = value.toString().split(" ");
	String date = strings[0].substring(0, 7);
	temp.set(Integer.parseInt(strings[2].substring(0, strings[2].length()-1)));
	context.write(new Text(date), temp);
}