Java Code Examples for org.apache.hadoop.hbase.io.ImmutableBytesWritable

The following are the top-voted examples showing how to use org.apache.hadoop.hbase.io.ImmutableBytesWritable. These examples are extracted from open source projects.
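As a quick orientation before the project-specific examples, here is a minimal standalone sketch of the class itself. The calls shown (the byte[] constructor, get(), getOffset(), getLength(), copyBytes() and set()) are standard ImmutableBytesWritable and Bytes API; the row values are purely illustrative.

// Minimal usage sketch of ImmutableBytesWritable (illustrative values only).
byte[] row = Bytes.toBytes("row-0001");
ImmutableBytesWritable key = new ImmutableBytesWritable(row);

// get() returns the backing array, so respect getOffset()/getLength();
// copyBytes() returns a defensive copy of just the wrapped range.
String decoded = Bytes.toString(key.get(), key.getOffset(), key.getLength());
byte[] copy = key.copyBytes();

// The instance is reusable as a Writable key: set() repoints it at new bytes.
key.set(Bytes.toBytes("row-0002"));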
Example 1
Project: ditb   File: TestTableInputFormat.java   (9 votes)
@Override
public void map(ImmutableBytesWritable key, Result value,
    OutputCollector<NullWritable,NullWritable> output,
    Reporter reporter) throws IOException {
  for (Cell cell : value.listCells()) {
    reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
        Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
        .increment(1L);
    reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
        Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
        .increment(1L);
    reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
        Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
        .increment(1L);
  }
}
 
Example 2
Project: aliyun-maxcompute-data-collectors   File: HBaseBulkImportMapper.java   (6 votes)
@Override
public void map(LongWritable key, SqoopRecord val, Context context)
    throws IOException, InterruptedException {
  try {
    // Loading of LOBs was delayed until we have a Context.
    val.loadLargeObjects(lobLoader);
  } catch (SQLException sqlE) {
    throw new IOException(sqlE);
  }
  Map<String, Object> fields = val.getFieldMap();

  List<Put> putList = putTransformer.getPutCommand(fields);
  for(Put put: putList){
    context.write(new ImmutableBytesWritable(put.getRow()), put);
  }
}
 
Example 3
Project: ditb   File: HRegionPartitioner.java   (6 votes)
public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
  byte[] region = null;
  // Only one region return 0
  if (this.startKeys.length == 1){
    return 0;
  }
  try {
    // Not sure if this is cached after a split so we could have problems
    // here if a region splits while mapping
    region = locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
  } catch (IOException e) {
    LOG.error(e);
  }
  for (int i = 0; i < this.startKeys.length; i++){
    if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
      if (i >= numPartitions-1){
        // cover the case where we have fewer reducers than regions.
        return (Integer.toString(i).hashCode()
            & Integer.MAX_VALUE) % numPartitions;
      }
      return i;
    }
  }
  // if the above fails to find a matching start key, we need to return something
  return 0;
}
 
Example 4
Project: ditb   File: RowCounter.java   (6 votes)
/**
 * @param args
 * @return the JobConf
 * @throws IOException
 */
public JobConf createSubmittableJob(String[] args) throws IOException {
  JobConf c = new JobConf(getConf(), getClass());
  c.setJobName(NAME);
  // Columns are space delimited
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 2;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }
  // Second argument is the table name.
  TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
    RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
  c.setNumReduceTasks(0);
  // First arg is the output directory.
  FileOutputFormat.setOutputPath(c, new Path(args[0]));
  return c;
}
 
Example 5
Project: ditb   File: TestTableMapReduce.java   (6 votes)
/**
 * Pass the key, and reversed value to reduce
 *
 * @param key
 * @param value
 * @param context
 * @throws IOException
 */
public void map(ImmutableBytesWritable key, Result value,
  Context context)
throws IOException, InterruptedException {
  if (value.size() != 1) {
    throw new IOException("There should only be one input column");
  }
  Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
    cf = value.getMap();
  if(!cf.containsKey(INPUT_FAMILY)) {
    throw new IOException("Wrong input columns. Missing: '" +
      Bytes.toString(INPUT_FAMILY) + "'.");
  }

  // Get the original value and reverse it
  String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, null));
  StringBuilder newValue = new StringBuilder(originalValue);
  newValue.reverse();
  // Now set the value to be collected
  Put outval = new Put(key.get());
  outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
  context.write(key, outval);
}
 
Example 6
Project: ditb   File: TableMapReduceUtil.java   (6 votes)
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
  Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
  boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions =
      MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
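For context, a driver might call the method above roughly as follows. This is only a sketch: "targetTable", MyDriver and MyTableReduce are hypothetical names, not part of the example project.

// Hypothetical driver sketch for the initTableReduceJob(...) overload shown above.
JobConf job = new JobConf(HBaseConfiguration.create(), MyDriver.class);
job.setJobName("write-to-hbase");
TableMapReduceUtil.initTableReduceJob(
    "targetTable",             // output table (assumed name)
    MyTableReduce.class,       // reducer extending TableReduce (hypothetical)
    job,
    HRegionPartitioner.class,  // partition output by target region
    true);                     // ship HBase/dependency jars via tmpjars
JobClient.runJob(job);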
 
Example 7
Project: ditb   File: TestHFileOutputFormat.java   (6 votes)
/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte valBytes[] = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
  final byte [] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {

    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

    for (byte[] family : families) {
      KeyValue kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}
 
Example 8
Project: ditb   File: HashTable.java   (6 votes)
/**
 * Read the next key/hash pair.
 * Returns true if such a pair exists and false when at the end of the data.
 */
public boolean next() throws IOException {
  if (cachedNext) {
    cachedNext = false;
    return true;
  }
  key = new ImmutableBytesWritable();
  hash = new ImmutableBytesWritable();
  while (true) {
    boolean hasNext = mapFileReader.next(key, hash);
    if (hasNext) {
      return true;
    }
    hashFileIndex++;
    if (hashFileIndex < TableHash.this.numHashFiles) {
      mapFileReader.close();
      openHashFile();
    } else {
      key = null;
      hash = null;
      return false;
    }
  }
}
 
Example 9
Project: ditb   File: IndexBuilder.java   (6 votes)
@Override
protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
    throws IOException, InterruptedException {
  for(java.util.Map.Entry<byte[], ImmutableBytesWritable> index : indexes.entrySet()) {
    byte[] qualifier = index.getKey();
    ImmutableBytesWritable tableName = index.getValue();
    byte[] value = result.getValue(family, qualifier);
    if (value != null) {
      // original: row 123 attribute:phone 555-1212
      // index: row 555-1212 INDEX:ROW 123
      Put put = new Put(value);
      put.add(INDEX_COLUMN, INDEX_QUALIFIER, rowKey.get());
      context.write(tableName, put);
    }
  }
}
 
Example 10
Project: ditb   File: IndexBuilder.java   (6 votes)
@Override
protected void setup(Context context) throws IOException,
    InterruptedException {
  Configuration configuration = context.getConfiguration();
  String tableName = configuration.get("index.tablename");
  String[] fields = configuration.getStrings("index.fields");
  String familyName = configuration.get("index.familyname");
  family = Bytes.toBytes(familyName);
  indexes = new TreeMap<byte[], ImmutableBytesWritable>(Bytes.BYTES_COMPARATOR);
  for(String field : fields) {
    // if the table is "people" and the field to index is "email", then the
    // index table will be called "people-email"
    indexes.put(Bytes.toBytes(field),
        new ImmutableBytesWritable(Bytes.toBytes(tableName + "-" + field)));
  }
}
 
Example 11
Project: ditb   File: IntegrationTestLoadAndVerify.java   (6 votes)
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
    throws IOException, InterruptedException {
  BytesWritable bwKey = new BytesWritable(key.get());
  BytesWritable bwVal = new BytesWritable();
  for (Cell kv : value.listCells()) {
    if (Bytes.compareTo(TEST_QUALIFIER, 0, TEST_QUALIFIER.length,
                        kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength()) == 0) {
      context.write(bwKey, EMPTY);
    } else {
      bwVal.set(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
      context.write(bwVal, bwKey);
    }
  }
}
 
Example 12
Project: ditb   File: SyncTable.java   (6 votes)
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
    throws IOException, InterruptedException {
  try {
    // first, finish any hash batches that end before the scanned row
    while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
      moveToNextBatch(context);
    }
    
    // next, add the scanned row (as long as we've reached the first batch)
    if (targetHasher.isBatchStarted()) {
      targetHasher.hashResult(value);
    }
  } catch (Throwable t) {
    mapperException = t;
    Throwables.propagateIfInstanceOf(t, IOException.class);
    Throwables.propagateIfInstanceOf(t, InterruptedException.class);
    Throwables.propagate(t);
  }
}
 
Example 13
Project: ditb   File: TestHFileOutputFormat2.java   (6 votes)
/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte valBytes[] = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
  final byte [] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {

    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

    for (byte[] family : families) {
      Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}
 
Example 14
Project: ditb   File: HTableDescriptor.java   (6 votes)
/**
 * Check if the table has an attached co-processor represented by the name className
 *
 * @param classNameToMatch - Class name of the co-processor
 * @return true if the table has a co-processor matching className
 */
public boolean hasCoprocessor(String classNameToMatch) {
  Matcher keyMatcher;
  Matcher valueMatcher;
  for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
      this.values.entrySet()) {
    keyMatcher =
        HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(
            Bytes.toString(e.getKey().get()));
    if (!keyMatcher.matches()) {
      continue;
    }
    String className = getCoprocessorClassNameFromSpecStr(Bytes.toString(e.getValue().get()));
    if (className == null) continue;
    if (className.equals(classNameToMatch.trim())) {
      return true;
    }
  }
  return false;
}
 
Example 15
Project: ditb   File: TestTableSnapshotInputFormat.java   (6 votes)
@Override
protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
    int numRegions, int expectedNumSplits) throws Exception {
  setupCluster();
  TableName tableName = TableName.valueOf("testWithMockedMapReduce");
  try {
    createTableAndSnapshot(
      util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

    JobConf job = new JobConf(util.getConfiguration());
    Path tmpTableDir = util.getRandomDir();

    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
      COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, false, tmpTableDir);

    // mapred doesn't support start and end keys? o.O
    verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());

  } finally {
    util.getHBaseAdmin().deleteSnapshot(snapshotName);
    util.deleteTable(tableName);
    tearDownCluster();
  }
}
 
Example 16
Project: ditb   File: Import.java   (6 votes)
/**
 * @param row  The current table row key.
 * @param value  The columns.
 * @param context  The current context.
 * @throws IOException When something is broken with the data.
 */
@Override
public void map(ImmutableBytesWritable row, Result value,
  Context context)
throws IOException {
  try {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Considering the row."
          + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
    }
    if (filter == null || !filter.filterRowKey(row.get(), row.getOffset(), row.getLength())) {
      for (Cell kv : value.rawCells()) {
        kv = filterKv(filter, kv);
        // skip if we filtered it out
        if (kv == null) continue;
        // TODO get rid of ensureKeyValue
        context.write(row, KeyValueUtil.ensureKeyValueTypeForMR(convertKv(kv, cfRenameMap)));
      }
    }
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
 
Example 17
Project: ditb   File: TestTableSnapshotInputFormat.java   (6 votes)
@Override
public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
    int numRegions, int expectedNumSplits) throws Exception {
  setupCluster();
  TableName tableName = TableName.valueOf("testWithMockedMapReduce");
  try {
    createTableAndSnapshot(
      util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

    Job job = new Job(util.getConfiguration());
    Path tmpTableDir = util.getRandomDir();
    Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan

    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
        scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

    verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());

  } finally {
    util.getHBaseAdmin().deleteSnapshot(snapshotName);
    util.deleteTable(tableName);
    tearDownCluster();
  }
}
 
Example 18
Project: ditb   File: TableSnapshotInputFormatTestBase.java   (6 votes)
protected static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
  throws IOException {
  byte[] row = key.get();
  CellScanner scanner = result.cellScanner();
  while (scanner.advance()) {
    Cell cell = scanner.current();

    //assert that all Cells in the Result have the same key
    Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
      cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
  }

  for (int j = 0; j < FAMILIES.length; j++) {
    byte[] actual = result.getValue(FAMILIES[j], null);
    Assert.assertArrayEquals("Row in snapshot does not match, expected:" + Bytes.toString(row)
      + " ,actual:" + Bytes.toString(actual), row, actual);
  }
}
 
Example 19
Project: ditb   File: TestTableMapReduceUtil.java   (6 votes)
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation()
    throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(2);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);

    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
 
Example 20
Project: ditb   File: HTableDescriptor.java   (6 votes)
/**
 * Add coprocessor to values Map
 * @param specStr The coprocessor specification, all in one String, formatted to match
 * {@link HConstants#CP_HTD_ATTR_VALUE_PATTERN}
 * @return Returns <code>this</code>
 */
private HTableDescriptor addCoprocessorToMap(final String specStr) {
  if (specStr == null) return this;
  // generate a coprocessor key
  int maxCoprocessorNumber = 0;
  Matcher keyMatcher;
  for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
      this.values.entrySet()) {
    keyMatcher =
        HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(
            Bytes.toString(e.getKey().get()));
    if (!keyMatcher.matches()) {
      continue;
    }
    maxCoprocessorNumber = Math.max(Integer.parseInt(keyMatcher.group(1)), maxCoprocessorNumber);
  }
  maxCoprocessorNumber++;
  String key = "coprocessor$" + Integer.toString(maxCoprocessorNumber);
  this.values.put(new ImmutableBytesWritable(Bytes.toBytes(key)),
    new ImmutableBytesWritable(Bytes.toBytes(specStr)));
  return this;
}
 
Example 21
Project: ditb   File: TestTableInputFormat.java   (6 votes)
/**
 * Create table data and run tests on specified htable using the
 * o.a.h.hbase.mapred API.
 * 
 * @param table
 * @throws IOException
 */
static void runTestMapred(Table table) throws IOException {
  org.apache.hadoop.hbase.mapred.TableRecordReader trr = 
      new org.apache.hadoop.hbase.mapred.TableRecordReader();
  trr.setStartRow("aaa".getBytes());
  trr.setEndRow("zzz".getBytes());
  trr.setHTable(table);
  trr.setInputColumns(columns);

  trr.init();
  Result r = new Result();
  ImmutableBytesWritable key = new ImmutableBytesWritable();

  boolean more = trr.next(key, r);
  assertTrue(more);
  checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());

  more = trr.next(key, r);
  assertTrue(more);
  checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());

  // no more data
  more = trr.next(key, r);
  assertFalse(more);
}
 
Example 22
Project: ditb   File: TestRowCounter.java   (6 votes)
@Test
@SuppressWarnings({ "deprecation" })
public void shouldCreateAndRunSubmittableJob() throws Exception {
  RowCounter rCounter = new RowCounter();
  rCounter.setConf(HBaseConfiguration.create());
  String[] args = new String[] { "\temp", "tableA", "column1", "column2",
      "column3" };
  JobConf jobConfig = rCounter.createSubmittableJob(args);

  assertNotNull(jobConfig);
  assertEquals(0, jobConfig.getNumReduceTasks());
  assertEquals("rowcounter", jobConfig.getJobName());
  assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
  assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
  assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
      .join("column1", "column2", "column3"));
  assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
}
 
Example 23
Project: ditb   File: TestIdentityTableMap.java   (6 votes)
@Test
@SuppressWarnings({ "deprecation", "unchecked" })
public void shouldCollectPredefinedTimes() throws IOException {
  int recordNumber = 999;
  Result resultMock = mock(Result.class);
  IdentityTableMap identityTableMap = null;
  try {
    Reporter reporterMock = mock(Reporter.class);
    identityTableMap = new IdentityTableMap();
    ImmutableBytesWritable bytesWritableMock = mock(ImmutableBytesWritable.class);
    OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
        mock(OutputCollector.class);

    for (int i = 0; i < recordNumber; i++)
      identityTableMap.map(bytesWritableMock, resultMock, outputCollectorMock,
          reporterMock);

    verify(outputCollectorMock, times(recordNumber)).collect(
        Mockito.any(ImmutableBytesWritable.class), Mockito.any(Result.class));
  } finally {
    if (identityTableMap != null)
      identityTableMap.close();
  }
}
 
Example 24
Project: ditb   File: TestTableSnapshotInputFormat.java   (5 votes)
private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
    byte[] startRow, byte[] stopRow)
    throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  List<InputSplit> splits = tsif.getSplits(job);

  Assert.assertEquals(expectedNumSplits, splits.size());

  HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

  for (int i = 0; i < splits.size(); i++) {
    // validate input split
    InputSplit split = splits.get(i);
    Assert.assertTrue(split instanceof TableSnapshotRegionSplit);

    // validate record reader
    TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
    when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
    RecordReader<ImmutableBytesWritable, Result> rr =
        tsif.createRecordReader(split, taskAttemptContext);
    rr.initialize(split, taskAttemptContext);

    // validate we can read all the data back
    while (rr.nextKeyValue()) {
      byte[] row = rr.getCurrentKey().get();
      verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
      rowTracker.addRow(row);
    }

    rr.close();
  }

  // validate all rows are seen
  rowTracker.validate();
}
 
Example 25
Project: ditb   File: TestTableSnapshotInputFormat.java   (5 votes)
@Override
public void map(ImmutableBytesWritable key, Result value,
    OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
    throws IOException {
  verifyRowFromMap(key, value);
  collector.collect(key, NullWritable.get());
}
 
Example 26
Project: ditb   File: TestTableInputFormat.java   (5 votes)
/**
 * Create table data and run tests on specified htable using the
 * o.a.h.hbase.mapreduce API.
 *
 * @param table
 * @throws IOException
 * @throws InterruptedException
 */
static void runTestMapreduce(Table table) throws IOException,
    InterruptedException {
  org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
      new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
  Scan s = new Scan();
  s.setStartRow("aaa".getBytes());
  s.setStopRow("zzz".getBytes());
  s.addFamily(FAMILY);
  trr.setScan(s);
  trr.setHTable(table);

  trr.initialize(null, null);
  Result r = new Result();
  ImmutableBytesWritable key = new ImmutableBytesWritable();

  boolean more = trr.nextKeyValue();
  assertTrue(more);
  key = trr.getCurrentKey();
  r = trr.getCurrentValue();
  checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());

  more = trr.nextKeyValue();
  assertTrue(more);
  key = trr.getCurrentKey();
  r = trr.getCurrentValue();
  checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());

  // no more data
  more = trr.nextKeyValue();
  assertFalse(more);
}
 
Example 27
Project: ditb   File: GroupingTableMap.java   (5 votes)
/**
 * Create a key by concatenating multiple column values.
 * Override this function in order to produce different types of keys.
 *
 * @param vals
 * @return key generated by concatenating multiple column values
 */
protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
  if(vals == null) {
    return null;
  }
  StringBuilder sb =  new StringBuilder();
  for(int i = 0; i < vals.length; i++) {
    if(i > 0) {
      sb.append(" ");
    }
    sb.append(Bytes.toString(vals[i]));
  }
  return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
}
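Since the Javadoc above explicitly invites overriding createGroupKey to produce different kinds of keys, a hypothetical subclass might look like the sketch below. It simply swaps the space separator for a pipe; the class name is made up and not part of the project.

// Hypothetical subclass: group-key values joined with '|' instead of a space.
public class PipeDelimitedGroupingTableMap extends GroupingTableMap {
  @Override
  protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
    if (vals == null) {
      return null;
    }
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < vals.length; i++) {
      if (i > 0) {
        sb.append('|');
      }
      sb.append(Bytes.toString(vals[i]));
    }
    return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
  }
}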
 
Example 28
Project: ditb   File: RowCounter.java   (5 votes)
public void map(ImmutableBytesWritable row, Result values,
    OutputCollector<ImmutableBytesWritable, Result> output,
    Reporter reporter)
throws IOException {
    // Count every row containing data, whether it's in qualifiers or values
    reporter.incrCounter(Counters.ROWS, 1);
}
 
Example 29
Project: ditb   File: TableSnapshotInputFormat.java   (5 votes)
@Override
public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
  if (!delegate.nextKeyValue()) {
    return false;
  }
  ImmutableBytesWritable currentKey = delegate.getCurrentKey();
  key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
  value.copyFrom(delegate.getCurrentValue());
  return true;
}
 
Example 30
Project: ditb   File: TestMapReduceExamples.java   (5 votes)
/**
 * Test SampleUploader from examples
 */

@SuppressWarnings("unchecked")
@Test
public void testSampleUploader() throws Exception {

  Configuration configuration = new Configuration();
  Uploader uploader = new Uploader();
  Mapper<LongWritable, Text, ImmutableBytesWritable, Put>.Context ctx = mock(Context.class);
  doAnswer(new Answer<Void>() {

    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
      Put put = (Put) invocation.getArguments()[1];
      assertEquals("row", Bytes.toString(writer.get()));
      assertEquals("row", Bytes.toString(put.getRow()));
      return null;
    }
  }).when(ctx).write(any(ImmutableBytesWritable.class), any(Put.class));

  uploader.map(null, new Text("row,family,qualifier,value"), ctx);

  Path dir = util.getDataTestDirOnTestFS("testSampleUploader");

  String[] args = { dir.toString(), "simpleTable" };
  Job job = SampleUploader.configureJob(configuration, args);
  assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());

}
 
Example 31
Project: ditb   File: HColumnDescriptor.java   (5 votes)
/**
 * @param key The key.
 * @return The value.
 */
public byte[] getValue(byte[] key) {
  ImmutableBytesWritable ibw = values.get(new ImmutableBytesWritable(key));
  if (ibw == null)
    return null;
  return ibw.get();
}
 
Example 32
Project: ditb   File: HRegionPartitioner.java   (5 votes)
/**
 * Gets the partition number for a given key (hence record) given the total
 * number of partitions i.e. number of reduce-tasks for the job.
 *
 * <p>Typically a hash function on all or a subset of the key.</p>
 *
 * @param key  The key to be partitioned.
 * @param value  The entry value.
 * @param numPartitions  The total number of partitions.
 * @return The partition number for the <code>key</code>.
 * @see org.apache.hadoop.mapreduce.Partitioner#getPartition(
 *   java.lang.Object, java.lang.Object, int)
 */
@Override
public int getPartition(ImmutableBytesWritable key,
    VALUE value, int numPartitions) {
  byte[] region = null;
  // Only one region return 0
  if (this.startKeys.length == 1){
    return 0;
  }
  try {
    // Not sure if this is cached after a split so we could have problems
    // here if a region splits while mapping
    region = this.locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
  } catch (IOException e) {
    LOG.error(e);
  }
  for (int i = 0; i < this.startKeys.length; i++){
    if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
      if (i >= numPartitions-1){
        // cover the case where we have fewer reducers than regions.
        return (Integer.toString(i).hashCode()
            & Integer.MAX_VALUE) % numPartitions;
      }
      return i;
    }
  }
  // if the above fails to find a matching start key, we need to return something
  return 0;
}
 
Example 33
Project: ditb   File: TestMultiTableSnapshotInputFormat.java   (5 votes)
@Override
public void map(ImmutableBytesWritable key, Result value,
    OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> outputCollector,
    Reporter reporter) throws IOException {
  makeAssertions(key, value);
  outputCollector.collect(key, key);
}
 
Example 34
Project: ditb   File: HashTable.java   (5 votes)
private void readPartitionFile(FileSystem fs, Configuration conf, Path path)
     throws IOException {
  @SuppressWarnings("deprecation")
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
  ImmutableBytesWritable key = new ImmutableBytesWritable();
  partitions = new ArrayList<ImmutableBytesWritable>();
  while (reader.next(key)) {
    partitions.add(new ImmutableBytesWritable(key.copyBytes()));
  }
  reader.close();
  
  if (!Ordering.natural().isOrdered(partitions)) {
    throw new IOException("Partitions are not ordered!");
  }
}
 
Example 35
Project: ditb   File: IntegrationTestBulkLoad.java   (5 votes)
@Override
protected void map(LongWritable key, LongWritable value, Context context)
    throws IOException, InterruptedException {
  long chainId = value.get();
  LOG.info("Starting mapper with chainId:" + chainId);

  byte[] chainIdArray = Bytes.toBytes(chainId);
  long currentRow = 0;

  long chainLength = context.getConfiguration().getLong(CHAIN_LENGTH_KEY, CHAIN_LENGTH);
  long nextRow = getNextRow(0, chainLength);

  for (long i = 0; i < chainLength; i++) {
    byte[] rk = Bytes.toBytes(currentRow);

    // Next link in the chain.
    KeyValue linkKv = new KeyValue(rk, CHAIN_FAM, chainIdArray, Bytes.toBytes(nextRow));
    // What link in the chain this is.
    KeyValue sortKv = new KeyValue(rk, SORT_FAM, chainIdArray, Bytes.toBytes(i));
    // Added data so that large stores are created.
    KeyValue dataKv = new KeyValue(rk, DATA_FAM, chainIdArray,
      Bytes.toBytes(RandomStringUtils.randomAlphabetic(50))
    );

    // Emit the key values.
    context.write(new ImmutableBytesWritable(rk), linkKv);
    context.write(new ImmutableBytesWritable(rk), sortKv);
    context.write(new ImmutableBytesWritable(rk), dataKv);
    // Move to the next row.
    currentRow = nextRow;
    nextRow = getNextRow(i+1, chainLength);
  }
}
 
Example 36
Project: ditb   File: TestTableInputFormatScanBase.java   (5 votes)
/**
 * Tests an MR Scan initialized from properties set in the Configuration.
 * 
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScanFromConfiguration(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
    "To" + (stop != null ? stop.toUpperCase() : "Empty");
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
  c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
  c.set(KEY_STARTROW, start != null ? start : "");
  c.set(KEY_LASTROW, last != null ? last : "");

  if (start != null) {
    c.set(TableInputFormat.SCAN_ROW_START, start);
  }

  if (stop != null) {
    c.set(TableInputFormat.SCAN_ROW_STOP, stop);
  }

  Job job = new Job(c, jobName);
  job.setMapperClass(ScanMapper.class);
  job.setReducerClass(ScanReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setInputFormatClass(TableInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  TableMapReduceUtil.addDependencyJars(job);
  assertTrue(job.waitForCompletion(true));
}
 
Example 37
Project: ditb   File: HTableDescriptor.java   (5 votes)
/**
 * Remove a coprocessor from those set on the table
 * @param className Class name of the co-processor
 */
public void removeCoprocessor(String className) {
  ImmutableBytesWritable match = null;
  Matcher keyMatcher;
  Matcher valueMatcher;
  for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e : this.values
      .entrySet()) {
    keyMatcher = HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(Bytes.toString(e
        .getKey().get()));
    if (!keyMatcher.matches()) {
      continue;
    }
    valueMatcher = HConstants.CP_HTD_ATTR_VALUE_PATTERN.matcher(Bytes
        .toString(e.getValue().get()));
    if (!valueMatcher.matches()) {
      continue;
    }
    // get className and compare
    String clazz = valueMatcher.group(2).trim(); // classname is the 2nd field
    // remove the CP if it is present
    if (clazz.equals(className.trim())) {
      match = e.getKey();
      break;
    }
  }
  // if we found a match, remove it
  if (match != null)
    remove(match);
}
 
Example 38
Project: ditb   File: HashTable.java   (5 votes)
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  targetBatchSize = context.getConfiguration()
      .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
  hasher = new ResultHasher();
  
  TableSplit split = (TableSplit) context.getInputSplit();
  hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
}
 
Example 39
Project: ditb   File: TestTableSnapshotInputFormat.java   (5 votes)
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
    String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
    int expectedNumSplits, boolean shutdownCluster) throws Exception {

  //create the table and snapshot
  createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

  if (shutdownCluster) {
    util.shutdownMiniHBaseCluster();
  }

  try {
    // create the job
    Job job = new Job(util.getConfiguration());
    Scan scan = new Scan(startRow, endRow); // limit the scan

    job.setJarByClass(util.getClass());
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      TestTableSnapshotInputFormat.class);

    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
      scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, true, tableDir);

    job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);

    Assert.assertTrue(job.waitForCompletion(true));
  } finally {
    if (!shutdownCluster) {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
    }
  }
}
 
Example 40
Project: ditb   File: TestGroupingTableMap.java   (5 votes)
@Test
@SuppressWarnings({ "deprecation", "unchecked" })
public void shouldNotCallCollectonSinceFindUniqueKeyValueMoreThanOnes()
    throws Exception {
  GroupingTableMap gTableMap = null;
  try {
    Result result = mock(Result.class);
    Reporter reporter = mock(Reporter.class);
    gTableMap = new GroupingTableMap();
    Configuration cfg = new Configuration();
    cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
    JobConf jobConf = new JobConf(cfg);
    gTableMap.configure(jobConf);

    byte[] row = {};
    List<Cell> keyValues = ImmutableList.<Cell>of(
        new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
        new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("2222")),
        new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("3333")));
    when(result.listCells()).thenReturn(keyValues);
    OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
        mock(OutputCollector.class);
    gTableMap.map(null, result, outputCollectorMock, reporter);
    verify(result).listCells();
    verifyZeroInteractions(outputCollectorMock);
  } finally {
    if (gTableMap != null)
      gTableMap.close();    
  }
}
 
Example 41
Project: ditb   File: SyncTable.java   (5 votes)
/**
 * Finish the currently open hash batch.
 * Compare the target hash to the given source hash.
 * If they do not match, then sync the covered key range.
 */
private void finishBatchAndCompareHashes(Context context)
    throws IOException, InterruptedException {
  targetHasher.finishBatch();
  context.getCounter(Counter.BATCHES).increment(1);
  if (targetHasher.getBatchSize() == 0) {
    context.getCounter(Counter.EMPTY_BATCHES).increment(1);
  }
  ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
  if (targetHash.equals(currentSourceHash)) {
    context.getCounter(Counter.HASHES_MATCHED).increment(1);
  } else {
    context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);
    
    ImmutableBytesWritable stopRow = nextSourceKey == null
                                      ? new ImmutableBytesWritable(sourceTableHash.stopRow)
                                      : nextSourceKey;
    
    if (LOG.isDebugEnabled()) {
      LOG.debug("Hash mismatch.  Key range: " + toHex(targetHasher.getBatchStartKey())
          + " to " + toHex(stopRow)
          + " sourceHash: " + toHex(currentSourceHash)
          + " targetHash: " + toHex(targetHash));
    }
    
    syncRange(context, targetHasher.getBatchStartKey(), stopRow);
  }
}
 
Example 42
Project: ditb   File: TestTableSnapshotInputFormat.java   (5 votes)
@Override
protected void map(ImmutableBytesWritable key, Result value,
    Context context) throws IOException, InterruptedException {
  // Validate a single row coming from the snapshot, and emit the row key
  verifyRowFromMap(key, value);
  context.write(key, NullWritable.get());
}
 
Example 43
Project: ditb   File: IntegrationTestBigLinkedList.java   (5 votes)
@Override
public void setup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context)
    throws IOException {
  super.setup(context);
  try {
    this.keysToFind = readKeysToSearch(context.getConfiguration());
    LOG.info("Loaded keys to find: count=" + this.keysToFind.size());
  } catch (InterruptedException e) {
    throw new InterruptedIOException(e.toString());
  }
}
 
Example 44
Project: ditb   File: HFileOutputFormat2.java   (5 votes)
/**
 * Return the start keys of all of the regions in this table,
 * as a list of ImmutableBytesWritable.
 */
private static List<ImmutableBytesWritable> getRegionStartKeys(RegionLocator table)
throws IOException {
  byte[][] byteKeys = table.getStartKeys();
  ArrayList<ImmutableBytesWritable> ret =
    new ArrayList<ImmutableBytesWritable>(byteKeys.length);
  for (byte[] byteKey : byteKeys) {
    ret.add(new ImmutableBytesWritable(byteKey));
  }
  return ret;
}
 
Example 45
Project: ditb   File: HFileOutputFormat2.java   (5 votes)
/**
 * Write out a {@link SequenceFile} that can be read by
 * {@link TotalOrderPartitioner} that contains the split points in startKeys.
 */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath,
    List<ImmutableBytesWritable> startKeys) throws IOException {
  LOG.info("Writing partition information to " + partitionsPath);
  if (startKeys.isEmpty()) {
    throw new IllegalArgumentException("No regions passed");
  }

  // We're generating a list of split points, and we don't ever
  // have keys < the first region (which has an empty start key)
  // so we need to remove it. Otherwise we would end up with an
  // empty reducer with index 0
  TreeSet<ImmutableBytesWritable> sorted =
    new TreeSet<ImmutableBytesWritable>(startKeys);

  ImmutableBytesWritable first = sorted.first();
  if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
    throw new IllegalArgumentException(
        "First region of table should have empty start key. Instead has: "
        + Bytes.toStringBinary(first.get()));
  }
  sorted.remove(first);

  // Write the actual file
  FileSystem fs = partitionsPath.getFileSystem(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(
    fs, conf, partitionsPath, ImmutableBytesWritable.class,
    NullWritable.class);

  try {
    for (ImmutableBytesWritable startKey : sorted) {
      writer.append(startKey, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}
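The Javadoc above notes that the partitions file is meant to be read by TotalOrderPartitioner. A minimal sketch of that wiring, outside of HFileOutputFormat2 and with an assumed conf and partitionsPath, could look like this:

// Sketch only: point TotalOrderPartitioner at the partitions file written above.
Job job = Job.getInstance(conf, "hfile-output");            // conf/partitionsPath assumed
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);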
 
Example 46
Project: ditb   File: Import.java   (5 votes)
/**
 * @param row  The current table row key.
 * @param value  The columns.
 * @param context  The current context.
 * @throws IOException When something is broken with the data.
 */
@Override
public void map(ImmutableBytesWritable row, Result value,
  Context context)
throws IOException {
  try {
    writeResult(row, value, context);
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
 
Example 47
Project: ditb   File: Import.java   (5 votes)
private void writeResult(ImmutableBytesWritable key, Result result, Context context)
throws IOException, InterruptedException {
  Put put = null;
  Delete delete = null;
  if (LOG.isTraceEnabled()) {
    LOG.trace("Considering the row."
        + Bytes.toString(key.get(), key.getOffset(), key.getLength()));
  }
  if (filter == null || !filter.filterRowKey(key.get(), key.getOffset(), key.getLength())) {
    processKV(key, result, context, put, delete);
  }
}
 
Example 48
Project: ditb   File: TestTableSnapshotInputFormat.java   (5 votes)
@Test
public void testInitTableSnapshotMapperJobConfig() throws Exception {
  setupCluster();
  TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig");
  String snapshotName = "foo";

  try {
    createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
    Job job = new Job(UTIL.getConfiguration());
    Path tmpTableDir = UTIL.getRandomDir();

    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
      new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, false, tmpTableDir);

    // TODO: would be better to examine directly the cache instance that results from this
    // config. Currently this is not possible because BlockCache initialization is static.
    Assert.assertEquals(
      "Snapshot job should be configured for default LruBlockCache.",
      HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
      job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
    Assert.assertEquals(
      "Snapshot job should not use BucketCache.",
      0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
  } finally {
    UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
    UTIL.deleteTable(tableName);
    tearDownCluster();
  }
}
 
Example 49
Project: ditb   File: TestHFileOutputFormat.java   (5 votes)
protected void map(
    NullWritable n1, NullWritable n2,
    Mapper<NullWritable, NullWritable,
           ImmutableBytesWritable,KeyValue>.Context context)
    throws java.io.IOException, InterruptedException
{

  byte keyBytes[] = new byte[keyLength];
  byte valBytes[] = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

  Random random = new Random();
  for (int i = 0; i < ROWSPERSPLIT; i++) {

    random.nextBytes(keyBytes);
    // Ensure that unique tasks generate unique keys
    keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

    for (byte[] family : TestHFileOutputFormat.FAMILIES) {
      KeyValue kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
      context.write(key, kv);
    }
  }
}
 
Example 50
Project: ditb   File: MultiTableOutputFormat.java   (5 votes)
@Override
public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
      conf.getBoolean(WAL_PROPERTY, WAL_ON));
}
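With MultiTableOutputFormat the record key is taken to be the name of the destination table, so code using the writer returned above would typically emit something like the following sketch (the context, table and column names are invented for illustration):

// Sketch: the ImmutableBytesWritable key names the target table for each Mutation.
Put put = new Put(Bytes.toBytes("row-1"));
put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("v"));
context.write(new ImmutableBytesWritable(Bytes.toBytes("table_a")), put);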
 
Example 51
Project: ditb   File: MultiTableInputFormatTestBase.java   (5 votes)
protected void makeAssertions(ImmutableBytesWritable key,
    Iterable<ImmutableBytesWritable> values) {
  int count = 0;
  for (ImmutableBytesWritable value : values) {
    String val = Bytes.toStringBinary(value.get());
    LOG.debug("reduce: key[" + count + "] -> " +
        Bytes.toStringBinary(key.get()) + ", value -> " + val);
    if (first == null) first = val;
    last = val;
    count++;
  }
  assertEquals(3, count);
}
 
Example 52
Project: ditb   File: TestGroupingTableMap.java   (5 votes)
@Test
@SuppressWarnings({ "deprecation", "unchecked" })
public void shouldCreateNewKeyAlthoughExtraKey() throws Exception {
  GroupingTableMap gTableMap = null;
  try {
    Result result = mock(Result.class);
    Reporter reporter = mock(Reporter.class);
    gTableMap = new GroupingTableMap();
    Configuration cfg = new Configuration();
    cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
    JobConf jobConf = new JobConf(cfg);
    gTableMap.configure(jobConf);

    byte[] row = {};
    List<Cell> keyValues = ImmutableList.<Cell>of(
        new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")),
        new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("2222")),
        new KeyValue(row, "familyC".getBytes(), "qualifierC".getBytes(), Bytes.toBytes("3333")));
    when(result.listCells()).thenReturn(keyValues);
    OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock =
        mock(OutputCollector.class);
    gTableMap.map(null, result, outputCollectorMock, reporter);
    verify(result).listCells();
    verify(outputCollectorMock, times(1))
      .collect(any(ImmutableBytesWritable.class), any(Result.class));
    verifyNoMoreInteractions(outputCollectorMock);
  } finally {
    if (gTableMap != null)
      gTableMap.close();
  }
}
 
Example 53
Project: ditb   File: MultiTableInputFormatTestBase.java   (5 votes)
public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
  if (value.size() != 1) {
    throw new IOException("There should only be one input column");
  }
  Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
      value.getMap();
  if (!cf.containsKey(INPUT_FAMILY)) {
    throw new IOException("Wrong input columns. Missing: '" +
        Bytes.toString(INPUT_FAMILY) + "'.");
  }
  String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
  LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
      ", value -> " + val);
}
 
Example 54
Project: ditb   File: TestRowCounter.java   (5 votes)
@Test
@SuppressWarnings({ "deprecation", "unchecked" })
public void shouldRegInReportEveryIncomingRow() throws IOException {
  int iterationNumber = 999;
  RowCounter.RowCounterMapper mapper = new RowCounter.RowCounterMapper();
  Reporter reporter = mock(Reporter.class);
  for (int i = 0; i < iterationNumber; i++)
    mapper.map(mock(ImmutableBytesWritable.class), mock(Result.class),
        mock(OutputCollector.class), reporter);

  Mockito.verify(reporter, times(iterationNumber)).incrCounter(
      any(Enum.class), anyInt());
}
 
Example 55
Project: ditb   File: TestGroupingTableMapper.java   (5 votes)
/**
 * Test GroupingTableMapper class
 */
@Test
public void testGroupingTableMapper() throws Exception {

  GroupingTableMapper mapper = new GroupingTableMapper();
  Configuration configuration = new Configuration();
  configuration.set(GroupingTableMapper.GROUP_COLUMNS, "family1:clm family2:clm");
  mapper.setConf(configuration);

  Result result = mock(Result.class);
  @SuppressWarnings("unchecked")
  Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Result>.Context context =
      mock(Mapper.Context.class);
  context.write(any(ImmutableBytesWritable.class), any(Result.class));
  List<Cell> keyValue = new ArrayList<Cell>();
  byte[] row = {};
  keyValue.add(new KeyValue(row, Bytes.toBytes("family2"), Bytes.toBytes("clm"), Bytes
      .toBytes("value1")));
  keyValue.add(new KeyValue(row, Bytes.toBytes("family1"), Bytes.toBytes("clm"), Bytes
      .toBytes("value2")));
  when(result.listCells()).thenReturn(keyValue);
  mapper.map(null, result, context);
  // template data
  byte[][] data = { Bytes.toBytes("value1"), Bytes.toBytes("value2") };
  ImmutableBytesWritable ibw = mapper.createGroupKey(data);
  verify(context).write(ibw, result);
}
 
Example 56
Project: big_data   File: NewInstallUserMapper.java   (4 votes)
/**
 * map() reads data from HBase; the input is one row of the HBase table.
 * Output key type: StatsUserDimension, value type: TimeOutputValue.
 */
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
    throws IOException, InterruptedException {
  String uuid = Bytes.toString(value.getValue(family, Bytes.toBytes(EventLogConstants.LOG_COLUMN_NAME_UUID)));
  String serverTime = Bytes
      .toString(value.getValue(family, Bytes.toBytes(EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME)));
  String platform = Bytes
      .toString(value.getValue(family, Bytes.toBytes(EventLogConstants.LOG_COLUMN_NAME_PLATFORM)));
  if (StringUtils.isBlank(uuid) || StringUtils.isBlank(serverTime) || StringUtils.isBlank(platform)) {
    logger.warn("uuid, serverTime and platform must not be blank");
    return;
  }
  long longOfTime = Long.valueOf(serverTime.trim());
  timeOutputValue.setId(uuid); // use the uuid as the id
  timeOutputValue.setTime(longOfTime); // use the server time as the time
  DateDimension dateDimension = DateDimension.buildDate(longOfTime, DateEnum.DAY);
  List<PlatformDimension> platformDimensions = PlatformDimension.buildList(platform);

  // set the date dimension
  StatsCommonDimension statsCommonDimension = this.statsUserDimension.getStatsCommon();
  statsCommonDimension.setDate(dateDimension);
  // write the browser-related data
  String browserName = Bytes
      .toString(value.getValue(family, Bytes.toBytes(EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME)));
  String browserVersion = Bytes
      .toString(value.getValue(family, Bytes.toBytes(EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION)));
  List<BrowserDimension> browserDimensions = BrowserDimension.buildList(browserName, browserVersion);
  BrowserDimension defaultBrowser = new BrowserDimension("", "");
  for (PlatformDimension pf : platformDimensions) {
    // 1. start from a default (empty) browser value
    statsUserDimension.setBrowser(defaultBrowser);
    // 2. works around the bug of emitting records with an empty browser
    // statsUserDimension.getBrowser().clean();
    statsCommonDimension.setKpi(newInstallUserKpi);
    statsCommonDimension.setPlatform(pf);
    context.write(statsUserDimension, timeOutputValue);
    for (BrowserDimension br : browserDimensions) {
      statsCommonDimension.setKpi(newInstallUserOfBrowserKpi);
      // 1.
      statsUserDimension.setBrowser(br);
      // 2. because the clean operation above is needed, a clone of the value would be set instead
      // statsUserDimension.setBrowser(WritableUtils.clone(br,
      // context.getConfiguration()));
      context.write(statsUserDimension, timeOutputValue);
    }
  }
}
 
Example 57
Project: ditb   File: TableInputFormatBase.java   (4 votes)
/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses
 * the default.
 *
 * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
 *      JobConf, Reporter)
 */
public RecordReader<ImmutableBytesWritable, Result> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter)
throws IOException {
  // In case a subclass uses the deprecated approach or calls initializeTable directly
  if (table == null) {
    initialize(job);
  }
  // null check in case our child overrides getTable to not throw.
  try {
    if (getTable() == null) {
      // initialize() must not have been implemented in the subclass.
      throw new IOException(INITIALIZATION_ERROR);
    }
  } catch (IllegalStateException exception) {
    throw new IOException(INITIALIZATION_ERROR, exception);
  }

  TableSplit tSplit = (TableSplit) split;
  // if no table record reader was provided use default
  final TableRecordReader trr = this.tableRecordReader == null ? new TableRecordReader() :
      this.tableRecordReader;
  trr.setStartRow(tSplit.getStartRow());
  trr.setEndRow(tSplit.getEndRow());
  trr.setHTable(this.table);
  trr.setInputColumns(this.inputColumns);
  trr.setRowFilter(this.rowFilter);
  trr.init();
  return new RecordReader<ImmutableBytesWritable, Result>() {

    @Override
    public void close() throws IOException {
      trr.close();
      closeTable();
    }

    @Override
    public ImmutableBytesWritable createKey() {
      return trr.createKey();
    }

    @Override
    public Result createValue() {
      return trr.createValue();
    }

    @Override
    public long getPos() throws IOException {
      return trr.getPos();
    }

    @Override
    public float getProgress() throws IOException {
      return trr.getProgress();
    }

    @Override
    public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
      return trr.next(key, value);
    }
  };
}
 
Example 58
Project: ditb   File: TestMultiTableSnapshotInputFormat.java   (4 votes)
@Override
public void reduce(ImmutableBytesWritable key, Iterator<ImmutableBytesWritable> values,
    OutputCollector<NullWritable, NullWritable> outputCollector, Reporter reporter)
    throws IOException {
  makeAssertions(key, Lists.newArrayList(values));
}
 
Example 59
Project: ditb   File: TableOutputFormat.java   Source Code and License 4 votes vote down vote up
public void write(ImmutableBytesWritable key, Put value) throws IOException {
  m_mutator.mutate(new Put(value));
}
 
Example 60
Project: ditb   File: MultiTableSnapshotInputFormat.java   (4 votes)
@Override
public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
}