Java Code Examples for org.apache.hadoop.hbase.io.hfile.HFile#createReader()

The following examples show how to use org.apache.hadoop.hbase.io.hfile.HFile#createReader(). Each example is taken from an open-source project; the source file, project, and license are noted above it.
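For orientation, the pattern most of these examples share is: open a reader with HFile.createReader(), walk the file with an HFileScanner (or read its metadata), and close the reader. The sketch below is not taken from any of the projects listed; the countCells name and the fs, path, and conf parameters are placeholders, and it assumes the HBase 2.x signatures used by most of the examples (the createReader/getScanner overloads differ between HBase versions).

// Minimal sketch: count the cells in an HFile (assumes HBase 2.x APIs from
// org.apache.hadoop.hbase.io.hfile: HFile, HFileScanner, CacheConfig).
static long countCells(FileSystem fs, Path path, Configuration conf) throws IOException {
  // primaryReplicaReader = true; skip the block cache for a one-off read.
  HFile.Reader reader = HFile.createReader(fs, path, CacheConfig.DISABLED, true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, false); // cacheBlocks=false, pread=false
    long count = 0;
    if (scanner.seekTo()) {          // seekTo() returns false when the file holds no cells
      do {
        count++;
      } while (scanner.next());
    }
    return count;
  } finally {
    reader.close();
  }
}

Note that reader.getEntries() returns the cell count directly (as Examples 8 and 12 do); the explicit scan above is only meant to show the scanner loop that several of the test examples use.
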
Example 1
Source File: HFileCorruptionChecker.java    From hbase-operator-tools with Apache License 2.0
/**
 * Checks a path to see if it is a valid mob file.
 *
 * @param p
 *          full Path to a mob file.
 * @throws IOException
 *           This is a connectivity related exception
 */
protected void checkMobFile(Path p) throws IOException {
  HFile.Reader r = null;
  try {
    r = HFile.createReader(fs, p, cacheConf, true, conf);
  } catch (CorruptHFileException che) {
    LOG.warn("Found corrupt mob file " + p, che);
    corruptedMobFiles.add(p);
    if (inQuarantineMode) {
      Path dest = createQuarantinePath(p);
      LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
      boolean success = fs.mkdirs(dest.getParent());
      success = success ? fs.rename(p, dest): false;
      if (!success) {
        failureMobFiles.add(p);
      } else {
        quarantinedMobFiles.add(dest);
      }
    }
    return;
  } catch (FileNotFoundException fnfe) {
    LOG.warn("Mob file " + p + " was missing.  Likely removed due to compaction?");
    missedMobFiles.add(p);
  } finally {
    mobFilesChecked.addAndGet(1);
    if (r != null) {
      r.close(true);
    }
  }
}
 
Example 2
Source File: HFileCorruptionChecker.java    From hbase with Apache License 2.0
/**
 * Checks a path to see if it is a valid mob file.
 *
 * @param p
 *          full Path to a mob file.
 * @throws IOException
 *           This is a connectivity related exception
 */
protected void checkMobFile(Path p) throws IOException {
  HFile.Reader r = null;
  try {
    r = HFile.createReader(fs, p, cacheConf, true, conf);
  } catch (CorruptHFileException che) {
    LOG.warn("Found corrupt mob file " + p, che);
    corruptedMobFiles.add(p);
    if (inQuarantineMode) {
      Path dest = createQuarantinePath(p);
      LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
      boolean success = fs.mkdirs(dest.getParent());
      success = success ? fs.rename(p, dest): false;
      if (!success) {
        failureMobFiles.add(p);
      } else {
        quarantinedMobFiles.add(dest);
      }
    }
    return;
  } catch (FileNotFoundException fnfe) {
    LOG.warn("Mob file " + p + " was missing.  Likely removed due to compaction?");
    missedMobFiles.add(p);
  } finally {
    mobFilesChecked.addAndGet(1);
    if (r != null) {
      r.close(true);
    }
  }
}
 
Example 3
Source File: TestImportTsv.java    From hbase with Apache License 2.0
/**
 * Method returns the total KVs in given hfile
 * @param fs File System
 * @param p HFile path
 * @return KV count in the given hfile
 * @throws IOException
 */
private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
  HFileScanner scanner = reader.getScanner(false, false);
  scanner.seekTo();
  int count = 0;
  do {
    count++;
  } while (scanner.next());
  reader.close();
  return count;
}
 
Example 4
Source File: CompressionTest.java    From hbase with Apache License 2.0
public static void doSmokeTest(FileSystem fs, Path path, String codec)
throws Exception {
  Configuration conf = HBaseConfiguration.create();
  HFileContext context = new HFileContextBuilder()
                         .withCompression(HFileWriterImpl.compressionByName(codec)).build();
  HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
      .withPath(fs, path)
      .withFileContext(context)
      .create();
  // Write any-old Cell...
  final byte [] rowKey = Bytes.toBytes("compressiontestkey");
  Cell c = ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
    .setRow(rowKey)
    .setFamily(HConstants.EMPTY_BYTE_ARRAY)
    .setQualifier(HConstants.EMPTY_BYTE_ARRAY)
    .setTimestamp(HConstants.LATEST_TIMESTAMP)
    .setType(KeyValue.Type.Maximum.getCode())
    .setValue(Bytes.toBytes("compressiontestval"))
    .build();
  writer.append(c);
  writer.appendFileInfo(Bytes.toBytes("compressioninfokey"), Bytes.toBytes("compressioninfoval"));
  writer.close();
  Cell cc = null;
  HFile.Reader reader = HFile.createReader(fs, path, CacheConfig.DISABLED, true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, true);
    scanner.seekTo(); // position to the start of file
    // Scanner does not do Cells yet. Do below for now till fixed.
    cc = scanner.getCell();
    if (CellComparator.getInstance().compareRows(c, cc) != 0) {
      throw new Exception("Read back incorrect result: " + c.toString() + " vs " + cc.toString());
    }
  } finally {
    reader.close();
  }
}
 
Example 5
Source File: TestBulkLoadHFiles.java    From hbase with Apache License 2.0
private int verifyHFile(Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader =
    HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
  HFileScanner scanner = reader.getScanner(false, false);
  scanner.seekTo();
  int count = 0;
  do {
    count++;
  } while (scanner.next());
  assertTrue(count > 0);
  reader.close();
  return count;
}
 
Example 6
Source File: TestHalfStoreFileReader.java    From hbase with Apache License 2.0
/**
 * Test the scanner and reseek of a half hfile scanner. The scanner API demands that seekTo and
 * reseekTo() only return < 0 if the key lies before the start of the file (with no position on
 * the scanner). They return 0 on a perfect match (rare), and 1 on an imperfect match.
 * The latter case being the most common, we should generally be returning 1, and if we do, there
 * may or may not be a 'next' in the scanner/file. A bug in the half file scanner was returning -1
 * at the end of the bottom half, and that was causing the infrastructure above to go null causing
 * NPEs and other problems. This test reproduces that failure, and also tests both the bottom and
 * top of the file while we are at it.
 * @throws IOException
 */
@Test
public void testHalfScanAndReseek() throws IOException {
  String root_dir = TEST_UTIL.getDataTestDir().toString();
  Path p = new Path(root_dir, "test");

  Configuration conf = TEST_UTIL.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  CacheConfig cacheConf = new CacheConfig(conf);
  HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
  HFile.Writer w =
      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create();

  // write some things.
  List<KeyValue> items = genSomeKeys();
  for (KeyValue kv : items) {
    w.append(kv);
  }
  w.close();

  HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
  Cell midKV = r.midKey().get();
  byte[] midkey = CellUtil.cloneRow(midKV);

  // System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));

  Reference bottom = new Reference(midkey, Reference.Range.bottom);
  doTestOfScanAndReseek(p, fs, bottom, cacheConf);

  Reference top = new Reference(midkey, Reference.Range.top);
  doTestOfScanAndReseek(p, fs, top, cacheConf);

  r.close();
}
 
Example 7
Source File: HFileSortedOplog.java    From gemfirexd-oss with Apache License 2.0
public HFileReader() throws IOException {
  try {
    FileSystem fs = fsProvider.getFS();
    reader = HFile.createReader(fs, path, cacheConf);
    fileInfo = reader.loadFileInfo();
    closed = new AtomicBoolean(false);

    validate();
    if (reader.getComparator() instanceof DelegatingSerializedComparator) {
      loadComparators((DelegatingSerializedComparator) reader.getComparator());
    }

    // read the old HLL if it exists so that a CardinalityMergeException will trigger a Major Compaction
    byte[] hll = fileInfo.get(Meta.LOCAL_CARDINALITY_ESTIMATE.toBytes());
    if (hll != null) {
      entryCountEstimate = estimator = HyperLogLog.Builder.build(hll);
    } else if ((hll = fileInfo.get(Meta.LOCAL_CARDINALITY_ESTIMATE_V2.toBytes())) != null) {
      entryCountEstimate = estimator = HyperLogLog.Builder.build(hll);
    } else {
      estimator = new HyperLogLog(HdfsSortedOplogOrganizer.HLL_CONSTANT);
    }
    
    previousFS = fs;
  } catch (IOException e) {
    logger.fine("IO Error while creating reader", e);
    throw e;
  }
}
 
Example 8
Source File: HFileGeneratorTest.java    From terrapin with Apache License 2.0
@Test
public void testGenerateHFiles() throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  int numOfPart = 10;
  int numOfKeys = 1000;
  HFileGenerator.generateHFiles(fs, conf, outputDir,
      PartitionerType.CASCADING, numOfPart, numOfKeys);
  FilenameFilter hfileFilter = new FilenameFilter() {
    @Override
    public boolean accept(File dir, String name) {
      return name.startsWith(Constants.FILE_PREFIX);
    }
  };
  File[] hfiles = outputDir.listFiles(hfileFilter);
  assertEquals(numOfPart, hfiles.length);

  int count = 0;
  for(File hfile : hfiles) {
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    columnDescriptor.setBlockCacheEnabled(false);
    HFile.Reader reader =
        HFile.createReader(fs, new Path(hfile.toURI()), new CacheConfig(conf, columnDescriptor));
    count += reader.getEntries();
    reader.close();
  }
  assertEquals(numOfKeys, count);
}
 
Example 9
Source File: HFileReader.java    From terrapin with Apache License 2.0
public HFileReader(FileSystem fs,
                   String path,
                   CacheConfig cacheConf,
                   FuturePool futurePool) throws IOException {
  this.reader = HFile.createReader(fs, new TerrapinPath(path), cacheConf);
  this.futurePool = futurePool;
  this.fileSet = TerrapinUtil.extractFileSetFromPath(path);
  setUpStatsKeys();
}
 
Example 10
Source File: TestImportTSVWithVisibilityLabels.java    From hbase with Apache License 2.0
/**
 * Method returns the total KVs in given hfile
 * @param fs File System
 * @param p HFile path
 * @return KV count in the given hfile
 * @throws IOException
 */
private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
  HFileScanner scanner = reader.getScanner(false, false);
  scanner.seekTo();
  int count = 0;
  do {
    count++;
  } while (scanner.next());
  reader.close();
  return count;
}
 
Example 11
Source File: TestEncryptionRandomKeying.java    From hbase with Apache License 2.0
private static byte[] extractHFileKey(Path path) throws Exception {
  HFile.Reader reader = HFile.createReader(TEST_UTIL.getTestFileSystem(), path,
    new CacheConfig(conf), true, conf);
  try {
    Encryption.Context cryptoContext = reader.getFileContext().getEncryptionContext();
    assertNotNull("Reader has a null crypto context", cryptoContext);
    Key key = cryptoContext.getKey();
    if (key == null) {
      return null;
    }
    return key.getEncoded();
  } finally {
    reader.close();
  }
}
 
Example 12
Source File: HFileStats.java    From warp10-platform with Apache License 2.0
public static void main(String[] args) throws Exception {
  
  Configuration conf = new Configuration();
  CacheConfig cacheConf = new CacheConfig(conf);
  
  FileSystem fs = FileSystem.newInstance(conf);
  
  FileStatus[] pathes = fs.globStatus(new Path(args[0]));
  
  long bytes = 0L;
  long cells = 0L;

  for (FileStatus status: pathes) {    
    try {
      HFile.Reader reader = HFile.createReader(fs, status.getPath(), cacheConf, conf);
      bytes += reader.length();
      cells += reader.getEntries();

      System.out.println(status.getPath() + " >>> " + reader.length() + " bytes " + reader.getEntries() + " cells");
    
      reader.close();      
    } catch (Exception e) {
      continue;
    }      
  }

  System.out.println("TOTAL: " + cells + " cells " + bytes + " bytes " + (bytes/(double) cells) + " bytes/cell");
 
  long ts = System.currentTimeMillis();

  System.out.println(ts * 1000 + "// hbase.bytes{} " + bytes);
  System.out.println(ts * 1000 + "// hbase.datapoints{} " + cells);
}
 
Example 13
Source File: TestHStore.java    From hbase with Apache License 2.0
/**
 * Verify that compression and data block encoding are respected by the
 * Store.createWriterInTmp() method, used on store flush.
 */
@Test
public void testCreateWriter() throws Exception {
  Configuration conf = HBaseConfiguration.create();
  FileSystem fs = FileSystem.get(conf);

  ColumnFamilyDescriptor hcd = ColumnFamilyDescriptorBuilder.newBuilder(family)
      .setCompressionType(Compression.Algorithm.GZ).setDataBlockEncoding(DataBlockEncoding.DIFF)
      .build();
  init(name.getMethodName(), conf, hcd);

  // Test createWriterInTmp()
  StoreFileWriter writer =
      store.createWriterInTmp(4, hcd.getCompressionType(), false, true, false, false);
  Path path = writer.getPath();
  writer.append(new KeyValue(row, family, qf1, Bytes.toBytes(1)));
  writer.append(new KeyValue(row, family, qf2, Bytes.toBytes(2)));
  writer.append(new KeyValue(row2, family, qf1, Bytes.toBytes(3)));
  writer.append(new KeyValue(row2, family, qf2, Bytes.toBytes(4)));
  writer.close();

  // Verify that compression and encoding settings are respected
  HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), true, conf);
  assertEquals(hcd.getCompressionType(), reader.getTrailer().getCompressionCodec());
  assertEquals(hcd.getDataBlockEncoding(), reader.getDataBlockEncoding());
  reader.close();
}
 
Example 14
Source File: HDFSSplitIteratorJUnitTest.java    From gemfirexd-oss with Apache License 2.0
public void testNHoplogNBlockIter() throws Exception {
  Path path1 = new Path(testDataDir, "region/0/1-1-1.hop");
  Hoplog oplog = new HFileSortedOplog(hdfsStore, path1,
      blockCache, stats, storeStats);
  createHoplog(2000, oplog);
  
  FileSystem fs = hdfsStore.getFileSystem();
  Reader reader = HFile.createReader(fs, path1, new CacheConfig(fs.getConf()));
  BlockIndexReader bir = reader.getDataBlockIndexReader();
  int blockCount = bir.getRootBlockCount();
  reader.close();
  
  // make sure there are more than 1 hfile blocks in the hoplog
  assertTrue(1 < blockCount);
  
  Path path2 = new Path(testDataDir, "region/0/1-2-1.hop");
  oplog = new HFileSortedOplog(hdfsStore, path2,
      blockCache, stats, storeStats);
  createHoplog(2000, oplog);

  Path path3 = new Path(testDataDir, "region/0/1-3-1.hop");
  oplog = new HFileSortedOplog(hdfsStore, path3,
      blockCache, stats, storeStats);
  createHoplog(2000, oplog);
  
  Path[] paths = {path1, path2, path3, path1, path2, path3};
  long half = oplog.getSize()/2;
  long[] starts = {0, 0, 0, half + 1, half + 1, half + 1};
  long[] lengths = {half, half, half, oplog.getSize(), oplog.getSize(), oplog.getSize()};
  HDFSSplitIterator iter = HDFSSplitIterator.newInstance(
      hdfsStore.getFileSystem(), paths, starts, lengths, 0, 0);
  
  int[] keyCounts = new int[2000];
  while (iter.hasNext()) {
    boolean success = iter.next();
    assertTrue(success);
    String key = new String((byte[])iter.getKey()).substring("key-".length());
    keyCounts[Integer.valueOf(key) - 100000] ++;
  }
  
  for (int i : keyCounts) {
    assertEquals(3, i);
  }
}
 
Example 15
Source File: HBaseFsck.java    From hbase with Apache License 2.0
public void checkRegionBoundaries() {
  try {
    ByteArrayComparator comparator = new ByteArrayComparator();
    List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
    final RegionBoundariesInformation currentRegionBoundariesInformation =
        new RegionBoundariesInformation();
    Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
    for (RegionInfo regionInfo : regions) {
      Path tableDir = CommonFSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
      currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
      // For each region, get the start and stop key from the META and compare them to the
      // same information from the Stores.
      Path path = new Path(tableDir, regionInfo.getEncodedName());
      FileSystem fs = path.getFileSystem(getConf());
      FileStatus[] files = fs.listStatus(path);
      // For all the column families in this region...
      byte[] storeFirstKey = null;
      byte[] storeLastKey = null;
      for (FileStatus file : files) {
        String fileName = file.getPath().toString();
        fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
        if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
          FileStatus[] storeFiles = fs.listStatus(file.getPath());
          // For all the stores in this column family.
          for (FileStatus storeFile : storeFiles) {
            HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
              CacheConfig.DISABLED, true, getConf());
            if ((reader.getFirstKey() != null)
                && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                    ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
              storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
            }
            if ((reader.getLastKey() != null)
                && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                    ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
              storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
            }
            reader.close();
          }
        }
      }
      currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
      currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
      currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
      currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
      if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
        currentRegionBoundariesInformation.metaFirstKey = null;
      if (currentRegionBoundariesInformation.metaLastKey.length == 0)
        currentRegionBoundariesInformation.metaLastKey = null;

      // For a region to be correct, we need the META start key to be smaller or equal to the
      // smallest start key from all the stores, and the start key from the next META entry to
      // be bigger than the last key from all the current stores. First region start key is null;
      // Last region end key is null; some regions can be empty and not have any store.

      boolean valid = true;
      // Checking start key.
      if ((currentRegionBoundariesInformation.storesFirstKey != null)
          && (currentRegionBoundariesInformation.metaFirstKey != null)) {
        valid = valid
            && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
              currentRegionBoundariesInformation.metaFirstKey) >= 0;
      }
      // Checking stop key.
      if ((currentRegionBoundariesInformation.storesLastKey != null)
          && (currentRegionBoundariesInformation.metaLastKey != null)) {
        valid = valid
            && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
              currentRegionBoundariesInformation.metaLastKey) < 0;
      }
      if (!valid) {
        errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
          tablesInfo.get(regionInfo.getTable()));
        LOG.warn("Region's boundaries not aligned between stores and META for:");
        LOG.warn(Objects.toString(currentRegionBoundariesInformation));
      }
    }
  } catch (IOException e) {
    LOG.error(e.toString(), e);
  }
}
 
Example 16
Source File: TestHalfStoreFileReader.java    From hbase with Apache License 2.0
@Test
public void testHalfScanner() throws IOException {
  String root_dir = TEST_UTIL.getDataTestDir().toString();
  Path p = new Path(root_dir, "test");
  Configuration conf = TEST_UTIL.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  CacheConfig cacheConf = new CacheConfig(conf);
  HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
  HFile.Writer w =
      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create();

  // write some things.
  List<KeyValue> items = genSomeKeys();
  for (KeyValue kv : items) {
    w.append(kv);
  }
  w.close();

  HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
  Cell midKV = r.midKey().get();
  byte[] midkey = CellUtil.cloneRow(midKV);

  Reference bottom = new Reference(midkey, Reference.Range.bottom);
  Reference top = new Reference(midkey, Reference.Range.top);

  // Ugly code to get the item before the midkey
  KeyValue beforeMidKey = null;
  for (KeyValue item : items) {
    if (CellComparatorImpl.COMPARATOR.compare(item, midKV) >= 0) {
      break;
    }
    beforeMidKey = item;
  }
  System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));
  System.out.println("beforeMidKey: " + beforeMidKey);

  // Seek on the splitKey, should be in top, not in bottom
  Cell foundKeyValue = doTestOfSeekBefore(p, fs, bottom, midKV, cacheConf);
  assertEquals(beforeMidKey, foundKeyValue);

  // Seeking before the last item should give the penultimate on the top, and the one before the midkey on
  // the bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(items.size() - 1), cacheConf);
  assertEquals(items.get(items.size() - 2), foundKeyValue);

  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(items.size() - 1), cacheConf);
  assertEquals(beforeMidKey, foundKeyValue);

  // Try and seek before something that is in the bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(0), cacheConf);
  assertNull(foundKeyValue);

  // Try and seek before the first thing.
  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(0), cacheConf);
  assertNull(foundKeyValue);

  // Try and seek before the second thing in the top and bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(1), cacheConf);
  assertNull(foundKeyValue);

  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(1), cacheConf);
  assertEquals(items.get(0), foundKeyValue);

  // Try to seek before the splitKey in the top file
  foundKeyValue = doTestOfSeekBefore(p, fs, top, midKV, cacheConf);
  assertNull(foundKeyValue);
}
 
Example 17
Source File: TestHFileOutputFormat2.java    From hbase with Apache License 2.0
/**
 * Run small MR job.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  long hregionMaxFilesize = 10 * 1024;
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, hregionMaxFilesize);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job, false);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(CellSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus [] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);

  //check output file num and size.
  for (byte[] family : FAMILIES) {
    long kvCount= 0;
    RemoteIterator<LocatedFileStatus> iterator =
            fs.listFiles(testDir.suffix("/" + new String(family)), true);
    while (iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      HFile.Reader reader =
              HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
      HFileScanner scanner = reader.getScanner(false, false, false);

      kvCount += reader.getEntries();
      scanner.seekTo();
      long perKVSize = scanner.getCell().getSerializedSize();
      assertTrue("Data size of each file should not be too large.",
              perKVSize * reader.getEntries() <= hregionMaxFilesize);
    }
    assertEquals("Should write expected data in output file.", ROWSPERSPLIT, kvCount);
  }
}
 
Example 19
Source File: HStore.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * This throws a WrongRegionException if the HFile does not fit in this region, or an
 * InvalidHFileException if the HFile is not valid.
 */
public void assertBulkLoadHFileOk(Path srcPath) throws IOException {
  HFile.Reader reader  = null;
  try {
    LOG.info("Validating hfile at " + srcPath + " for inclusion in " + this);
    FileSystem srcFs = srcPath.getFileSystem(conf);
    srcFs.access(srcPath, FsAction.READ_WRITE);
    reader = HFile.createReader(srcFs, srcPath, cacheConf, isPrimaryReplicaStore(), conf);

    Optional<byte[]> firstKey = reader.getFirstRowKey();
    Preconditions.checkState(firstKey.isPresent(), "First key can not be null");
    Optional<Cell> lk = reader.getLastKey();
    Preconditions.checkState(lk.isPresent(), "Last key can not be null");
    byte[] lastKey =  CellUtil.cloneRow(lk.get());

    if (LOG.isDebugEnabled()) {
      LOG.debug("HFile bounds: first=" + Bytes.toStringBinary(firstKey.get()) +
          " last=" + Bytes.toStringBinary(lastKey));
      LOG.debug("Region bounds: first=" +
          Bytes.toStringBinary(getRegionInfo().getStartKey()) +
          " last=" + Bytes.toStringBinary(getRegionInfo().getEndKey()));
    }

    if (!this.getRegionInfo().containsRange(firstKey.get(), lastKey)) {
      throw new WrongRegionException(
          "Bulk load file " + srcPath.toString() + " does not fit inside region "
          + this.getRegionInfo().getRegionNameAsString());
    }

    if(reader.length() > conf.getLong(HConstants.HREGION_MAX_FILESIZE,
        HConstants.DEFAULT_MAX_FILE_SIZE)) {
      LOG.warn("Trying to bulk load hfile " + srcPath + " with size: " +
          reader.length() + " bytes can be problematic as it may lead to oversplitting.");
    }

    if (verifyBulkLoads) {
      long verificationStartTime = EnvironmentEdgeManager.currentTime();
      LOG.info("Full verification started for bulk load hfile: {}", srcPath);
      Cell prevCell = null;
      HFileScanner scanner = reader.getScanner(false, false, false);
      scanner.seekTo();
      do {
        Cell cell = scanner.getCell();
        if (prevCell != null) {
          if (comparator.compareRows(prevCell, cell) > 0) {
            throw new InvalidHFileException("Previous row is greater than"
                + " current row: path=" + srcPath + " previous="
                + CellUtil.getCellKeyAsString(prevCell) + " current="
                + CellUtil.getCellKeyAsString(cell));
          }
          if (CellComparator.getInstance().compareFamilies(prevCell, cell) != 0) {
            throw new InvalidHFileException("Previous key had different"
                + " family compared to current key: path=" + srcPath
                + " previous="
                + Bytes.toStringBinary(prevCell.getFamilyArray(), prevCell.getFamilyOffset(),
                    prevCell.getFamilyLength())
                + " current="
                + Bytes.toStringBinary(cell.getFamilyArray(), cell.getFamilyOffset(),
                    cell.getFamilyLength()));
          }
        }
        prevCell = cell;
      } while (scanner.next());
      LOG.info("Full verification complete for bulk load hfile: " + srcPath.toString() +
        " took " + (EnvironmentEdgeManager.currentTime() - verificationStartTime) + " ms");
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
}
 
Example 19
Source File: TestHFileOutputFormat2.java    From hbase with Apache License 2.0
/**
 * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into
 * hfile.
 */
@Test
public void test_WritingTagData()
    throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
  conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir =
      util.getDataTestDir("WritingTagData");
  try {
    conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    final byte [] b = Bytes.toBytes("b");

    List< Tag > tags = new ArrayList<>();
    tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
    KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
    writer.write(new ImmutableBytesWritable(), kv);
    writer.close(context);
    writer = null;
    FileSystem fs = dir.getFileSystem(conf);
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
    while(iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      HFile.Reader reader =
          HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
      HFileScanner scanner = reader.getScanner(false, false, false);
      scanner.seekTo();
      Cell cell = scanner.getCell();
      List<Tag> tagsFromCell = PrivateCellUtil.getTags(cell);
      assertTrue(tagsFromCell.size() > 0);
      for (Tag tag : tagsFromCell) {
        assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
      }
    }
  } finally {
    if (writer != null && context != null) writer.close(context);
    dir.getFileSystem(conf).delete(dir, true);
  }
}