Java Code Examples for org.apache.hadoop.hbase.io.hfile.HFile#Reader

The following examples show how to use org.apache.hadoop.hbase.io.hfile.HFile#Reader. Each example comes from a real open-source project; the header above each one names the source file, the project it is taken from, and the license it was published under.
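Nearly every example below follows the same basic pattern: obtain an HFile.Reader via HFile.createReader(), read cells through an HFileScanner or metadata through the reader's accessors, and close the reader when finished. As a quick orientation, here is a minimal, self-contained sketch of that pattern. It is a composite distilled from the examples on this page, assuming an HBase 2.x classpath; it is not code from any single project.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;

public class HFileReadSketch {
  /** Counts the cells in an HFile by scanning it from first to last. */
  static long countCells(FileSystem fs, Path p, Configuration conf) throws IOException {
    // The boolean argument is primaryReplicaReader.
    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
    try {
      // getScanner(cacheBlocks, pread): no block caching, sequential read.
      HFileScanner scanner = reader.getScanner(false, false);
      long count = 0;
      if (scanner.seekTo()) { // position at the first cell; false means the file is empty
        do {
          count++;
        } while (scanner.next());
      }
      return count;
    } finally {
      reader.close();
    }
  }
}
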
Example 1
Source File: HFileCorruptionChecker.java    From hbase-operator-tools with Apache License 2.0
/**
 * Checks a path to see if it is a valid mob file.
 *
 * @param p
 *          full Path to a mob file.
 * @throws IOException
 *           This is a connectivity-related exception
 */
protected void checkMobFile(Path p) throws IOException {
  HFile.Reader r = null;
  try {
    r = HFile.createReader(fs, p, cacheConf, true, conf);
  } catch (CorruptHFileException che) {
    LOG.warn("Found corrupt mob file " + p, che);
    corruptedMobFiles.add(p);
    if (inQuarantineMode) {
      Path dest = createQuarantinePath(p);
      LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
      boolean success = fs.mkdirs(dest.getParent());
      success = success ? fs.rename(p, dest) : false;
      if (!success) {
        failureMobFiles.add(p);
      } else {
        quarantinedMobFiles.add(dest);
      }
    }
    return;
  } catch (FileNotFoundException fnfe) {
    LOG.warn("Mob file " + p + " was missing.  Likely removed due to compaction?");
    missedMobFiles.add(p);
  } finally {
    mobFilesChecked.addAndGet(1);
    if (r != null) {
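      // close(true) = evictOnClose: also evict this file's blocks from the block cache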
      r.close(true);
    }
  }
}
 
Example 2
Source File: TestHStore.java    From hbase with Apache License 2.0
/**
 * Verify that compression and data block encoding are respected by the
 * Store.createWriterInTmp() method, used on store flush.
 */
@Test
public void testCreateWriter() throws Exception {
  Configuration conf = HBaseConfiguration.create();
  FileSystem fs = FileSystem.get(conf);

  ColumnFamilyDescriptor hcd = ColumnFamilyDescriptorBuilder.newBuilder(family)
      .setCompressionType(Compression.Algorithm.GZ).setDataBlockEncoding(DataBlockEncoding.DIFF)
      .build();
  init(name.getMethodName(), conf, hcd);

  // Test createWriterInTmp()
  StoreFileWriter writer =
      store.createWriterInTmp(4, hcd.getCompressionType(), false, true, false, false);
  Path path = writer.getPath();
  writer.append(new KeyValue(row, family, qf1, Bytes.toBytes(1)));
  writer.append(new KeyValue(row, family, qf2, Bytes.toBytes(2)));
  writer.append(new KeyValue(row2, family, qf1, Bytes.toBytes(3)));
  writer.append(new KeyValue(row2, family, qf2, Bytes.toBytes(4)));
  writer.close();

  // Verify that compression and encoding settings are respected
  HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), true, conf);
  assertEquals(hcd.getCompressionType(), reader.getTrailer().getCompressionCodec());
  assertEquals(hcd.getDataBlockEncoding(), reader.getDataBlockEncoding());
  reader.close();
}
 
Example 3
Source File: TestHBaseFsckEncryption.java    From hbase with Apache License 2.0
private byte[] extractHFileKey(Path path) throws Exception {
  HFile.Reader reader = HFile.createReader(TEST_UTIL.getTestFileSystem(), path,
    new CacheConfig(conf), true, conf);
  try {
    Encryption.Context cryptoContext = reader.getFileContext().getEncryptionContext();
    assertNotNull("Reader has a null crypto context", cryptoContext);
    Key key = cryptoContext.getKey();
    assertNotNull("Crypto context has no key", key);
    return key.getEncoded();
  } finally {
    reader.close();
  }
}
 
Example 4
Source File: TestBulkLoadHFiles.java    From hbase with Apache License 2.0
private int verifyHFile(Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader =
    HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
  HFileScanner scanner = reader.getScanner(false, false);
  scanner.seekTo();
  int count = 0;
  do {
    count++;
  } while (scanner.next());
  assertTrue(count > 0);
  reader.close();
  return count;
}
 
Example 5
Source File: HFileCorruptionChecker.java    From hbase with Apache License 2.0
/**
 * Checks a path to see if it is a valid mob file.
 *
 * @param p
 *          full Path to a mob file.
 * @throws IOException
 *           This is a connectivity-related exception
 */
protected void checkMobFile(Path p) throws IOException {
  HFile.Reader r = null;
  try {
    r = HFile.createReader(fs, p, cacheConf, true, conf);
  } catch (CorruptHFileException che) {
    LOG.warn("Found corrupt mob file " + p, che);
    corruptedMobFiles.add(p);
    if (inQuarantineMode) {
      Path dest = createQuarantinePath(p);
      LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
      boolean success = fs.mkdirs(dest.getParent());
      success = success ? fs.rename(p, dest) : false;
      if (!success) {
        failureMobFiles.add(p);
      } else {
        quarantinedMobFiles.add(dest);
      }
    }
    return;
  } catch (FileNotFoundException fnfe) {
    LOG.warn("Mob file " + p + " was missing.  Likely removed due to compaction?");
    missedMobFiles.add(p);
  } finally {
    mobFilesChecked.addAndGet(1);
    if (r != null) {
      r.close(true);
    }
  }
}
 
Example 6
Source File: HFileCorruptionChecker.java    From hbase with Apache License 2.0
/**
 * Checks a path to see if it is a valid hfile.
 *
 * @param p
 *          full Path to an HFile
 * @throws IOException
 *           This is a connectivity-related exception
 */
protected void checkHFile(Path p) throws IOException {
  HFile.Reader r = null;
  try {
    r = HFile.createReader(fs, p, cacheConf, true, conf);
  } catch (CorruptHFileException che) {
    LOG.warn("Found corrupt HFile " + p, che);
    corrupted.add(p);
    if (inQuarantineMode) {
      Path dest = createQuarantinePath(p);
      LOG.warn("Quarantining corrupt HFile " + p + " into " + dest);
      boolean success = fs.mkdirs(dest.getParent());
      success = success ? fs.rename(p, dest) : false;
      if (!success) {
        failures.add(p);
      } else {
        quarantined.add(dest);
      }
    }
    return;
  } catch (FileNotFoundException fnfe) {
    LOG.warn("HFile " + p + " was missing.  Likely removed due to compaction/split?");
    missing.add(p);
  } finally {
    hfilesChecked.addAndGet(1);
    if (r != null) {
      r.close(true);
    }
  }
}
 
Example 7
Source File: TestHMobStore.java    From hbase with Apache License 2.0
private void checkMobHFileEncryption(Collection<HStoreFile> storefiles) {
  HStoreFile storeFile = storefiles.iterator().next();
  HFile.Reader reader = storeFile.getReader().getHFileReader();
  byte[] encryptionKey = reader.getTrailer().getEncryptionKey();
  Assert.assertNotNull(encryptionKey);
  Assert.assertEquals(HConstants.CIPHER_AES,
      reader.getFileContext().getEncryptionContext().getCipher().getName());
}
 
Example 8
Source File: TestImportTsv.java    From hbase with Apache License 2.0
/**
 * Method returns the total KVs in given hfile
 * @param fs File System
 * @param p HFile path
 * @return KV count in the given hfile
 * @throws IOException
 */
private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
  HFileScanner scanner = reader.getScanner(false, false);
  scanner.seekTo();
  int count = 0;
  do {
    count++;
  } while (scanner.next());
  reader.close();
  return count;
}
 
Example 9
Source File: HFileGeneratorTest.java    From terrapin with Apache License 2.0
@Test
public void testGenerateHFiles() throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  int numOfPart = 10;
  int numOfKeys = 1000;
  HFileGenerator.generateHFiles(fs, conf, outputDir,
      PartitionerType.CASCADING, numOfPart, numOfKeys);
  FilenameFilter hfileFilter = new FilenameFilter() {
    @Override
    public boolean accept(File dir, String name) {
      return name.startsWith(Constants.FILE_PREFIX);
    }
  };
  File[] hfiles = outputDir.listFiles(hfileFilter);
  assertEquals(numOfPart, hfiles.length);

  int count = 0;
  for (File hfile : hfiles) {
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    columnDescriptor.setBlockCacheEnabled(false);
    HFile.Reader reader =
        HFile.createReader(fs, new Path(hfile.toURI()), new CacheConfig(conf, columnDescriptor));
    count += reader.getEntries();
    reader.close();
  }
  assertEquals(numOfKeys, count);
}
 
Example 10
Source File: TestEncryptionKeyRotation.java    From hbase with Apache License 2.0
private static byte[] extractHFileKey(Path path) throws Exception {
  HFile.Reader reader = HFile.createReader(TEST_UTIL.getTestFileSystem(), path,
    new CacheConfig(conf), true, conf);
  try {
    Encryption.Context cryptoContext = reader.getFileContext().getEncryptionContext();
    assertNotNull("Reader has a null crypto context", cryptoContext);
    Key key = cryptoContext.getKey();
    assertNotNull("Crypto context has no key", key);
    return key.getEncoded();
  } finally {
    reader.close();
  }
}
 
Example 11
Source File: HFileStats.java    From warp10-platform with Apache License 2.0
public static void main(String[] args) throws Exception {
  
  Configuration conf = new Configuration();
  CacheConfig cacheConf = new CacheConfig(conf);
  
  FileSystem fs = FileSystem.newInstance(conf);
  
  FileStatus[] paths = fs.globStatus(new Path(args[0]));
  
  long bytes = 0L;
  long cells = 0L;

  for (FileStatus status : paths) {
    try {
      HFile.Reader reader = HFile.createReader(fs, status.getPath(), cacheConf, conf);
      bytes += reader.length();
      cells += reader.getEntries();

      System.out.println(status.getPath() + " >>> " + reader.length() + " bytes " + reader.getEntries() + " cells");
    
      reader.close();      
    } catch (Exception e) {
      // Skip files that cannot be opened as valid HFiles
      continue;
    }
  }

  System.out.println("TOTAL: " + cells + " cells " + bytes + " bytes " + (bytes/(double) cells) + " bytes/cell");
 
  long ts = System.currentTimeMillis();

  System.out.println(ts * 1000 + "// hbase.bytes{} " + bytes);
  System.out.println(ts * 1000 + "// hbase.datapoints{} " + cells);
}
 
Example 12
Source File: MizoHFileIterator.java    From mizo with Apache License 2.0
/**
 * Creates an inner HFileScanner object for a given HFile path
 */
public static HFileScanner createScanner(FileSystem fs, Path path) throws IOException {
    Configuration config = fs.getConf();
    HFile.Reader reader = HFile.createReader(fs, path, getCacheConfig(config), config);
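    // Note: the reader is never closed here; it must remain open for as long as
    // the returned scanner is in use.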

    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();

    return scanner;
}
 
Example 13
Source File: HFileCorruptionChecker.java    From hbase-operator-tools with Apache License 2.0
/**
 * Checks a path to see if it is a valid hfile.
 *
 * @param p
 *          full Path to an HFile
 * @throws IOException
 *           This is a connectivity-related exception
 */
protected void checkHFile(Path p) throws IOException {
  HFile.Reader r = null;
  try {
    r = HFile.createReader(fs, p, cacheConf, true, conf);
  } catch (CorruptHFileException che) {
    LOG.warn("Found corrupt HFile " + p, che);
    corrupted.add(p);
    if (inQuarantineMode) {
      Path dest = createQuarantinePath(p);
      LOG.warn("Quarantining corrupt HFile " + p + " into " + dest);
      boolean success = fs.mkdirs(dest.getParent());
      success = success ? fs.rename(p, dest) : false;
      if (!success) {
        failures.add(p);
      } else {
        quarantined.add(dest);
      }
    }
    return;
  } catch (FileNotFoundException fnfe) {
    LOG.warn("HFile " + p + " was missing.  Likely removed due to compaction/split?");
    missing.add(p);
  } finally {
    hfilesChecked.addAndGet(1);
    if (r != null) {
      r.close(true);
    }
  }
}
 
Example 14
Source File: RestoreTool.java    From hbase with Apache License 2.0
/**
 * Calculate region boundaries and add all the column families to the table descriptor
 * @param regionDirList region dir list
 * @return a set of keys to store the boundaries
 */
byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException {
  TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
  // Build a set of keys to store the boundaries
  // calculate region boundaries and add all the column families to the table descriptor
  for (Path regionDir : regionDirList) {
    LOG.debug("Parsing region dir: " + regionDir);
    Path hfofDir = regionDir;

    if (!fs.exists(hfofDir)) {
      LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
    }

    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    if (familyDirStatuses == null) {
      throw new IOException("No families found in " + hfofDir);
    }

    for (FileStatus stat : familyDirStatuses) {
      if (!stat.isDirectory()) {
        LOG.warn("Skipping non-directory " + stat.getPath());
        continue;
      }
      boolean isIgnore = false;
      String pathName = stat.getPath().getName();
      for (String ignore : ignoreDirs) {
        if (pathName.contains(ignore)) {
          LOG.warn("Skipping non-family directory" + pathName);
          isIgnore = true;
          break;
        }
      }
      if (isIgnore) {
        continue;
      }
      Path familyDir = stat.getPath();
      LOG.debug("Parsing family dir [" + familyDir.toString() + " in region [" + regionDir + "]");
      // Skip _logs, etc
      if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
        continue;
      }

      // start to parse hfile inside one family dir
      Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
      for (Path hfile : hfiles) {
        if (hfile.getName().startsWith("_") || hfile.getName().startsWith(".")
            || StoreFileInfo.isReference(hfile.getName())
            || HFileLink.isHFileLink(hfile.getName())) {
          continue;
        }
        HFile.Reader reader = HFile.createReader(fs, hfile, conf);
        final byte[] first, last;
        try {
          first = reader.getFirstRowKey().get();
          last = reader.getLastRowKey().get();
          LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
              + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));

          // To eventually infer start key-end key boundaries
          Integer value = map.getOrDefault(first, 0);
          map.put(first, value + 1);
          value = map.getOrDefault(last, 0);
          map.put(last, value - 1);
        } finally {
          reader.close();
        }
      }
    }
  }
  return BulkLoadHFilesTool.inferBoundaries(map);
}
 
Example 15
Source File: HBaseFsck.java    From hbase with Apache License 2.0
public void checkRegionBoundaries() {
  try {
    ByteArrayComparator comparator = new ByteArrayComparator();
    List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
    final RegionBoundariesInformation currentRegionBoundariesInformation =
        new RegionBoundariesInformation();
    Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
    for (RegionInfo regionInfo : regions) {
      Path tableDir = CommonFSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
      currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
      // For each region, get the start and stop key from the META and compare them to the
      // same information from the Stores.
      Path path = new Path(tableDir, regionInfo.getEncodedName());
      FileSystem fs = path.getFileSystem(getConf());
      FileStatus[] files = fs.listStatus(path);
      // For all the column families in this region...
      byte[] storeFirstKey = null;
      byte[] storeLastKey = null;
      for (FileStatus file : files) {
        String fileName = file.getPath().toString();
        fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
        if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
          FileStatus[] storeFiles = fs.listStatus(file.getPath());
          // For all the stores in this column family.
          for (FileStatus storeFile : storeFiles) {
            HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
              CacheConfig.DISABLED, true, getConf());
            if ((reader.getFirstKey() != null)
                && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                    ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
              storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
            }
            if ((reader.getLastKey() != null)
                && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                    ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
              storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
            }
            reader.close();
          }
        }
      }
      currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
      currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
      currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
      currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
      if (currentRegionBoundariesInformation.metaFirstKey.length == 0) {
        currentRegionBoundariesInformation.metaFirstKey = null;
      }
      if (currentRegionBoundariesInformation.metaLastKey.length == 0) {
        currentRegionBoundariesInformation.metaLastKey = null;
      }

      // For a region to be correct, we need the META start key to be smaller or equal to the
      // smallest start key from all the stores, and the start key from the next META entry to
      // be bigger than the last key from all the current stores. First region start key is null;
      // Last region end key is null; some regions can be empty and not have any store.

      boolean valid = true;
      // Checking start key.
      if ((currentRegionBoundariesInformation.storesFirstKey != null)
          && (currentRegionBoundariesInformation.metaFirstKey != null)) {
        valid = valid
            && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
              currentRegionBoundariesInformation.metaFirstKey) >= 0;
      }
      // Checking stop key.
      if ((currentRegionBoundariesInformation.storesLastKey != null)
          && (currentRegionBoundariesInformation.metaLastKey != null)) {
        valid = valid
            && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
              currentRegionBoundariesInformation.metaLastKey) < 0;
      }
      if (!valid) {
        errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
          tablesInfo.get(regionInfo.getTable()));
        LOG.warn("Region's boundaries not aligned between stores and META for:");
        LOG.warn(Objects.toString(currentRegionBoundariesInformation));
      }
    }
  } catch (IOException e) {
    LOG.error(e.toString(), e);
  }
}
 
Example 16
Source File: TestHalfStoreFileReader.java    From hbase with Apache License 2.0
@Test
public void testHalfScanner() throws IOException {
  String rootDir = TEST_UTIL.getDataTestDir().toString();
  Path p = new Path(rootDir, "test");
  Configuration conf = TEST_UTIL.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  CacheConfig cacheConf = new CacheConfig(conf);
  HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
  HFile.Writer w =
      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create();

  // write some things.
  List<KeyValue> items = genSomeKeys();
  for (KeyValue kv : items) {
    w.append(kv);
  }
  w.close();

  HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
  Cell midKV = r.midKey().get();
  byte[] midkey = CellUtil.cloneRow(midKV);

  Reference bottom = new Reference(midkey, Reference.Range.bottom);
  Reference top = new Reference(midkey, Reference.Range.top);

  // Ugly code to get the item before the midkey
  KeyValue beforeMidKey = null;
  for (KeyValue item : items) {
    if (CellComparatorImpl.COMPARATOR.compare(item, midKV) >= 0) {
      break;
    }
    beforeMidKey = item;
  }
  System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));
  System.out.println("beforeMidKey: " + beforeMidKey);

  // Seek on the splitKey, should be in top, not in bottom
  Cell foundKeyValue = doTestOfSeekBefore(p, fs, bottom, midKV, cacheConf);
  assertEquals(beforeMidKey, foundKeyValue);

  // Seek to the last thing; it should be the penultimate on the top, the one before the midkey
  // on the bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(items.size() - 1), cacheConf);
  assertEquals(items.get(items.size() - 2), foundKeyValue);

  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(items.size() - 1), cacheConf);
  assertEquals(beforeMidKey, foundKeyValue);

  // Try and seek before something that is in the bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(0), cacheConf);
  assertNull(foundKeyValue);

  // Try and seek before the first thing.
  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(0), cacheConf);
  assertNull(foundKeyValue);

  // Try and seek before the second thing in the top and bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(1), cacheConf);
  assertNull(foundKeyValue);

  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(1), cacheConf);
  assertEquals(items.get(0), foundKeyValue);

  // Try to seek before the splitKey in the top file
  foundKeyValue = doTestOfSeekBefore(p, fs, top, midKV, cacheConf);
  assertNull(foundKeyValue);
}
 
Example 17
Source File: StoreFileReader.java    From hbase with Apache License 2.0
private StoreFileReader(HFile.Reader reader, AtomicInteger refCount, ReaderContext context) {
  this.reader = reader;
  bloomFilterType = BloomType.NONE;
  this.refCount = refCount;
  this.context = context;
}
 
Example 18
Source File: TestHFileOutputFormat2.java    From hbase with Apache License 2.0
/**
 * Run small MR job.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  long hregionMaxFilesize = 10 * 1024;
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, hregionMaxFilesize);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job, false);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(CellSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus[] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);

  //check output file num and size.
  for (byte[] family : FAMILIES) {
    long kvCount = 0;
    RemoteIterator<LocatedFileStatus> iterator =
            fs.listFiles(testDir.suffix("/" + new String(family)), true);
    while (iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      HFile.Reader reader =
              HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
      HFileScanner scanner = reader.getScanner(false, false, false);

      kvCount += reader.getEntries();
      scanner.seekTo();
      long perKVSize = scanner.getCell().getSerializedSize();
      assertTrue("Data size of each file should not be too large.",
              perKVSize * reader.getEntries() <= hregionMaxFilesize);
    }
    assertEquals("Should write expected data in output file.", ROWSPERSPLIT, kvCount);
  }
}
 
Example 19
Source File: HStore.java    From hbase with Apache License 2.0
/**
 * This throws a WrongRegionException if the HFile does not fit in this region, or an
 * InvalidHFileException if the HFile is not valid.
 */
public void assertBulkLoadHFileOk(Path srcPath) throws IOException {
  HFile.Reader reader = null;
  try {
    LOG.info("Validating hfile at " + srcPath + " for inclusion in " + this);
    FileSystem srcFs = srcPath.getFileSystem(conf);
    srcFs.access(srcPath, FsAction.READ_WRITE);
    reader = HFile.createReader(srcFs, srcPath, cacheConf, isPrimaryReplicaStore(), conf);

    Optional<byte[]> firstKey = reader.getFirstRowKey();
    Preconditions.checkState(firstKey.isPresent(), "First key can not be null");
    Optional<Cell> lk = reader.getLastKey();
    Preconditions.checkState(lk.isPresent(), "Last key can not be null");
    byte[] lastKey = CellUtil.cloneRow(lk.get());

    if (LOG.isDebugEnabled()) {
      LOG.debug("HFile bounds: first=" + Bytes.toStringBinary(firstKey.get()) +
          " last=" + Bytes.toStringBinary(lastKey));
      LOG.debug("Region bounds: first=" +
          Bytes.toStringBinary(getRegionInfo().getStartKey()) +
          " last=" + Bytes.toStringBinary(getRegionInfo().getEndKey()));
    }

    if (!this.getRegionInfo().containsRange(firstKey.get(), lastKey)) {
      throw new WrongRegionException(
          "Bulk load file " + srcPath.toString() + " does not fit inside region "
          + this.getRegionInfo().getRegionNameAsString());
    }

    if (reader.length() > conf.getLong(HConstants.HREGION_MAX_FILESIZE,
        HConstants.DEFAULT_MAX_FILE_SIZE)) {
      LOG.warn("Trying to bulk load hfile " + srcPath + " with size: " +
          reader.length() + " bytes can be problematic as it may lead to oversplitting.");
    }

    if (verifyBulkLoads) {
      long verificationStartTime = EnvironmentEdgeManager.currentTime();
      LOG.info("Full verification started for bulk load hfile: {}", srcPath);
      Cell prevCell = null;
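      // getScanner(cacheBlocks, pread, isCompaction): uncached, sequential, non-compaction scan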
      HFileScanner scanner = reader.getScanner(false, false, false);
      scanner.seekTo();
      do {
        Cell cell = scanner.getCell();
        if (prevCell != null) {
          if (comparator.compareRows(prevCell, cell) > 0) {
            throw new InvalidHFileException("Previous row is greater than"
                + " current row: path=" + srcPath + " previous="
                + CellUtil.getCellKeyAsString(prevCell) + " current="
                + CellUtil.getCellKeyAsString(cell));
          }
          if (CellComparator.getInstance().compareFamilies(prevCell, cell) != 0) {
            throw new InvalidHFileException("Previous key had different"
                + " family compared to current key: path=" + srcPath
                + " previous="
                + Bytes.toStringBinary(prevCell.getFamilyArray(), prevCell.getFamilyOffset(),
                    prevCell.getFamilyLength())
                + " current="
                + Bytes.toStringBinary(cell.getFamilyArray(), cell.getFamilyOffset(),
                    cell.getFamilyLength()));
          }
        }
        prevCell = cell;
      } while (scanner.next());
      LOG.info("Full verification complete for bulk load hfile: " + srcPath.toString() +
        " took " + (EnvironmentEdgeManager.currentTime() - verificationStartTime) + " ms");
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
}
 
Example 20
Source File: TestHFileOutputFormat2.java    From hbase with Apache License 2.0
/**
 * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into
 * hfile.
 */
@Test
public void test_WritingTagData() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
  conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir =
      util.getDataTestDir("WritingTagData");
  try {
    conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    final byte[] b = Bytes.toBytes("b");

    List<Tag> tags = new ArrayList<>();
    tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
    KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
    writer.write(new ImmutableBytesWritable(), kv);
    writer.close(context);
    writer = null;
    FileSystem fs = dir.getFileSystem(conf);
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
    while (iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      HFile.Reader reader =
          HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
      HFileScanner scanner = reader.getScanner(false, false, false);
      scanner.seekTo();
      Cell cell = scanner.getCell();
      List<Tag> tagsFromCell = PrivateCellUtil.getTags(cell);
      assertTrue(tagsFromCell.size() > 0);
      for (Tag tag : tagsFromCell) {
        assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
      }
    }
  } finally {
    if (writer != null && context != null) writer.close(context);
    dir.getFileSystem(conf).delete(dir, true);
  }
}