Java Code Examples for org.apache.hadoop.conf.Configuration#setLong()

The following examples show how to use org.apache.hadoop.conf.Configuration#setLong(). They are drawn from open-source projects; the source file and originating project are noted above each example.
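
Before the project examples, here is a minimal sketch of the pattern most of them follow: store a long-valued setting with setLong() and read it back with getLong() and a default. The key names and values below are made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class SetLongDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Store a long under an illustrative (made-up) key.
    conf.setLong("example.block.size", 128L * 1024 * 1024);
    // Read it back; the second argument is the default returned when the key is absent.
    long blockSize = conf.getLong("example.block.size", 64L * 1024 * 1024);
    long missing = conf.getLong("example.not.set", -1L);
    System.out.println(blockSize + " " + missing);  // prints 134217728 -1
  }
}
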
Example 1
Source File: find_nth_driver.java    From MLHadoop with Apache License 2.0
@SuppressWarnings("deprecation")
public static String runSafely (String[] args, long n) throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  FileSystem hdfs = FileSystem.get(conf);
  // Delete the previously stored nth row
  hdfs.delete(new Path(args[1]));
  conf.setLong("n", n);
  Job job = new Job(conf);

  job.setJarByClass(find_nth_driver.class);

  job.setJobName("Finds the nth row of the HDFS file");

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(find_nth_mapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(LongAndTextWritable.class);

  job.waitForCompletion(true);
  
  return readNthRow(args[1], conf);
}
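
On the map side (not shown above), the value stored with conf.setLong("n", n) would typically be read back from the task's configuration. The sketch below shows what find_nth_mapper's setup might look like; it is an assumption for illustration, not the project's actual code.

// Sketch of the read side inside a Mapper subclass (assumed, not MLHadoop's actual source):
@Override
protected void setup(Context context) {
  long n = context.getConfiguration().getLong("n", -1L);  // -1 is an assumed default
  // ... remember n so map() can emit only the nth row ...
}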
 
Example 2
Source File: TestMapCollection.java    From big-c with Apache License 2.0
@Test
public void testRandom() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(Job.COMPLETION_POLL_INTERVAL_KEY, 100);
  Job job = Job.getInstance(conf);
  conf = job.getConfiguration();
  conf.setInt(MRJobConfig.IO_SORT_MB, 1);
  conf.setClass("test.mapcollection.class", RandomFactory.class,
      RecordFactory.class);
  final Random r = new Random();
  final long seed = r.nextLong();
  LOG.info("SEED: " + seed);
  r.setSeed(seed);
  conf.set(MRJobConfig.MAP_SORT_SPILL_PERCENT,
      Float.toString(Math.max(0.1f, r.nextFloat())));
  RandomFactory.setLengths(conf, r, 1 << 14);
  conf.setInt("test.spillmap.records", r.nextInt(500));
  conf.setLong("test.randomfactory.seed", r.nextLong());
  runTest("random", job);
}
 
Example 3
Source File: RowCounter.java    From hbase with Apache License 2.0
/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(RowCounter.class);
  Scan scan = new Scan();
  scan.setCacheBlocks(false);
  setScanFilter(scan, rowRangeList);

  for (String columnName : this.columns) {
    String family = StringUtils.substringBefore(columnName, ":");
    String qualifier = StringUtils.substringAfter(columnName, ":");
    if (StringUtils.isBlank(qualifier)) {
      scan.addFamily(Bytes.toBytes(family));
    } else {
      scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
    }
  }

  if(this.expectedCount >= 0) {
    conf.setLong(EXPECTED_COUNT_KEY, this.expectedCount);
  }

  scan.setTimeRange(startTime, endTime);
  job.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
    RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0);
  return job;
}
 
Example 4
Source File: TestSnapshot.java    From hadoop with Apache License 2.0
@Before
public void setUp() throws Exception {
  conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
      .build();
  cluster.waitActive();

  fsn = cluster.getNamesystem();
  fsdir = fsn.getFSDirectory();
  hdfs = cluster.getFileSystem();
  dirTree = new TestDirectoryTree(DIRECTORY_TREE_LEVEL, hdfs);
}
 
Example 5
Source File: TestFiHFlush.java    From big-c with Apache License 2.0
/** Similar to {@link #hFlushFi01_b()} but writing happens
 * across block and checksum boundaries.
 */
@Test
public void hFlushFi01_c() throws Exception { 
  final String methodName = FiTestUtil.getMethodName();
  Configuration conf = new HdfsConfiguration();
  int customPerChecksumSize = 400;
  int customBlockSize = customPerChecksumSize * 3;
  conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
  runDiskErrorTest(conf, methodName, 
      customBlockSize, new DerrAction(methodName, 0), 0, true);
}
 
Example 6
Source File: TestWriteBlockGetsBlockLengthHint.java    From big-c with Apache License 2.0
@Test
public void blockLengthHintIsPropagated() throws IOException {
  final String METHOD_NAME = GenericTestUtils.getMethodName();
  final Path path = new Path("/" + METHOD_NAME + ".dat");

  Configuration conf = new HdfsConfiguration();
  FsDatasetChecker.setFactory(conf);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_LENGTH);
  conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();

  try {
    cluster.waitActive();

    // FsDatasetChecker#createRbw asserts during block creation if the test
    // fails.
    DFSTestUtil.createFile(
        cluster.getFileSystem(),
        path,
        4096,  // Buffer size.
        EXPECTED_BLOCK_LENGTH,
        EXPECTED_BLOCK_LENGTH,
        (short) 1,
        0x1BAD5EED);
  } finally {
    cluster.shutdown();
  }
}
 
Example 7
Source File: TestProcessCorruptBlocks.java    From hadoop with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. Here that condition is
 * reached by reducing the replication factor.
 * The test strategy:
 *   Bring up a cluster with 3 DataNodes.
 *   Create a file with replication factor 3.
 *   Corrupt one replica of a block of the file.
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (the corrupt replica is not removed since the number of good
 *      replicas (2) is less than the replication factor (3)).
 *   Set the replication factor to 2.
 *   Verify that the corrupt replica is removed
 *     (the corrupt replica is removed since the number of good
 *      replicas (2) now equals the replication factor (2)).
 */
@Test
public void testWhenDecreasingReplication() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    namesystem.setReplication(fileName.toString(), (short) 2);

    // wait for 3 seconds so that all block reports are processed.
    try {
      Thread.sleep(3000);
    } catch (InterruptedException ignored) {
    }

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());

  } finally {
    cluster.shutdown();
  }
}
 
Example 8
Source File: TestDecayRpcScheduler.java    From big-c with Apache License 2.0
@Test
public void testParsePeriod() {
  // By default
  scheduler = new DecayRpcScheduler(1, "", new Configuration());
  assertEquals(DecayRpcScheduler.IPC_CALLQUEUE_DECAYSCHEDULER_PERIOD_DEFAULT,
    scheduler.getDecayPeriodMillis());

  // Custom
  Configuration conf = new Configuration();
  conf.setLong("ns." + DecayRpcScheduler.IPC_CALLQUEUE_DECAYSCHEDULER_PERIOD_KEY,
    1058);
  scheduler = new DecayRpcScheduler(1, "ns", conf);
  assertEquals(1058L, scheduler.getDecayPeriodMillis());
}
 
Example 9
Source File: TestHFlush.java    From hadoop with Apache License 2.0
/**
 * The test calls
 * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
 * while requiring the semantics of {@link SyncFlag#UPDATE_LENGTH}.
 * Similar to {@link #hFlush_03()}, it writes a file with a custom block
 * size so that the writes happen across block and checksum boundaries.
 */
@Test
public void hSyncUpdateLength_03() throws IOException {
  Configuration conf = new HdfsConfiguration();
  int customPerChecksumSize = 400;
  int customBlockSize = customPerChecksumSize * 3;
  // Modify default filesystem settings
  conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);

  doTheJob(conf, fName, customBlockSize, (short) 2, true,
      EnumSet.of(SyncFlag.UPDATE_LENGTH));
}
 
Example 10
Source File: TestShuffleVertexManagerUtils.java    From tez with Apache License 2.0
static ShuffleVertexManager createShuffleVertexManager(
    Configuration conf, VertexManagerPluginContext context,
    Boolean enableAutoParallelism, Long desiredTaskInputSize, Float min,
    Float max) {
  if (min != null) {
    conf.setFloat(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION,
            min);
  } else {
    conf.unset(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION);
  }
  if (max != null) {
    conf.setFloat(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION,
            max);
  } else {
    conf.unset(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION);
  }
  if (enableAutoParallelism != null) {
    conf.setBoolean(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
        enableAutoParallelism);
  }
  if (desiredTaskInputSize != null) {
    conf.setLong(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
        desiredTaskInputSize);
  }
  UserPayload payload;
  try {
    payload = TezUtils.createUserPayloadFromConf(conf);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  when(context.getUserPayload()).thenReturn(payload);
  ShuffleVertexManager manager = new ShuffleVertexManager(context);
  manager.initialize();
  return manager;
}
 
Example 11
Source File: GoogleHadoopFileSystemIntegrationTest.java    From hadoop-connectors with Apache License 2.0
/** Validates success path in initialize(). */
@Test
@Override
public void testInitializeSuccess() throws IOException {
  // Reuse loadConfig() to initialize auth related settings.
  Configuration config = loadConfig();

  // Set up remaining settings to known test values.
  int bufferSize = 512;
  config.setInt(
      GoogleHadoopFileSystemConfiguration.GCS_INPUT_STREAM_BUFFER_SIZE.getKey(), bufferSize);
  long blockSize = 1024;
  config.setLong(GoogleHadoopFileSystemConfiguration.BLOCK_SIZE.getKey(), blockSize);
  String rootBucketName = ghfsHelper.getUniqueBucketName("initialize-root");

  URI initUri = new Path("gs://" + rootBucketName).toUri();
  GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem();
  fs.initialize(initUri, config);
  GoogleCloudStorageOptions cloudStorageOptions =
      fs.getGcsFs().getOptions().getCloudStorageOptions();

  // Verify that config settings were set correctly.
  assertThat(cloudStorageOptions.getReadChannelOptions().getBufferSize()).isEqualTo(bufferSize);
  assertThat(fs.getDefaultBlockSize()).isEqualTo(blockSize);
  assertThat(fs.initUri).isEqualTo(initUri);
  assertThat(fs.getRootBucketName()).isEqualTo(rootBucketName);
}
 
Example 12
Source File: MneConfigHelper.java    From mnemonic with Apache License 2.0
public static void setSlotKeyId(Configuration conf, String prefix, long keyid) {
  conf.setLong(getConfigName(prefix, SLOT_KEY_ID), keyid);
}
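
The read side of a setter like this is usually a getLong() call with a default. The sketch below is an assumed counterpart for illustration; the getter name and the 0L default are assumptions, not necessarily Mnemonic's actual API.

public static long getSlotKeyId(Configuration conf, String prefix) {
  // Assumed counterpart to setSlotKeyId(); 0L is a placeholder default.
  return conf.getLong(getConfigName(prefix, SLOT_KEY_ID), 0L);
}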
 
Example 13
Source File: TestHeapMemoryManager.java    From hbase with Apache License 2.0
@Test
public void testWhenClusterIsReadHeavy() throws Exception {
  BlockCacheStub blockCache = new BlockCacheStub((long) (maxHeapSize * 0.4));
  Configuration conf = HBaseConfiguration.create();
  conf.setFloat(MemorySizeUtil.MEMSTORE_SIZE_LOWER_LIMIT_KEY, 0.7f);
  conf.setFloat(HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY, 0.75f);
  conf.setFloat(HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY, 0.10f);
  conf.setFloat(HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY, 0.7f);
  conf.setFloat(HeapMemoryManager.BLOCK_CACHE_SIZE_MIN_RANGE_KEY, 0.05f);
  conf.setLong(HeapMemoryManager.HBASE_RS_HEAP_MEMORY_TUNER_PERIOD, 1000);
  conf.setInt(DefaultHeapMemoryTuner.NUM_PERIODS_TO_IGNORE, 0);
  RegionServerAccountingStub regionServerAccounting = new RegionServerAccountingStub(conf);
  MemstoreFlusherStub memStoreFlusher =
      new MemstoreFlusherStub((long) (maxHeapSize * 0.4));
  // Empty memstore but nearly full block cache
  blockCache.setTestBlockSize((long) (maxHeapSize * 0.4 * 0.8));
  regionServerAccounting.setTestMemstoreSize(0);
  // Let the system start with default values for memstore heap and block cache size.
  HeapMemoryManager heapMemoryManager = new HeapMemoryManager(blockCache, memStoreFlusher,
      new RegionServerStub(conf), new RegionServerAccountingStub(conf));
  long oldMemstoreHeapSize = memStoreFlusher.memstoreSize;
  long oldBlockCacheSize = blockCache.maxSize;
  long oldMemstoreLowerMarkSize = 7 * oldMemstoreHeapSize / 10;
  long maxTuneSize = oldMemstoreHeapSize -  (oldMemstoreLowerMarkSize + oldMemstoreHeapSize) / 2;
  float maxStepValue = (maxTuneSize * 1.0f) / oldMemstoreHeapSize;
  maxStepValue = maxStepValue > DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE ?
      DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE:maxStepValue;
  final ChoreService choreService = new ChoreService("TEST_SERVER_NAME");
  heapMemoryManager.start(choreService);
  blockCache.evictBlock(null);
  blockCache.evictBlock(null);
  blockCache.evictBlock(null);
  // Allow the tuner to run once and make the necessary memory adjustments
  waitForTune(memStoreFlusher, memStoreFlusher.memstoreSize);
  assertHeapSpaceDelta(-maxStepValue, oldMemstoreHeapSize, memStoreFlusher.memstoreSize);
  assertHeapSpaceDelta(maxStepValue, oldBlockCacheSize, blockCache.maxSize);
  oldMemstoreHeapSize = memStoreFlusher.memstoreSize;
  oldBlockCacheSize = blockCache.maxSize;
  oldMemstoreLowerMarkSize = 7 * oldMemstoreHeapSize / 10;
  maxTuneSize = oldMemstoreHeapSize -  (oldMemstoreLowerMarkSize + oldMemstoreHeapSize) / 2;
  maxStepValue = (maxTuneSize * 1.0f) / oldMemstoreHeapSize;
  maxStepValue = maxStepValue > DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE ?
      DefaultHeapMemoryTuner.DEFAULT_MAX_STEP_VALUE:maxStepValue;
  // Do some more evictions before the next run of HeapMemoryTuner
  blockCache.evictBlock(null);
  // Allow the tuner to run once and make the necessary memory adjustments
  waitForTune(memStoreFlusher, memStoreFlusher.memstoreSize);
  assertHeapSpaceDelta(-maxStepValue, oldMemstoreHeapSize, memStoreFlusher.memstoreSize);
  assertHeapSpaceDelta(maxStepValue, oldBlockCacheSize, blockCache.maxSize);
}
 
Example 14
Source File: TestProcessCorruptBlocks.java    From hadoop with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. This should hold as long
 * as there is at least one good replica. This test verifies that.
 *
 * The test strategy:
 *   Bring up a cluster with 2 DataNodes.
 *   Create a file with replication factor 2.
 *   Corrupt one replica of a block of the file.
 *   Verify that there is 1 good replica and 1 corrupt replica
 *     (the corrupt replica is not removed since the number of good
 *      replicas (1) is less than the replication factor (2)).
 *   Set the replication factor to 1.
 *   Verify that the corrupt replica is removed
 *     (the corrupt replica is removed since the number of good
 *      replicas (1) now equals the replication factor (1)).
 */
@Test(timeout=20000)
public void testWithReplicationFactorAsOne() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 2, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 1);

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    namesystem.setReplication(fileName.toString(), (short) 1);

    // Wait up to 10 seconds, in 1-second steps, for the corrupt replica to be removed.
    for (int i = 0; i < 10; i++) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
      if (countReplicas(namesystem, block).corruptReplicas() == 0) {
        break;
      }
    }

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());

  } finally {
    cluster.shutdown();
  }
}
 
Example 15
Source File: MneConfigHelper.java    From mnemonic with Apache License 2.0
public static void setMemPoolSize(Configuration conf, String prefix, long size) {
  conf.setLong(getConfigName(prefix, MEM_POOL_SIZE), size);
}
 
Example 16
Source File: TestHFileOutputFormat2.java    From hbase with Apache License 2.0
/**
 * Run small MR job.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Lower this value or we OOME in Eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  long hregionMaxFilesize = 10 * 1024;
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, hregionMaxFilesize);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job, false);
  // This partitioner doesn't work well for numeric keys, but we use it anyway
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(CellSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus [] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);

  // Check output file count and size.
  for (byte[] family : FAMILIES) {
    long kvCount= 0;
    RemoteIterator<LocatedFileStatus> iterator =
            fs.listFiles(testDir.suffix("/" + new String(family)), true);
    while (iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      HFile.Reader reader =
              HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
      HFileScanner scanner = reader.getScanner(false, false, false);

      kvCount += reader.getEntries();
      scanner.seekTo();
      long perKVSize = scanner.getCell().getSerializedSize();
      assertTrue("Data size of each file should not be too large.",
              perKVSize * reader.getEntries() <= hregionMaxFilesize);
    }
    assertEquals("Should write expected data in output file.", ROWSPERSPLIT, kvCount);
  }
}
 
Example 17
Source File: TestFsck.java    From big-c with Apache License 2.0
/**
 * Tests that the number of missing block replicas and expected replicas is correct.
 * @throws IOException
 */
@Test
public void testFsckMissingReplicas() throws IOException {
  // Desired replication factor
  // Set this higher than NUM_REPLICAS so it's under-replicated
  final short REPL_FACTOR = 2;
  // Number of replicas to actually start
  final short NUM_REPLICAS = 1;
  // Number of blocks to write
  final short NUM_BLOCKS = 3;
  // Set a small-ish blocksize
  final long blockSize = 512;
  
  Configuration conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  
  MiniDFSCluster cluster = null;
  DistributedFileSystem dfs = null;
  
  try {
    // Startup a minicluster
    cluster = 
        new MiniDFSCluster.Builder(conf).numDataNodes(NUM_REPLICAS).build();
    assertNotNull("Failed Cluster Creation", cluster);
    cluster.waitClusterUp();
    dfs = cluster.getFileSystem();
    assertNotNull("Failed to get FileSystem", dfs);
    
    // Create a file that will be intentionally under-replicated
    final String pathString = new String("/testfile");
    final Path path = new Path(pathString);
    long fileLen = blockSize * NUM_BLOCKS;
    DFSTestUtil.createFile(dfs, path, fileLen, REPL_FACTOR, 1);
    
    // Create an under-replicated file
    NameNode namenode = cluster.getNameNode();
    NetworkTopology nettop = cluster.getNamesystem().getBlockManager()
        .getDatanodeManager().getNetworkTopology();
    Map<String,String[]> pmap = new HashMap<String, String[]>();
    Writer result = new StringWriter();
    PrintWriter out = new PrintWriter(result, true);
    InetAddress remoteAddress = InetAddress.getLocalHost();
    NamenodeFsck fsck = new NamenodeFsck(conf, namenode, nettop, pmap, out, 
        NUM_REPLICAS, remoteAddress);
    
    // Run the fsck and check the Result
    final HdfsFileStatus file = 
        namenode.getRpcServer().getFileInfo(pathString);
    assertNotNull(file);
    Result res = new Result(conf);
    fsck.check(pathString, file, res);
    // Also print the output from the fsck, for ex post facto sanity checks
    System.out.println(result.toString());
    assertEquals(res.missingReplicas, 
        (NUM_BLOCKS*REPL_FACTOR) - (NUM_BLOCKS*NUM_REPLICAS));
    assertEquals(res.numExpectedReplicas, NUM_BLOCKS*REPL_FACTOR);
  } finally {
    if(dfs != null) {
      dfs.close();
    }
    if(cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 18
Source File: AccumuloImportJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void configureOutputFormat(Job job, String tableName,
    String tableClassName) throws ClassNotFoundException, IOException {

  // Use the DelegatingOutputFormat with the AccumuloMutationProcessor.
  job.setOutputFormatClass(getOutputFormatClass());

  Configuration conf = job.getConfiguration();
  conf.setClass("sqoop.output.delegate.field.map.processor.class",
      AccumuloMutationProcessor.class, FieldMapProcessor.class);

  // Set the Accumulo parameters (table, column family, row key):
  conf.set(AccumuloConstants.ZOOKEEPERS,
      options.getAccumuloZookeepers());
  conf.set(AccumuloConstants.ACCUMULO_INSTANCE,
      options.getAccumuloInstance());
  conf.set(AccumuloConstants.ACCUMULO_USER_NAME,
      options.getAccumuloUser());
  String pw = options.getAccumuloPassword();
  if (null == pw) {
    pw = "";
  }
  conf.set(AccumuloConstants.ACCUMULO_PASSWORD, pw);
  conf.set(AccumuloConstants.TABLE_NAME_KEY,
      options.getAccumuloTable());
  conf.set(AccumuloConstants.COL_FAMILY_KEY,
      options.getAccumuloColFamily());
  conf.setLong(AccumuloConstants.BATCH_SIZE,
      options.getAccumuloBatchSize());
  conf.setLong(AccumuloConstants.MAX_LATENCY,
      options.getAccumuloMaxLatency());

  // What column of the input becomes the row key?
  String rowKeyCol = options.getAccumuloRowKeyColumn();
  if (null == rowKeyCol) {
    // User didn't explicitly set one. If there's a split-by column set,
    // use that.
    rowKeyCol = options.getSplitByCol();
  }

  if (null == rowKeyCol) {
    // No split-by column is explicitly set.
    // If the table has a primary key, use that.
    ConnManager manager = getContext().getConnManager();
    rowKeyCol = manager.getPrimaryKey(tableName);
  }

  if (null == rowKeyCol) {
    // Give up here if this is still unset.
    throw new IOException(
        "Could not determine the row-key column. "
            + "Use --accumulo-row-key to specify the input column that "
            + "names each row.");
  }

  conf.set(AccumuloConstants.ROW_KEY_COLUMN_KEY, rowKeyCol);
}
 
Example 19
Source File: TestFsck.java    From hadoop with Apache License 2.0
/**
 * Test for blockIdCK
 */

@Test
public void testBlockIdCK() throws Exception {

  final short REPL_FACTOR = 2;
  short NUM_DN = 2;
  final long blockSize = 512;

  String [] racks = {"/rack1", "/rack2"};
  String [] hosts = {"host1", "host2"};

  Configuration conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);

  MiniDFSCluster cluster = null;
  DistributedFileSystem dfs = null;
  cluster =
    new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
      .racks(racks).build();

  assertNotNull("Failed Cluster Creation", cluster);
  cluster.waitClusterUp();
  dfs = cluster.getFileSystem();
  assertNotNull("Failed to get FileSystem", dfs);

  DFSTestUtil util = new DFSTestUtil.Builder().
    setName(getClass().getSimpleName()).setNumFiles(1).build();
  //create files
  final String pathString = new String("/testfile");
  final Path path = new Path(pathString);
  util.createFile(dfs, path, 1024, REPL_FACTOR , 1000L);
  util.waitReplication(dfs, path, REPL_FACTOR);
  StringBuilder sb = new StringBuilder();
  for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
    sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
  }
  String[] bIds = sb.toString().split(" ");

  //run fsck
  try {
    //illegal input test
    String runFsckResult = runFsck(conf, 0, true, "/", "-blockId",
        "not_a_block_id");
    assertTrue(runFsckResult.contains("Incorrect blockId format:"));

    //general test
    runFsckResult = runFsck(conf, 0, true, "/", "-blockId", sb.toString());
    assertTrue(runFsckResult.contains(bIds[0]));
    assertTrue(runFsckResult.contains(bIds[1]));
    assertTrue(runFsckResult.contains(
        "Block replica on datanode/rack: host1/rack1 is HEALTHY"));
    assertTrue(runFsckResult.contains(
        "Block replica on datanode/rack: host2/rack2 is HEALTHY"));
  } finally {
    cluster.shutdown();
  }
}
 
Example 20
Source File: PhoenixConfigurationUtil.java    From phoenix with Apache License 2.0
public static void setScrutinyExecuteTimestamp(Configuration configuration, long ts) {
    Preconditions.checkNotNull(configuration);
    configuration.setLong(SCRUTINY_EXECUTE_TIMESTAMP, ts);
}