org.apache.hadoop.mapreduce.server.tasktracker.TTConfig Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.server.tasktracker.TTConfig. Each example is drawn from an open source project; its source file, originating project, and license are noted in the header above the code.
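
For orientation, the three TTConfig constants exercised on this page resolve to TaskTracker property names. The literal strings in the comments below follow the Hadoop 2.x source and should be treated as an assumption for other versions; a minimal probe:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;

public class TTConfigProbe {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // TT_MAP_SLOTS ~ "mapreduce.tasktracker.map.tasks.maximum":
    // map slots available per TaskTracker
    int mapSlots = conf.getInt(TTConfig.TT_MAP_SLOTS, 2);
    // TT_INDEX_CACHE ~ "mapreduce.tasktracker.indexcache.mb":
    // index cache budget, in megabytes
    int indexCacheMb = conf.getInt(TTConfig.TT_INDEX_CACHE, 10);
    // TT_RESOURCE_CALCULATOR_PLUGIN ~
    // "mapreduce.tasktracker.resourcecalculatorplugin": plugin class name
    String plugin = conf.get(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN);
    System.out.printf("mapSlots=%d indexCacheMb=%d plugin=%s%n",
        mapSlots, indexCacheMb, plugin);
  }
}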
Example #1
Source File: TeraScheduler.java    From hadoop with Apache License 2.0
public TeraScheduler(FileSplit[] realSplits,
                     Configuration conf) throws IOException {
  this.realSplits = realSplits;
  this.slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
  Map<String, Host> hostTable = new HashMap<String, Host>();
  splits = new Split[realSplits.length];
  for(FileSplit realSplit: realSplits) {
    Split split = new Split(realSplit.getPath().toString());
    splits[remainingSplits++] = split;
    for(String hostname: realSplit.getLocations()) {
      Host host = hostTable.get(hostname);
      if (host == null) {
        host = new Host(hostname);
        hostTable.put(hostname, host);
        hosts.add(host);
      }
      host.splits.add(split);
      split.locations.add(host);
    }
  }
}
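
The constructor sizes each host's scheduling capacity from TTConfig.TT_MAP_SLOTS, falling back to 4 map slots per host when the key is unset. A hedged driving sketch (TeraScheduler is package-private in the terasort package, and inputSplits is assumed to be the FileSplit[] produced for the job's input):

Configuration conf = new Configuration();
conf.setInt(TTConfig.TT_MAP_SLOTS, 8); // model trackers with 8 map slots each
// inputSplits: assumed to come from the job's input format, e.g. TeraInputFormat
TeraScheduler scheduler = new TeraScheduler(inputSplits, conf);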
 
Example #2
Source File: LoadJob.java    From hadoop with Apache License 2.0
ResourceUsageMatcherRunner(final TaskInputOutputContext context, 
                           ResourceUsageMetrics metrics) {
  Configuration conf = context.getConfiguration();
  
  // set the resource calculator plugin
  Class<? extends ResourceCalculatorPlugin> clazz =
    conf.getClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
                  null, ResourceCalculatorPlugin.class);
  ResourceCalculatorPlugin plugin = 
    ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
  
  // set the other parameters
  this.sleepTime = conf.getLong(SLEEP_CONFIG, DEFAULT_SLEEP_TIME);
  progress = new BoostingProgress(context);
  
  // instantiate a resource-usage-matcher
  matcher = new ResourceUsageMatcher();
  matcher.configure(conf, plugin, metrics, progress);
}
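
When TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN is unset, conf.getClass(...) returns the null default, and getResourceCalculatorPlugin is then expected (per the Hadoop source; hedged) to fall back to a platform-appropriate implementation, or none. To pin a specific plugin instead, a sketch along these lines should work; MyResourceCalculatorPlugin is a hypothetical ResourceCalculatorPlugin subclass standing in for a real implementation:

Configuration conf = new Configuration();
// Bypass auto-detection by naming the plugin class explicitly.
conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
              MyResourceCalculatorPlugin.class,   // hypothetical subclass
              ResourceCalculatorPlugin.class);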
 
Example #3
Source File: TeraScheduler.java    From pravega-samples with Apache License 2.0
public TeraScheduler(FileSplit[] realSplits,
                     Configuration conf) throws IOException {
  this.realSplits = realSplits;
  this.slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
  Map<String, Host> hostTable = new HashMap<String, Host>();
  splits = new Split[realSplits.length];
  for(FileSplit realSplit: realSplits) {
    Split split = new Split(realSplit.getPath().toString());
    splits[remainingSplits++] = split;
    for(String hostname: realSplit.getLocations()) {
      Host host = hostTable.get(hostname);
      if (host == null) {
        host = new Host(hostname);
        hostTable.put(hostname, host);
        hosts.add(host);
      }
      host.splits.add(split);
      split.locations.add(host);
    }
  }
}
 
Example #4
Source File: TeraScheduler.java    From big-c with Apache License 2.0
public TeraScheduler(FileSplit[] realSplits,
                     Configuration conf) throws IOException {
  this.realSplits = realSplits;
  this.slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
  Map<String, Host> hostTable = new HashMap<String, Host>();
  splits = new Split[realSplits.length];
  for(FileSplit realSplit: realSplits) {
    Split split = new Split(realSplit.getPath().toString());
    splits[remainingSplits++] = split;
    for(String hostname: realSplit.getLocations()) {
      Host host = hostTable.get(hostname);
      if (host == null) {
        host = new Host(hostname);
        hostTable.put(hostname, host);
        hosts.add(host);
      }
      host.splits.add(split);
      split.locations.add(host);
    }
  }
}
 
Example #5
Source File: LoadJob.java    From big-c with Apache License 2.0
ResourceUsageMatcherRunner(final TaskInputOutputContext context, 
                           ResourceUsageMetrics metrics) {
  Configuration conf = context.getConfiguration();
  
  // set the resource calculator plugin
  Class<? extends ResourceCalculatorPlugin> clazz =
    conf.getClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
                  null, ResourceCalculatorPlugin.class);
  ResourceCalculatorPlugin plugin = 
    ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
  
  // set the other parameters
  this.sleepTime = conf.getLong(SLEEP_CONFIG, DEFAULT_SLEEP_TIME);
  progress = new BoostingProgress(context);
  
  // instantiate a resource-usage-matcher
  matcher = new ResourceUsageMatcher();
  matcher.configure(conf, plugin, metrics, progress);
}
 
Example #6
Source File: TeraScheduler.java    From incubator-tez with Apache License 2.0
public TeraScheduler(FileSplit[] realSplits,
                     Configuration conf) throws IOException {
  this.realSplits = realSplits;
  this.slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
  Map<String, Host> hostTable = new HashMap<String, Host>();
  splits = new Split[realSplits.length];
  for(FileSplit realSplit: realSplits) {
    Split split = new Split(realSplit.getPath().toString());
    splits[remainingSplits++] = split;
    for(String hostname: realSplit.getLocations()) {
      Host host = hostTable.get(hostname);
      if (host == null) {
        host = new Host(hostname);
        hostTable.put(hostname, host);
        hosts.add(host);
      }
      host.splits.add(split);
      split.locations.add(host);
    }
  }
}
 
Example #7
Source File: TestIndexCache.java    From hadoop with Apache License 2.0
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
      UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
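
For contrast with the corrupt index above, a hedged sketch of a successful lookup through the same cache, reusing the test fixtures (fs, p, conf) and assuming a well-formed index named "goodindex" has already been written under p:

conf.setInt(TTConfig.TT_INDEX_CACHE, 1); // cap the cache at 1 MB
IndexCache cache = new IndexCache(conf);
IndexRecord rec = cache.getIndexInformation("goodindex", 0,
    new Path(p, "goodindex"),
    UserGroupInformation.getCurrentUser().getShortUserName());
// rec.startOffset, rec.rawLength and rec.partLength describe partition 0.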
 
Example #8
Source File: TestIndexCache.java    From big-c with Apache License 2.0
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
      UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
 
Example #9
Source File: IndexCache.java    From hadoop with Apache License 2.0
public IndexCache(JobConf conf) {
  this.conf = conf;
  totalMemoryAllowed =
    conf.getInt(TTConfig.TT_INDEX_CACHE, 10) * 1024 * 1024;
  LOG.info("IndexCache created with max memory = " + totalMemoryAllowed);
}
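
The budget is simply the configured megabyte value scaled to bytes. A quick sketch of overriding the 10 MB default:

JobConf conf = new JobConf();
conf.setInt(TTConfig.TT_INDEX_CACHE, 64); // allow 64 MB of index data
IndexCache cache = new IndexCache(conf);  // 64 * 1024 * 1024 = 67108864 bytes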
 
Example #10
Source File: TestIndexCache.java    From hadoop with Apache License 2.0
public void testRemoveMap() throws Exception {
  // This test uses two threads that call getIndexInformation and
  // removeMap concurrently, in order to construct a race condition.
  // The test may not be repeatable in theory, but on my MacBook it
  // fails with 100% probability on code predating MAPREDUCE-2541,
  // so it is repeatable in practice.
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 10);
  // Make a big file so removeMapThread almost surely runs faster than 
  // getInfoThread 
  final int partsPerMap = 100000;
  final int bytesPerFile = partsPerMap * 24;
  final IndexCache cache = new IndexCache(conf);

  final Path big = new Path(p, "bigIndex");
  final String user = 
    UserGroupInformation.getCurrentUser().getShortUserName();
  writeFile(fs, big, bytesPerFile, partsPerMap);
  
  // run multiple times
  for (int i = 0; i < 20; ++i) {
    Thread getInfoThread = new Thread() {
      @Override
      public void run() {
        try {
          cache.getIndexInformation("bigIndex", partsPerMap, big, user);
        } catch (Exception e) {
          // should not be here
        }
      }
    };
    Thread removeMapThread = new Thread() {
      @Override
      public void run() {
        cache.removeMap("bigIndex");
      }
    };
    if (i%2==0) {
      getInfoThread.start();
      removeMapThread.start();        
    } else {
      removeMapThread.start();        
      getInfoThread.start();
    }
    getInfoThread.join();
    removeMapThread.join();
    assertEquals(true, cache.checkTotalMemoryUsed());
  }      
}
 
Example #11
Source File: GenerateDistCacheData.java    From hadoop with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
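
The number of splits is the tracker count times TTConfig.TT_MAP_SLOTS (default 2), and each split targets at least totalSize / numSplits bytes, floored by AVG_BYTES_PER_MAP. A hedged arithmetic sketch with 10 trackers, the default slot count, and 100 GB of distributed-cache data:

int numSplits = 10 * jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2); // 10 * 2 = 20
long totalSize = 100L * 1024 * 1024 * 1024;                    // 100 GB
long targetSize = Math.max(totalSize / numSplits,              // 5 GB per split
                           DistributedCacheEmulator.AVG_BYTES_PER_MAP);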
 
Example #12
Source File: TestResourceUsageEmulators.java    From hadoop with Apache License 2.0
/**
 * Test {@link LoadJob.ResourceUsageMatcherRunner}.
 */
@Test
@SuppressWarnings("unchecked")
public void testResourceUsageMatcherRunner() throws Exception {
  Configuration conf = new Configuration();
  FakeProgressive progress = new FakeProgressive();
  
  // set the resource calculator plugin
  conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
                DummyResourceCalculatorPlugin.class, 
                ResourceCalculatorPlugin.class);
  // set the resources
  // set the resource implementation class
  conf.setClass(ResourceUsageMatcher.RESOURCE_USAGE_EMULATION_PLUGINS, 
                TestResourceUsageEmulatorPlugin.class, 
                ResourceUsageEmulatorPlugin.class);
  
  long currentTime = System.currentTimeMillis();
  
  // initialize the matcher class
  TaskAttemptID id = new TaskAttemptID("test", 1, TaskType.MAP, 1, 1);
  StatusReporter reporter = new DummyReporter(progress);
  TaskInputOutputContext context = 
    new MapContextImpl(conf, id, null, null, null, reporter, null);
  FakeResourceUsageMatcherRunner matcher = 
    new FakeResourceUsageMatcherRunner(context, null);
  
  // check if the matcher initialized the plugin
  String identifier = TestResourceUsageEmulatorPlugin.DEFAULT_IDENTIFIER;
  long initTime = 
    TestResourceUsageEmulatorPlugin.testInitialization(identifier, conf);
  assertTrue("ResourceUsageMatcherRunner failed to initialize the"
             + " configured plugin", initTime > currentTime);
  
  // check the progress
  assertEquals("Progress mismatch in ResourceUsageMatcherRunner", 
               0, progress.getProgress(), 0D);
  
  // call match() and check progress
  progress.setProgress(0.01f);
  currentTime = System.currentTimeMillis();
  matcher.test();
  long emulateTime = 
    TestResourceUsageEmulatorPlugin.testEmulation(identifier, conf);
  assertTrue("ProgressBasedResourceUsageMatcher failed to load and emulate"
             + " the configured plugin", emulateTime > currentTime);
}
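
The emulation plugins configured under RESOURCE_USAGE_EMULATION_PLUGINS implement the Gridmix ResourceUsageEmulatorPlugin interface. A no-op sketch follows; the method names track the Hadoop 2.x Gridmix source, and the exact signatures should be treated as an assumption for other versions (imports elided; the types come from the Gridmix, Rumen, and YARN util packages):

// Hypothetical no-op emulator: registers cleanly but generates no load.
public class NoOpEmulatorPlugin implements ResourceUsageEmulatorPlugin {
  @Override
  public void initialize(Configuration conf, ResourceUsageMetrics metrics,
      ResourceCalculatorPlugin monitor, Progressive progress) {
    // nothing to set up
  }
  @Override
  public void emulate() throws IOException, InterruptedException {
    // no resource usage to emulate
  }
  @Override
  public float getProgress() {
    return 1.0f; // report completion immediately
  }
}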
 
Example #13
Source File: IndexCache.java    From big-c with Apache License 2.0
public IndexCache(JobConf conf) {
  this.conf = conf;
  totalMemoryAllowed =
    conf.getInt(TTConfig.TT_INDEX_CACHE, 10) * 1024 * 1024;
  LOG.info("IndexCache created with max memory = " + totalMemoryAllowed);
}
 
Example #14
Source File: TestIndexCache.java    From big-c with Apache License 2.0
public void testRemoveMap() throws Exception {
  // This test uses two threads that call getIndexInformation and
  // removeMap concurrently, in order to construct a race condition.
  // The test may not be repeatable in theory, but on my MacBook it
  // fails with 100% probability on code predating MAPREDUCE-2541,
  // so it is repeatable in practice.
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 10);
  // Make a big file so removeMapThread almost surely runs faster than 
  // getInfoThread 
  final int partsPerMap = 100000;
  final int bytesPerFile = partsPerMap * 24;
  final IndexCache cache = new IndexCache(conf);

  final Path big = new Path(p, "bigIndex");
  final String user = 
    UserGroupInformation.getCurrentUser().getShortUserName();
  writeFile(fs, big, bytesPerFile, partsPerMap);
  
  // run multiple times
  for (int i = 0; i < 20; ++i) {
    Thread getInfoThread = new Thread() {
      @Override
      public void run() {
        try {
          cache.getIndexInformation("bigIndex", partsPerMap, big, user);
        } catch (Exception e) {
          // should not be here
        }
      }
    };
    Thread removeMapThread = new Thread() {
      @Override
      public void run() {
        cache.removeMap("bigIndex");
      }
    };
    if (i%2==0) {
      getInfoThread.start();
      removeMapThread.start();        
    } else {
      removeMapThread.start();        
      getInfoThread.start();
    }
    getInfoThread.join();
    removeMapThread.join();
    assertEquals(true, cache.checkTotalMemoryUsed());
  }      
}
 
Example #15
Source File: GenerateDistCacheData.java    From big-c with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
 
Example #16
Source File: TestResourceUsageEmulators.java    From big-c with Apache License 2.0
/**
 * Test {@link LoadJob.ResourceUsageMatcherRunner}.
 */
@Test
@SuppressWarnings("unchecked")
public void testResourceUsageMatcherRunner() throws Exception {
  Configuration conf = new Configuration();
  FakeProgressive progress = new FakeProgressive();
  
  // set the resource calculator plugin
  conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
                DummyResourceCalculatorPlugin.class, 
                ResourceCalculatorPlugin.class);
  // set the resources
  // set the resource implementation class
  conf.setClass(ResourceUsageMatcher.RESOURCE_USAGE_EMULATION_PLUGINS, 
                TestResourceUsageEmulatorPlugin.class, 
                ResourceUsageEmulatorPlugin.class);
  
  long currentTime = System.currentTimeMillis();
  
  // initialize the matcher class
  TaskAttemptID id = new TaskAttemptID("test", 1, TaskType.MAP, 1, 1);
  StatusReporter reporter = new DummyReporter(progress);
  TaskInputOutputContext context = 
    new MapContextImpl(conf, id, null, null, null, reporter, null);
  FakeResourceUsageMatcherRunner matcher = 
    new FakeResourceUsageMatcherRunner(context, null);
  
  // check if the matcher initialized the plugin
  String identifier = TestResourceUsageEmulatorPlugin.DEFAULT_IDENTIFIER;
  long initTime = 
    TestResourceUsageEmulatorPlugin.testInitialization(identifier, conf);
  assertTrue("ResourceUsageMatcherRunner failed to initialize the"
             + " configured plugin", initTime > currentTime);
  
  // check the progress
  assertEquals("Progress mismatch in ResourceUsageMatcherRunner", 
               0, progress.getProgress(), 0D);
  
  // call match() and check progress
  progress.setProgress(0.01f);
  currentTime = System.currentTimeMillis();
  matcher.test();
  long emulateTime = 
    TestResourceUsageEmulatorPlugin.testEmulation(identifier, conf);
  assertTrue("ProgressBasedResourceUsageMatcher failed to load and emulate"
             + " the configured plugin", emulateTime > currentTime);
}