org.apache.hadoop.mapred.ClusterStatus Java Examples

The following examples show how to use org.apache.hadoop.mapred.ClusterStatus. Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
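
As a quick orientation before the examples: a ClusterStatus is obtained from a JobClient and summarizes the classic (MR1) JobTracker view of the cluster. The following minimal sketch (assuming a reachable MR1-style cluster whose address comes from the default classpath configuration, e.g. mapred-site.xml) prints the fields most of the snippets below rely on:

import java.io.IOException;

import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ClusterStatusProbe {
  public static void main(String[] args) throws IOException {
    // JobConf picks up the cluster address from the default configuration.
    JobClient client = new JobClient(new JobConf());
    // Passing true also fetches per-tracker detail such as active tracker names.
    ClusterStatus status = client.getClusterStatus(true);
    System.out.println("task trackers   : " + status.getTaskTrackers());
    System.out.println("running maps    : " + status.getMapTasks());
    System.out.println("running reduces : " + status.getReduceTasks());
    System.out.println("map capacity    : " + status.getMaxMapTasks());
    System.out.println("reduce capacity : " + status.getMaxReduceTasks());
    System.out.println("active trackers : " + status.getActiveTrackerNames());
  }
}
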
Example #1
Source File: GenericMRLoadGenerator.java    From hadoop with Apache License 2.0
/**
 * When no input dir is specified, generate random data.
 */
protected static void confRandom(Job job)
    throws IOException {
  // from RandomWriter
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapOutput.class);

  Configuration conf = job.getConfiguration();
  final ClusterStatus cluster = new JobClient(conf).getClusterStatus();
  int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
  long numBytesToWritePerMap =
    conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    throw new IOException(
        "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
  }
  long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES,
       numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
}
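
The sizing rule above is worth spelling out: with the default 10 maps per host and 1 GB per map, a 4-tracker cluster targets 40 GB and therefore 40 maps, and any non-zero target gets at least one map. A hypothetical standalone restatement of that rule (not part of the original class):

/**
 * Illustrative restatement of confRandom's map-count computation.
 * totalBytesOverride <= 0 means "derive the total from the cluster size".
 */
static int computeNumMaps(int mapsPerHost, long bytesPerMap, int taskTrackers,
                          long totalBytesOverride) {
  long totalBytes = totalBytesOverride > 0
      ? totalBytesOverride
      : (long) mapsPerHost * bytesPerMap * taskTrackers;
  int numMaps = (int) (totalBytes / bytesPerMap);
  // Never schedule zero maps when there is data to write.
  return (numMaps == 0 && totalBytes > 0) ? 1 : numMaps;
}
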
 
Example #2
Source File: GenerateData.java    From RDFS with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
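
The regular expression relies on the MR1 tracker-name convention, which looks like tracker_<host>:<rpc-address>, so group(1) is the host name each split is pinned to. A quick check of the pattern in isolation (the sample name below is illustrative only):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TrackerNameCheck {
  public static void main(String[] args) {
    Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
    // Real names come from ClusterStatus.getActiveTrackerNames().
    Matcher m = trackerPattern.matcher(
        "tracker_node42.example.com:localhost/127.0.0.1:50060");
    if (m.find()) {
      System.out.println(m.group(1)); // prints node42.example.com
    }
  }
}
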
 
Example #3
Source File: JobTrackerJspHelper.java    From RDFS with Apache License 2.0
/**
 * Generates an XML-formatted block that summarizes the state of the JobTracker.
 */
public void generateSummaryTable(JspWriter out,
                                 JobTracker tracker) throws IOException {
  ClusterStatus status = tracker.getClusterStatus();
  int maxMapTasks = status.getMaxMapTasks();
  int maxReduceTasks = status.getMaxReduceTasks();
  int numTaskTrackers = status.getTaskTrackers();
  String tasksPerNodeStr;
  if (numTaskTrackers > 0) {
    double tasksPerNodePct = (double) (maxMapTasks + maxReduceTasks) / (double) numTaskTrackers;
    tasksPerNodeStr = percentFormat.format(tasksPerNodePct);
  } else {
    tasksPerNodeStr = "-";
  }
  out.print("<maps>" + status.getMapTasks() + "</maps>\n" +
          "<reduces>" + status.getReduceTasks() + "</reduces>\n" +
          "<total_submissions>" + tracker.getTotalSubmissions() + "</total_submissions>\n" +
          "<nodes>" + status.getTaskTrackers() + "</nodes>\n" +
          "<map_task_capacity>" + status.getMaxMapTasks() + "</map_task_capacity>\n" +
          "<reduce_task_capacity>" + status.getMaxReduceTasks() + "</reduce_task_capacity>\n" +
          "<avg_tasks_per_node>" + tasksPerNodeStr + "</avg_tasks_per_node>\n");
}
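
For a hypothetical 5-node cluster with 50 map and 20 reduce slots in total, 12 running maps, 3 running reduces, and 87 submissions, the block printed above would read roughly as follows (the avg_tasks_per_node rendering depends on percentFormat's pattern; values are illustrative):

<maps>12</maps>
<reduces>3</reduces>
<total_submissions>87</total_submissions>
<nodes>5</nodes>
<map_task_capacity>50</map_task_capacity>
<reduce_task_capacity>20</reduce_task_capacity>
<avg_tasks_per_node>14.00</avg_tasks_per_node>
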
 
Example #4
Source File: InfrastructureAnalyzer.java    From systemds with Apache License 2.0
/**
 * Analyzes properties of hadoop cluster and configuration.
 */
private static void analyzeHadoopCluster() {
	try {
		JobConf job = ConfigurationManager.getCachedJobConf();
		JobClient client = new JobClient(job);
		ClusterStatus stat = client.getClusterStatus();
		if( stat != null ) { //if in cluster mode
			//analyze cluster status
			_remotePar = stat.getTaskTrackers();
			_remoteParMap = stat.getMaxMapTasks(); 
			_remoteParReduce = stat.getMaxReduceTasks(); 
			
			//analyze pure configuration properties
			analyzeHadoopConfiguration();
		}
	} 
	catch (IOException e) {
		throw new RuntimeException("Unable to analyze infrastructure.",e);
	}
}
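
The three cached fields form the analyzer's parallelism model: total trackers plus cluster-wide map and reduce slot counts. A hedged sketch of how such figures could be turned into a per-node degree of parallelism (helper name hypothetical):

/** Hypothetical helper: per-node map parallelism from the cached figures. */
static int mapSlotsPerNode(int taskTrackers, int clusterMaxMapTasks) {
  // ClusterStatus.getMaxMapTasks() is cluster-wide, so divide by node count.
  return taskTrackers > 0 ? clusterMaxMapTasks / taskTrackers : 1;
}
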
 
Example #5
Source File: StressJobFactory.java    From big-c with Apache License 2.0
/**
 * STRESS mode: on notification from the StatsCollector, collect the
 * cluster metrics and update the current loadStatus with the new load
 * status of the JobTracker.
 *
 * @param item the latest cluster statistics snapshot
 */
@Override
public void update(Statistics.ClusterStats item) {
  ClusterStatus clusterStatus = item.getStatus();
  try {
    // update the max cluster map/reduce task capacity
    loadStatus.updateMapCapacity(clusterStatus.getMaxMapTasks());
    
    loadStatus.updateReduceCapacity(clusterStatus.getMaxReduceTasks());
    
    int numTrackers = clusterStatus.getTaskTrackers();
    int jobLoad = 
      (int) (maxJobTrackerRatio * numTrackers) - item.getNumRunningJob();
    loadStatus.updateJobLoad(jobLoad);
  } catch (Exception e) {
    LOG.error("Couldn't get the new Status",e);
  }
}
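
The load target scales linearly with cluster size: with a hypothetical maxJobTrackerRatio of 2.0, 50 trackers, and 37 running jobs, the computation above yields a jobLoad of 63, i.e. room for 63 more concurrent jobs before the factory throttles submission. In sketch form (names illustrative):

/** Illustrative restatement of the throttle computation above. */
static int remainingJobBudget(double maxJobTrackerRatio, int numTrackers,
                              int numRunningJobs) {
  // e.g. ratio 2.0 with 50 trackers caps the factory at 100 concurrent jobs.
  return (int) (maxJobTrackerRatio * numTrackers) - numRunningJobs;
}
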
 
Example #6
Source File: GenerateData.java    From big-c with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
 
Example #7
Source File: GenericMRLoadGenerator.java    From big-c with Apache License 2.0
/**
 * When no input dir is specified, generate random data.
 */
protected static void confRandom(Job job)
    throws IOException {
  // from RandomWriter
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapOutput.class);

  Configuration conf = job.getConfiguration();
  final ClusterStatus cluster = new JobClient(conf).getClusterStatus();
  int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
  long numBytesToWritePerMap =
    conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    throw new IOException(
        "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
  }
  long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES,
       numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
}
 
Example #8
Source File: StressJobFactory.java    From hadoop with Apache License 2.0
/**
 * STRESS mode: on notification from the StatsCollector, collect the
 * cluster metrics and update the current loadStatus with the new load
 * status of the JobTracker.
 *
 * @param item the latest cluster statistics snapshot
 */
@Override
public void update(Statistics.ClusterStats item) {
  ClusterStatus clusterStatus = item.getStatus();
  try {
    // update the max cluster map/reduce task capacity
    loadStatus.updateMapCapacity(clusterStatus.getMaxMapTasks());
    
    loadStatus.updateReduceCapacity(clusterStatus.getMaxReduceTasks());
    
    int numTrackers = clusterStatus.getTaskTrackers();
    int jobLoad = 
      (int) (maxJobTrackerRatio * numTrackers) - item.getNumRunningJob();
    loadStatus.updateJobLoad(jobLoad);
  } catch (Exception e) {
    LOG.error("Couldn't get the new Status",e);
  }
}
 
Example #9
Source File: GenerateData.java    From hadoop with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
 
Example #10
Source File: InfrastructureAnalyzer.java    From systemds with Apache License 2.0
/**
 * Analyzes properties of hadoop cluster and configuration.
 */
private static void analyzeHadoopCluster() {
	try {
		JobConf job = ConfigurationManager.getCachedJobConf();
		JobClient client = new JobClient(job);
		ClusterStatus stat = client.getClusterStatus();
		if( stat != null ) { //if in cluster mode
			//analyze cluster status
			_remotePar = stat.getTaskTrackers();
			_remoteParMap = stat.getMaxMapTasks(); 
			_remoteParReduce = stat.getMaxReduceTasks(); 
			
			//analyze pure configuration properties
			analyzeHadoopConfiguration();
		}
	} 
	catch (IOException e) {
		throw new RuntimeException("Unable to analyze infrastructure.",e);
	}
}
 
Example #11
Source File: TestCluster.java    From imputationserver with GNU Affero General Public License v3.0
public void start() throws IOException {

		File testCluster = new File(WORKING_DIRECTORY);
		if (testCluster.exists()) {
			FileUtil.deleteDirectory(testCluster);
		}
		testCluster.mkdirs();
		
		File testClusterData = new File(WORKING_DIRECTORY + "/data");
		File testClusterLog = new File(WORKING_DIRECTORY + "/logs");

		
		if (cluster == null) {

			conf = new HdfsConfiguration();		
			conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR,
					testClusterData.getAbsolutePath());
			cluster = new MiniDFSCluster.Builder(conf).build();
			fs = cluster.getFileSystem();

			// set mincluster as default config
			HdfsUtil.setDefaultConfiguration(conf);
			System.setProperty("hadoop.log.dir", testClusterLog.getAbsolutePath());
			MiniMRCluster mrCluster = new MiniMRCluster(1, fs.getUri()
					.toString(), 1, null, null, new JobConf(conf));
			JobConf mrClusterConf = mrCluster.createJobConf();
			HdfsUtil.setDefaultConfiguration(new Configuration(mrClusterConf));

			System.out.println("------");

			JobClient client = new JobClient(mrClusterConf);
			ClusterStatus status = client.getClusterStatus(true);
			System.out.println(status.getActiveTrackerNames());
		}
	}
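
A harness like this normally pairs start() with a shutdown path so repeated test runs do not leak the mini-clusters. A minimal hedged sketch using the fields from the example above (the MiniMRCluster handle would need to be kept as a field to shut it down as well):

public void stop() {
  // MiniDFSCluster.shutdown() stops the NameNode and DataNodes started in start().
  if (cluster != null) {
    cluster.shutdown();
    cluster = null;
    fs = null;
  }
}
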
 
Example #12
Source File: Statistics.java    From hadoop with Apache License 2.0
private void updateAndNotifyClusterStatsListeners(
  ClusterStatus clusterStatus) {
  ClusterStats stats = ClusterStats.getClusterStats();
  stats.setClusterMetric(clusterStatus);
  for (StatListener<ClusterStats> listener : clusterStatlisteners) {
    listener.update(stats);
  }
}
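
Listeners receive that snapshot through StatListener.update(). A minimal illustrative listener that just logs the capacity numbers (registration with Statistics is assumed to happen elsewhere):

// Illustrative listener; ClusterStats.getStatus() exposes the ClusterStatus.
StatListener<ClusterStats> capacityLogger = new StatListener<ClusterStats>() {
  @Override
  public void update(ClusterStats stats) {
    ClusterStatus s = stats.getStatus();
    System.out.println("trackers=" + s.getTaskTrackers()
        + " mapSlots=" + s.getMaxMapTasks()
        + " reduceSlots=" + s.getMaxReduceTasks());
  }
};
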
 
Example #13
Source File: Statistics.java    From big-c with Apache License 2.0
private void updateAndNotifyClusterStatsListeners(
  ClusterStatus clusterStatus) {
  ClusterStats stats = ClusterStats.getClusterStats();
  stats.setClusterMetric(clusterStatus);
  for (StatListener<ClusterStats> listener : clusterStatlisteners) {
    listener.update(stats);
  }
}
 
Example #14
Source File: RandomWriter.java    From incubator-tez with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = new Job(conf);
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);        
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
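
Because the class is a standard Tool (its run(String[]) and getConf() usage above follow that contract), a driver only has to hand it an output directory. A hypothetical launcher, with an illustrative output path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class RandomWriterDriver {
  public static void main(String[] args) throws Exception {
    // Runs the run(String[]) method above against the default cluster config.
    int rc = ToolRunner.run(new Configuration(), new RandomWriter(),
                            new String[] { "/tmp/random-out" });
    System.exit(rc);
  }
}
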
 
Example #15
Source File: RandomWriter.java    From hadoop with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = Job.getInstance(conf);
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);        
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example #16
Source File: RandomTextWriter.java    From RDFS with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormat(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
  }
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = 
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormat(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}
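
The -outFormat handling above lets callers swap in any OutputFormat on the classpath. For example, a hypothetical invocation selecting the plain-text format for the old mapred API (jar and output paths are placeholders):

hadoop jar <examples-jar> randomtextwriter \
    -outFormat org.apache.hadoop.mapred.TextOutputFormat <out-dir>
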
 
Example #17
Source File: RandomWriter.java    From RDFS with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  
  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  
  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}
 
Example #18
Source File: RandomTextWriter.java    From hadoop with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  
  Job job = Job.getInstance(conf);
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);        
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = 
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example #19
Source File: RandomTextWriter.java    From incubator-tez with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  
  Job job = new Job(conf);
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);        
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = 
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example #20
Source File: GenerateDistCacheData.java    From big-c with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
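
Note that the split count here is slot-driven rather than data-driven: 20 trackers with the default 2 map slots each gives 40 splits, and each split is then asked to cover at least DistributedCacheEmulator.AVG_BYTES_PER_MAP bytes. A compact restatement of that sizing (method name illustrative):

/** Illustrative restatement of the per-split target-size rule above. */
static long targetBytesPerSplit(long totalSize, int numTrackers,
                                int mapSlotsPerTracker, long avgBytesPerMap) {
  int numSplits = numTrackers * mapSlotsPerTracker;
  return Math.max(totalSize / numSplits, avgBytesPerMap);
}
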
 
Example #21
Source File: RandomWriter.java    From hadoop-book with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job. It
 * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
 * reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path outDir = new Path(args[0]);
    JobConf job = new JobConf(getConf());

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
    long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
            1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
        return -2;
    }
    long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes",
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
    }

    job.setNumMapTasks(numMaps);
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + (endTime.getTime() - startTime.getTime()) / 1000
            + " seconds.");

    return 0;
}
 
Example #22
Source File: RandomTextWriter.java    From tez with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  
  Job job = new Job(conf);
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);        
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = 
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example #23
Source File: RandomWriter.java    From tez with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = new Job(conf);
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);        
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example #24
Source File: RandomTextWriter.java    From hadoop-gpu with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormat(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
  }
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = 
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormat(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}
 
Example #25
Source File: RandomWriter.java    From hadoop-gpu with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  
  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  
  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}
 
Example #26
Source File: RandomTextWriter.java    From hadoop with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  
  Job job = Job.getInstance(conf);
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);        
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = 
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example #27
Source File: Statistics.java    From hadoop with Apache License 2.0
/**
 * @param metrics
 */
void setClusterMetric(ClusterStatus metrics) {
  this.status = metrics;
}
 
Example #28
Source File: Statistics.java    From big-c with Apache License 2.0
/**
 * @return metrics
 */
public ClusterStatus getStatus() {
  return status;
}
 
Example #29
Source File: Statistics.java    From big-c with Apache License 2.0
/**
 * @param metrics
 */
void setClusterMetric(ClusterStatus metrics) {
  this.status = metrics;
}
 
Example #30
Source File: RandomWriter.java    From hadoop with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = Job.getInstance(conf);
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);        
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}