Java Code Examples for org.apache.hadoop.mapred.TaskLog#captureOutAndError()

The following examples show how to use org.apache.hadoop.mapred.TaskLog#captureOutAndError(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Application.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Start the child process to handle the task for us.
 *
 * <p>Opens a listening socket on an automatically allocated port, launches
 * the pipes executable (optionally through an interpreter) with that port and
 * the location of the job-token password file in its environment, accepts the
 * child's connection, and performs the digest challenge/response before
 * starting the downlink protocol.
 *
 * <p>If construction fails after the listening socket has been opened, that
 * socket and any accepted client socket are closed before the exception
 * propagates, because the caller never receives an instance it could clean up.
 *
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException if the sockets, the password file, or the child process
 *         cannot be set up
 * @throws InterruptedException if the calling thread is interrupted during
 *         setup
 */
Application(JobConf conf, 
            RecordReader<FloatWritable, NullWritable> recordReader, 
            OutputCollector<K2,V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  // Tracked locally so the failure path can release the connection without
  // depending on how the clientSocket field is declared.
  java.net.Socket accepted = null;
  boolean initialized = false;
  try {
    Map<String, String> env = new HashMap<String,String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    // tell the child which port to connect back to
    env.put(Submitter.PORT, 
            Integer.toString(serverSocket.getLocalPort()));

    // Hand the job token's password to the child through a local file whose
    // path is exported in the environment.
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf
        .getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[]  password = jobToken.getPassword();
    String localPasswordFile = new File(".") + Path.SEPARATOR
        + "jobTokenPassword";
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
      cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!FileUtil.canExecute(new File(executable))) {
      // LinuxTaskController sets +x permissions on all distcache files already.
      // In case of DefaultTaskController, set permissions here.
      FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt. 
    TaskAttemptID taskid = 
      TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,
                                     false);

    process = runClient(cmd, env);
    accepted = serverSocket.accept();
    clientSocket = accepted;

    // digestToSend answers the challenge; digestExpected is what the handler
    // is given to compare against during authentication.
    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, 
        digestExpected);
    K2 outputKey = (K2)
      ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) 
      ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, 
                                  outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
    initialized = true;
  } finally {
    if (!initialized) {
      // The constructor is failing, so no caller will ever hold this object;
      // release the sockets here or they leak.
      // NOTE(review): the child started by runClient is not terminated here;
      // presumably it exits once its connection is gone -- confirm.
      if (accepted != null) {
        try {
          accepted.close();
        } catch (IOException ignored) {
          // best effort: do not mask the exception that caused the failure
        }
      }
      try {
        serverSocket.close();
      } catch (IOException ignored) {
        // best effort: do not mask the exception that caused the failure
      }
    }
  }
}
 
Example 2
Source File: Application.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Start the child process to handle the task for us.
 *
 * <p>Opens a listening socket on an automatically allocated port, launches
 * the pipes executable (optionally through an interpreter) with that port and
 * the location of the job-token password file in its environment, accepts the
 * child's connection, and performs the digest challenge/response before
 * starting the downlink protocol.
 *
 * <p>If construction fails after the listening socket has been opened, that
 * socket and any accepted client socket are closed before the exception
 * propagates, because the caller never receives an instance it could clean up.
 *
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException if the sockets, the password file, or the child process
 *         cannot be set up
 * @throws InterruptedException if the calling thread is interrupted during
 *         setup
 */
Application(JobConf conf, 
            RecordReader<FloatWritable, NullWritable> recordReader, 
            OutputCollector<K2,V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  // Tracked locally so the failure path can release the connection without
  // depending on how the clientSocket field is declared.
  java.net.Socket accepted = null;
  boolean initialized = false;
  try {
    Map<String, String> env = new HashMap<String,String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    // tell the child which port to connect back to
    env.put(Submitter.PORT, 
            Integer.toString(serverSocket.getLocalPort()));

    // Hand the job token's password to the child through a local file whose
    // path is exported in the environment.
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf
        .getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[]  password = jobToken.getPassword();
    String localPasswordFile = new File(".") + Path.SEPARATOR
        + "jobTokenPassword";
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
      cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!FileUtil.canExecute(new File(executable))) {
      // LinuxTaskController sets +x permissions on all distcache files already.
      // In case of DefaultTaskController, set permissions here.
      FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt. 
    TaskAttemptID taskid = 
      TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,
                                     false);

    process = runClient(cmd, env);
    accepted = serverSocket.accept();
    clientSocket = accepted;

    // digestToSend answers the challenge; digestExpected is what the handler
    // is given to compare against during authentication.
    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, 
        digestExpected);
    K2 outputKey = (K2)
      ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) 
      ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, 
                                  outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
    initialized = true;
  } finally {
    if (!initialized) {
      // The constructor is failing, so no caller will ever hold this object;
      // release the sockets here or they leak.
      // NOTE(review): the child started by runClient is not terminated here;
      // presumably it exits once its connection is gone -- confirm.
      if (accepted != null) {
        try {
          accepted.close();
        } catch (IOException ignored) {
          // best effort: do not mask the exception that caused the failure
        }
      }
      try {
        serverSocket.close();
      } catch (IOException ignored) {
        // best effort: do not mask the exception that caused the failure
      }
    }
  }
}
 
Example 3
Source File: Application.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Start the child process to handle the task for us.
 *
 * <p>Opens a listening socket on an automatically allocated port, launches
 * the pipes executable (optionally through an interpreter) with that port in
 * its environment, accepts the child's connection, and starts the downlink
 * protocol over it.
 *
 * <p>If construction fails after the listening socket has been opened, that
 * socket and any accepted client socket are closed before the exception
 * propagates, because the caller never receives an instance it could clean up.
 *
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException if the sockets or the child process cannot be set up
 * @throws InterruptedException if the calling thread is interrupted during
 *         setup
 */
Application(JobConf conf, 
            RecordReader<FloatWritable, NullWritable> recordReader, 
            OutputCollector<K2,V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  // Tracked locally so the failure path can release the connection without
  // depending on how the clientSocket field is declared.
  java.net.Socket accepted = null;
  boolean initialized = false;
  try {
    Map<String, String> env = new HashMap<String,String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    // tell the child which port to connect back to
    env.put("hadoop.pipes.command.port", 
            Integer.toString(serverSocket.getLocalPort()));
    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get("hadoop.pipes.executable.interpretor");
    if (interpretor != null) {
      cmd.add(interpretor);
    }

    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    FileUtil.chmod(executable, "a+x");
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,
        false);

    process = runClient(cmd, env);
    accepted = serverSocket.accept();
    clientSocket = accepted;
    handler = new OutputHandler<K2, V2>(output, reporter, recordReader);
    K2 outputKey = (K2)
      ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) 
      ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, 
                                  outputKey, outputValue, conf);
    downlink.start();
    downlink.setJobConf(conf);
    initialized = true;
  } finally {
    if (!initialized) {
      // The constructor is failing, so no caller will ever hold this object;
      // release the sockets here or they leak.
      // NOTE(review): the child started by runClient is not terminated here;
      // presumably it exits once its connection is gone -- confirm.
      if (accepted != null) {
        try {
          accepted.close();
        } catch (IOException ignored) {
          // best effort: do not mask the exception that caused the failure
        }
      }
      try {
        serverSocket.close();
      } catch (IOException ignored) {
        // best effort: do not mask the exception that caused the failure
      }
    }
  }
}
 
Example 4
Source File: Application.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Start the child process to handle the task for us. 
 *
 * <p>Opens a listening socket on an automatically allocated port, launches
 * the selected pipes executable (optionally through an interpreter) with that
 * port in its environment, accepts the child's connection, and starts the
 * downlink protocol over it.
 *
 * <p>If construction fails after the listening socket has been opened, that
 * socket and any accepted client socket are closed before the exception
 * propagates, because the caller never receives an instance it could clean up.
 *
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @param runOnGPU if {@code true}, launch the second localized cache file
 *        (index 1) as the executable; otherwise launch the first (index 0)
 * @throws IOException if the sockets or the child process cannot be set up,
 *         or if no localized cache file exists at the selected index
 * @throws InterruptedException if the calling thread is interrupted during
 *         setup
 */
Application(JobConf conf, 
            RecordReader<FloatWritable, NullWritable> recordReader, 
            OutputCollector<K2,V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass,
            boolean runOnGPU
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  // Tracked locally so the failure path can release the connection without
  // depending on how the clientSocket field is declared.
  java.net.Socket accepted = null;
  boolean initialized = false;
  try {
    Map<String, String> env = new HashMap<String,String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    // tell the child which port to connect back to
    env.put("hadoop.pipes.command.port", 
            Integer.toString(serverSocket.getLocalPort()));
    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get("hadoop.pipes.executable.interpretor");
    if (interpretor != null) {
      cmd.add(interpretor);
    }

    // Check whether the application will run on GPU
    int i = runOnGPU ? 1 : 0;
    // Held as Object[] because only toString() is needed on the entry.
    Object[] localFiles = DistributedCache.getLocalCacheFiles(conf);
    if (localFiles == null || localFiles.length <= i) {
      // Fail with a diagnosable message instead of a bare
      // NullPointerException / ArrayIndexOutOfBoundsException.
      throw new IOException("No pipes executable in the distributed cache at"
          + " index " + i + " (runOnGPU=" + runOnGPU + ")");
    }
    String executable = localFiles[i].toString();
    FileUtil.chmod(executable, "a+x");
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(cmd, stdout, stderr, logLength);

    process = runClient(cmd, env);
    accepted = serverSocket.accept();
    clientSocket = accepted;
    handler = new OutputHandler<K2, V2>(output, reporter, recordReader);
    K2 outputKey = (K2)
      ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) 
      ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, 
                                  outputKey, outputValue, conf);
    downlink.start();
    downlink.setJobConf(conf);
    initialized = true;
  } finally {
    if (!initialized) {
      // The constructor is failing, so no caller will ever hold this object;
      // release the sockets here or they leak.
      // NOTE(review): the child started by runClient is not terminated here;
      // presumably it exits once its connection is gone -- confirm.
      if (accepted != null) {
        try {
          accepted.close();
        } catch (IOException ignored) {
          // best effort: do not mask the exception that caused the failure
        }
      }
      try {
        serverSocket.close();
      } catch (IOException ignored) {
        // best effort: do not mask the exception that caused the failure
      }
    }
  }
}