Java Code Examples for org.apache.hadoop.mapred.JobClient#submitJob()

The following examples show how to use org.apache.hadoop.mapred.JobClient#submitJob(). They are drawn from several open-source projects; each example notes its source file, originating project, and license.
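Before the examples, a minimal sketch of the pattern they all share may help: build a JobConf, call JobClient#submitJob() to submit the job asynchronously, then poll or block on the returned RunningJob. Unlike JobClient.runJob(), submitJob() returns immediately. The class name SubmitJobSketch and the "input"/"output" paths below are illustrative placeholders, not taken from any of the projects listed.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitJobSketch {
  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(SubmitJobSketch.class);
    // Illustrative paths; any existing input directory will do.
    FileInputFormat.setInputPaths(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));

    JobClient client = new JobClient(conf);
    try {
      // submitJob() returns immediately with a RunningJob handle;
      // the job runs on the cluster while this client continues.
      RunningJob job = client.submitJob(conf);
      job.waitForCompletion();
      if (!job.isSuccessful()) {
        throw new IOException("Job " + job.getID() + " failed");
      }
    } finally {
      client.close();
    }
  }
}

The examples that follow differ mainly in how they wait for the submitted job: RunningJob.waitForCompletion(), a manual isComplete() polling loop, or JobClient.monitorAndPrintJob().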
Example 1
Source File: TestMiniMRProxyUser.java    From big-c with Apache License 2.0
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());

  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}
 
Example 2
Source File: TestMiniMRProxyUser.java    From hadoop with Apache License 2.0
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());

  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}
 
Example 3
Source File: DataJoinJob.java    From hadoop-gpu with Apache License 2.0
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
        // ignore the interrupt and keep polling
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!success && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
 
Example 4
Source File: DataJoinJob.java    From hadoop with Apache License 2.0
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
        // ignore the interrupt and keep polling
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!success && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
 
Example 5
Source File: TestEncryptedShuffle.java    From hadoop with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 6
Source File: HadoopAlgoRunner.java    From mr4c with Apache License 2.0
private void submitJob() throws IOException {
	// most of this method copies JobClient.runJob()
	// addition here is logging the job URI
	JobClient client = new JobClient(m_jobConf);
	RunningJob job = client.submitJob(m_jobConf);
	m_log.info("Job URL is [{}]" , job.getTrackingURL());
	try {
		if ( !client.monitorAndPrintJob(m_jobConf, job) ) {
			throw new IOException("Job failed!");
		}
	} catch (InterruptedException ie ) {
		Thread.currentThread().interrupt();
	}
}
 
Example 7
Source File: TestEncryptedShuffle.java    From big-c with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 8
Source File: TestMROldApiJobs.java    From hadoop with Apache License 2.0
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps, 
                         int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
 
Example 9
Source File: StreamJob.java    From hadoop-gpu with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

    if (jar_ != null && isLocalHadoop()) {
      // getAbs became required when shell and subvm have different working dirs...
      File wd = new File(".").getAbsoluteFile();
      StreamUtil.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    boolean error = true;
    running_ = null;
    String lastReport = null;
    try {
      running_ = jc_.submitJob(jobConf_);
      jobId_ = running_.getID();

      LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
      LOG.info("Running job: " + jobId_);
      jobInfo();

      while (!running_.isComplete()) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // ignore the interrupt and keep polling
        }
        running_ = jc_.getJob(jobId_);
        String report = null;
        report = " map " + Math.round(running_.mapProgress() * 100) + "%  reduce "
          + Math.round(running_.reduceProgress() * 100) + "%";

        if (!report.equals(lastReport)) {
          LOG.info(report);
          lastReport = report;
        }
      }
      if (!running_.isSuccessful()) {
        jobInfo();
	LOG.error("Job not Successful!");
	return 1;
      }
      LOG.info("Job complete: " + jobId_);
      LOG.info("Output: " + output_);
      error = false;
    } catch(FileNotFoundException fe) {
      LOG.error("Error launching job , bad input path : " + fe.getMessage());
      return 2;
    } catch(InvalidJobConfException je) {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
      LOG.error("Error launching job , Output path already exists : " 
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
      LOG.error("Error Launching job : " + ioe.getMessage());
      return 5;
    } finally {
      if (error && (running_ != null)) {
        LOG.info("killJob...");
        running_.killJob();
      }
      jc_.close();
    }
    return 0;
  }
 
Example 10
Source File: StreamJob.java    From hadoop with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

    if (jar_ != null && isLocalHadoop()) {
      // getAbs became required when shell and subvm have different working dirs...
      File wd = new File(".").getAbsoluteFile();
      RunJar.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    running_ = null;
    try {
      running_ = jc_.submitJob(jobConf_);
      jobId_ = running_.getID();
      if (background_) {
        LOG.info("Job is running in background.");
      } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) {
        LOG.error("Job not successful!");
        return 1;
      }
      LOG.info("Output directory: " + output_);
    } catch(FileNotFoundException fe) {
      LOG.error("Error launching job , bad input path : " + fe.getMessage());
      return 2;
    } catch(InvalidJobConfException je) {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
      LOG.error("Error launching job , Output path already exists : "
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
      LOG.error("Error Launching job : " + ioe.getMessage());
      return 5;
    } catch (InterruptedException ie) {
      LOG.error("Error monitoring job : " + ie.getMessage());
      return 6;
    } finally {
      jc_.close();
    }
    return 0;
  }
 
Example 11
Source File: TestMRAppWithCombiner.java    From hadoop with Apache License 2.0
static boolean runJob(JobConf conf) throws Exception {
  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
 
Example 12
Source File: TestMRAppWithCombiner.java    From big-c with Apache License 2.0
static boolean runJob(JobConf conf) throws Exception {
  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
 
Example 13
Source File: TestKeyFieldBasedComparator.java    From hadoop-gpu with Apache License 2.0
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path("build/test/test.mapred.spill");
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(2);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set("map.output.key.field.separator", " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files 
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new OutputLogFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines (both the lines must end up in the same
    //reducer since the partitioner takes the same key spec for all
    //lines)
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
 
Example 14
Source File: StreamJob.java    From big-c with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

    if (jar_ != null && isLocalHadoop()) {
      // getAbs became required when shell and subvm have different working dirs...
      File wd = new File(".").getAbsoluteFile();
      RunJar.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    running_ = null;
    try {
      running_ = jc_.submitJob(jobConf_);
      jobId_ = running_.getID();
      if (background_) {
        LOG.info("Job is running in background.");
      } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) {
        LOG.error("Job not successful!");
        return 1;
      }
      LOG.info("Output directory: " + output_);
    } catch(FileNotFoundException fe) {
      LOG.error("Error launching job , bad input path : " + fe.getMessage());
      return 2;
    } catch(InvalidJobConfException je) {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
      LOG.error("Error launching job , Output path already exists : "
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
      LOG.error("Error Launching job : " + ioe.getMessage());
      return 5;
    } catch (InterruptedException ie) {
      LOG.error("Error monitoring job : " + ie.getMessage());
      return 6;
    } finally {
      jc_.close();
    }
    return 0;
  }
 
Example 15
Source File: JobRunner.java    From tracing-framework with BSD 3-Clause "New" or "Revised" License
private void runJobContinuously() throws IOException, InterruptedException {
    System.out.printf("Starting MapReduce %s runner for tenant %d\n", job.getClass().getSimpleName(), tenantClass);
    this.status = "Configuring";

    // Set the HDFS variables in the config
    Configuration conf = new Configuration();
    conf.set("yarn.resourcemanager.hostname", ConfigFactory.load().getString("mapreduce-generator.yarn-resourcemanager-hostname"));
    conf.set("mapreduce.framework.name", "yarn");
    conf.set("fs.defaultFS", ConfigFactory.load().getString("mapreduce-generator.hdfs-namenode-url"));

    // Create a job config and get the job to populate it
    JobConf jobconf = new JobConf(conf);
    job.configure(jobconf);

    // Populate the input data if needed
    this.status = "Initializing input data";
    FileSystem fs = FileSystem.get(conf);
    job.initialize(fs);

    // Now start running the job in a loop
    while (!Thread.currentThread().isInterrupted()) {
        this.status = "Cleaning up Job #" + count;
        // Clear any previous xtrace context
        Baggage.stop();

        // Clean up previous output if necessary
        job.teardown(fs);

        // Set the xtrace metadata for the new job
        boolean sampled = numToSample.getAndDecrement() > 0;
        if (sampled) {
            XTrace.startTask(true);
            xtrace.log("Starting job");
        } else {
            numToSample.getAndIncrement();
        }
        Retro.setTenant(tenantClass);

        // Run the job
        try {
            this.status = "Running Job #" + (count++);
            JobClient jc = new JobClient(jobconf);
            rj = jc.submitJob(jobconf);
            rj.waitForCompletion();
            System.out.println("Job Complete");
            System.out.println(rj);
            rj = null;
        } finally {
            // Log the end of the job and clear the metadata
            if (sampled)
                xtrace.log("Job complete");
            Baggage.stop();
        }
    }
    throw new InterruptedException("JobRunner interrupted");
}
 
Example 16
Source File: TestKeyFieldBasedComparator.java    From hadoop with Apache License 2.0
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files 
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
 
Example 17
Source File: StreamJob.java    From RDFS with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

    if (jar_ != null && isLocalHadoop()) {
      // getAbs became required when shell and subvm have different working dirs...
      File wd = new File(".").getAbsoluteFile();
      StreamUtil.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    boolean error = true;
    running_ = null;
    String lastReport = null;
    try {
      running_ = jc_.submitJob(jobConf_);
      jobId_ = running_.getID();

      LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
      LOG.info("Running job: " + jobId_);
      jobInfo();

      while (!running_.isComplete()) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // ignore the interrupt and keep polling
        }
        running_ = jc_.getJob(jobId_);
        String report = null;
        report = " map " + Math.round(running_.mapProgress() * 100) + "%  reduce "
          + Math.round(running_.reduceProgress() * 100) + "%";

        if (!report.equals(lastReport)) {
          LOG.info(report);
          lastReport = report;
        }
      }
      if (!running_.isSuccessful()) {
        jobInfo();
	LOG.error("Job not Successful!");
	return 1;
      }
      LOG.info("Job complete: " + jobId_);
      LOG.info("Output: " + output_);
      error = false;
    } catch(FileNotFoundException fe) {
      LOG.error("Error launching job , bad input path : " + fe.getMessage());
      return 2;
    } catch(InvalidJobConfException je) {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
      LOG.error("Error launching job , Output path already exists : " 
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
      LOG.error("Error Launching job : " + ioe.getMessage());
      return 5;
    } finally {
      if (error && (running_ != null)) {
        LOG.info("killJob...");
        running_.killJob();
      }
      jc_.close();
    }
    return 0;
  }
 
Example 18
Source File: TestKeyFieldBasedComparator.java    From RDFS with Apache License 2.0
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path("build/test/test.mapred.spill");
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(2);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set("map.output.key.field.separator", " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files 
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines (both the lines must end up in the same
    //reducer since the partitioner takes the same key spec for all
    //lines)
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
 
Example 19
Source File: StreamJob.java    From RDFS with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

    if (jar_ != null && isLocalHadoop()) {
      // getAbs became required when shell and subvm have different working dirs...
      File wd = new File(".").getAbsoluteFile();
      StreamUtil.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    boolean error = true;
    running_ = null;
    String lastReport = null;
    try {
      running_ = jc_.submitJob(jobConf_);
      jobId_ = running_.getJobID();

      LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
      LOG.info("Running job: " + jobId_);
      jobInfo();

      while (!running_.isComplete()) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // ignore the interrupt and keep polling
        }
        running_ = jc_.getJob(jobId_);
        String report = null;
        report = " map " + Math.round(running_.mapProgress() * 100) + "%  reduce "
          + Math.round(running_.reduceProgress() * 100) + "%";

        if (!report.equals(lastReport)) {
          LOG.info(report);
          lastReport = report;
        }
      }
      if (!running_.isSuccessful()) {
        jobInfo();
	LOG.error("Job not Successful!");
	return 1;
      }
      LOG.info("Job complete: " + jobId_);
      LOG.info("Output: " + output_);
      error = false;
    } catch(FileNotFoundException fe) {
      LOG.error("Error launching job , bad input path : " + fe.getMessage());
      return 2;
    } catch(InvalidJobConfException je) {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
      LOG.error("Error launching job , Output path already exists : "
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
      LOG.error("Error Launching job : " + ioe.getMessage());
      return 5;
    } finally {
      if (error && (running_ != null)) {
        LOG.info("killJob...");
        running_.killJob();
      }
      jc_.close();
    }
    return 0;
  }
 
Example 20
Source File: DataFsck.java    From RDFS with Apache License 2.0
List<JobContext> submitJobs(BufferedReader inputReader, int filesPerJob) throws IOException {
  boolean done = false;
  JobClient jClient = new JobClient(createJobConf());
  List<JobContext> submitted = new ArrayList<JobContext>();
  Random rand = new Random();
  do {
    JobConf jobConf = createJobConf();
    final String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36);
    Path jobDir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDir.toString());
    Path log = new Path(jobDir, "_logs");
    FileOutputFormat.setOutputPath(jobConf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobDir.getFileSystem(jobConf);
    Path opList = new Path(jobDir, "_" + OP_LIST_LABEL);
    jobConf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class,
          Text.class, SequenceFile.CompressionType.NONE);
      String f = null;
      do {
        f = inputReader.readLine();
        if (f == null) {
          done = true;
          break;
        }
        opWriter.append(new Text(f), new Text(f));
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      } while (opCount < filesPerJob);
    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }

    jobConf.setInt(OP_COUNT_LABEL, opCount);
    RunningJob rJob = jClient.submitJob(jobConf);
    JobContext ctx = new JobContext(rJob, jobConf);
    submitted.add(ctx);
  } while (!done);

  return submitted;
}