Java Code Examples for org.apache.hadoop.mapreduce.Job#getJobID()

The following examples show how to use org.apache.hadoop.mapreduce.Job#getJobID(). Each example is taken from an open-source project; the project, source file, and license are noted above each snippet.
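Before the project examples, here is a minimal sketch of the pattern most of them rely on: getJobID() returns null until the job has been submitted, and only then yields the cluster-assigned ID. The class name, job name, and elided configuration below are placeholders for illustration, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class JobIdExample {

    public static JobID submitAndGetId(Configuration conf) throws Exception {
        Job job = Job.getInstance(conf, "job-id-example");
        // Mapper/reducer classes and input/output paths are elided here;
        // a real job needs them before it can be submitted.

        // getJobID() returns null until submit() (or waitForCompletion())
        // has handed the job to the cluster.
        job.submit();
        JobID jobId = job.getJobID();
        System.out.println("Submitted job with id: " + jobId);
        return jobId;
    }
}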
Example 1
Source File: FastIndex.java    From ES-Fastloader with Apache License 2.0
private void startGetJobIdThread(Job job) {
    Thread thread = new Thread(new Runnable() {
        @Override
        public void run() {
            while (true) {
                if (job.getJobID() != null) {
                    LogUtils.info("map reducer jobId:" + job.getJobID());
                    break;
                }

                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }


        }
    });

    thread.setDaemon(true);
    thread.setName("getJobId");
    thread.start();
}
 
Example 2
Source File: S3MapReduceCpCopier.java    From circus-train with Apache License 2.0
@Override
public Metrics copy() throws CircusTrainException {
  LOG.info("Copying table data.");
  LOG.debug("Invoking S3MapReduceCp: {} -> {}", sourceDataBaseLocation, replicaDataLocation);

  S3MapReduceCpOptions s3MapReduceCpOptions = parseCopierOptions(copierOptions);
  LOG.debug("Invoking S3MapReduceCp with options: {}", s3MapReduceCpOptions);

  try {
    Enum<?> counter = Counter.BYTESCOPIED;
    Job job = executor.exec(conf, s3MapReduceCpOptions);
    registerRunningJobMetrics(job, counter);
    if (!job.waitForCompletion(true)) {
      throw new IOException(
          "S3MapReduceCp failure: Job " + job.getJobID() + " has failed: " + job.getStatus().getFailureInfo());
    }

    return new JobMetrics(job, counter);
  } catch (Exception e) {
    cleanUpReplicaDataLocation();
    throw new CircusTrainException("Unable to copy file(s)", e);
  }
}
 
Example 3
Source File: Statistics.java    From hadoop with Apache License 2.0
/**
 * Generates a job stats.
 */
public static JobStats generateJobStats(Job job, JobStory jobdesc) {
  int seq = GridmixJob.getJobSeqId(job);
  // bail out if job description is missing for a job to be simulated
  if (seq >= 0 && jobdesc == null) {
    throw new IllegalArgumentException("JobStory not available for job " 
                                       + job.getJobID());
  }
  
  int maps = -1;
  int reds = -1;
  if (jobdesc != null) {
    // Note that the ZombieJob will return a >= 0 value
    maps = jobdesc.getNumberMaps();
    reds = jobdesc.getNumberReduces();
  }
  return new JobStats(maps, reds, job);
}
 
Example 4
Source File: Statistics.java    From big-c with Apache License 2.0
/**
 * Generates a job stats.
 */
public static JobStats generateJobStats(Job job, JobStory jobdesc) {
  int seq = GridmixJob.getJobSeqId(job);
  // bail out if job description is missing for a job to be simulated
  if (seq >= 0 && jobdesc == null) {
    throw new IllegalArgumentException("JobStory not available for job " 
                                       + job.getJobID());
  }
  
  int maps = -1;
  int reds = -1;
  if (jobdesc != null) {
    // Note that the ZombieJob will return a >= 0 value
    maps = jobdesc.getNumberMaps();
    reds = jobdesc.getNumberReduces();
  }
  return new JobStats(maps, reds, job);
}
 
Example 5
Source File: ConfigurableHDFSFileSink.java    From components with Apache License 2.0
@Override
public void open(String uId) throws Exception {
    this.hash = uId.hashCode();

    Job job = ((ConfigurableHDFSFileSink<K, V>) getWriteOperation().getSink()).jobInstance();
    FileOutputFormat.setOutputPath(job, new Path(path));

    // Each Writer is responsible for writing one bundle of elements and is represented by one
    // unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow
    // handles retrying of failed bundles, each task has one attempt only.
    JobID jobId = job.getJobID();
    TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash);
    configure(job);
    context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0));

    FileOutputFormat<K, V> outputFormat = formatClass.newInstance();
    recordWriter = outputFormat.getRecordWriter(context);
    outputCommitter = (FileOutputCommitter) outputFormat.getOutputCommitter(context);
}
 
Example 6
Source File: HadoopJobStatusChecker.java    From kylin-on-parquet-v2 with Apache License 2.0
public static JobStepStatusEnum checkStatus(Job job, StringBuilder output) {
    if (job == null || job.getJobID() == null) {
        output.append("Skip status check with empty job id..\n");
        return JobStepStatusEnum.WAITING;
    }

    JobStepStatusEnum status = null;
    try {
        switch (job.getStatus().getState()) {
        case SUCCEEDED:
            status = JobStepStatusEnum.FINISHED;
            break;
        case FAILED:
            status = JobStepStatusEnum.ERROR;
            break;
        case KILLED:
            status = JobStepStatusEnum.KILLED;
            break;
        case RUNNING:
            status = JobStepStatusEnum.RUNNING;
            break;
        case PREP:
            status = JobStepStatusEnum.WAITING;
            break;
        default:
            throw new IllegalStateException();
        }
    } catch (Exception e) {
        logger.error("error check status", e);
        output.append("Exception: ").append(e.getLocalizedMessage()).append("\n");
        status = JobStepStatusEnum.ERROR;
    }

    return status;
}
 
Example 7
Source File: DistCpCopier.java    From circus-train with Apache License 2.0
@Override
public Metrics copy() throws CircusTrainException {
  LOG.info("Copying table data.");
  LOG.debug("Invoking DistCp: {} -> {}", sourceDataBaseLocation, replicaDataLocation);

  DistCpOptions distCpOptions = parseCopierOptions(copierOptions);
  LOG.debug("Invoking DistCp with options: {}", distCpOptions);

  CircusTrainCopyListing.setAsCopyListingClass(conf);
  CircusTrainCopyListing.setRootPath(conf, sourceDataBaseLocation);

  try {
    distCpOptions.setBlocking(false);
    Job job = executor.exec(conf, distCpOptions);
    String counter = String
        .format("%s_BYTES_WRITTEN", replicaDataLocation.toUri().getScheme().toUpperCase(Locale.ROOT));
    registerRunningJobMetrics(job, counter);
    if (!job.waitForCompletion(true)) {
      throw new IOException(
          "DistCp failure: Job " + job.getJobID() + " has failed: " + job.getStatus().getFailureInfo());
    }

    return new JobMetrics(job, FileSystemCounter.class.getName(), counter);
  } catch (Exception e) {
    cleanUpReplicaDataLocation();
    throw new CircusTrainException("Unable to copy file(s)", e);
  }
}
 
Example 8
Source File: HadoopJobStatusChecker.java    From kylin with Apache License 2.0
public static JobStepStatusEnum checkStatus(Job job, StringBuilder output) {
    if (job == null || job.getJobID() == null) {
        output.append("Skip status check with empty job id..\n");
        return JobStepStatusEnum.WAITING;
    }

    JobStepStatusEnum status = null;
    try {
        switch (job.getStatus().getState()) {
        case SUCCEEDED:
            status = JobStepStatusEnum.FINISHED;
            break;
        case FAILED:
            status = JobStepStatusEnum.ERROR;
            break;
        case KILLED:
            status = JobStepStatusEnum.KILLED;
            break;
        case RUNNING:
            status = JobStepStatusEnum.RUNNING;
            break;
        case PREP:
            status = JobStepStatusEnum.WAITING;
            break;
        default:
            throw new IllegalStateException();
        }
    } catch (Exception e) {
        logger.error("error check status", e);
        output.append("Exception: ").append(e.getLocalizedMessage()).append("\n");
        status = JobStepStatusEnum.ERROR;
    }

    return status;
}
 
Example 9
Source File: GraphXEdgeInputFormatTest.java    From rya with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder()
        .setSubject(new RyaIRI("http://www.google.com"))
        .setPredicate(new RyaIRI("http://some_other_uri"))
        .setObject(new RyaIRI("http://www.yahoo.com"))
        .setColumnVisibility(new byte[0])
        .setValue(new byte[0])
        .build();

    apiImpl.add(input);

    Job jobConf = Job.getInstance();

    GraphXEdgeInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    GraphXEdgeInputFormat.setConnectorInfo(jobConf, username, password);
    GraphXEdgeInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO);
    GraphXEdgeInputFormat.setInputTableName(jobConf, table);
    GraphXEdgeInputFormat.setInputTableName(jobConf, table);

    GraphXEdgeInputFormat.setScanIsolation(jobConf, false);
    GraphXEdgeInputFormat.setLocalIterators(jobConf, false);
    GraphXEdgeInputFormat.setOfflineTableScan(jobConf, false);

    GraphXEdgeInputFormat inputFormat = new GraphXEdgeInputFormat();

    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());

    List<InputSplit> splits = inputFormat.getSplits(context);

    Assert.assertEquals(1, splits.size());

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));

    RecordReader reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);

    RecordReader ryaStatementRecordReader = (RecordReader) reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);

    List<Edge> results = new ArrayList<Edge>();
    while(ryaStatementRecordReader.nextKeyValue()) {
        Edge writable = (Edge) ryaStatementRecordReader.getCurrentValue();
        long srcId = writable.srcId();
        long destId = writable.dstId();
        RyaTypeWritable rtw = null;
        Object text = ryaStatementRecordReader.getCurrentKey();
        Edge<RyaTypeWritable> edge = new Edge<RyaTypeWritable>(srcId, destId, rtw);
        results.add(edge);

        System.out.println(text);
    }

    System.out.println(results.size());
    System.out.println(results);
    Assert.assertTrue(results.size() == 2);
}
 
Example 10
Source File: RyaInputFormatTest.java    From rya with Apache License 2.0
@Test
public void testInputFormat() throws Exception {


    RyaStatement input = RyaStatement.builder()
        .setSubject(new RyaIRI("http://www.google.com"))
        .setPredicate(new RyaIRI("http://some_other_uri"))
        .setObject(new RyaIRI("http://www.yahoo.com"))
        .setColumnVisibility(new byte[0])
        .setValue(new byte[0])
        .build();

    apiImpl.add(input);

    Job jobConf = Job.getInstance();

    RyaInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    RyaInputFormat.setConnectorInfo(jobConf, username, password);
    RyaInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO);

    AccumuloInputFormat.setInputTableName(jobConf, table);
    AccumuloInputFormat.setInputTableName(jobConf, table);
    AccumuloInputFormat.setScanIsolation(jobConf, false);
    AccumuloInputFormat.setLocalIterators(jobConf, false);
    AccumuloInputFormat.setOfflineTableScan(jobConf, false);

    RyaInputFormat inputFormat = new RyaInputFormat();

    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());

    List<InputSplit> splits = inputFormat.getSplits(context);

    Assert.assertEquals(1, splits.size());

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));

    RecordReader<Text, RyaStatementWritable> reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);

    RyaStatementRecordReader ryaStatementRecordReader = (RyaStatementRecordReader)reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);

    List<RyaStatement> results = new ArrayList<RyaStatement>();
    while(ryaStatementRecordReader.nextKeyValue()) {
        RyaStatementWritable writable = ryaStatementRecordReader.getCurrentValue();
        RyaStatement value = writable.getRyaStatement();
        Text text = ryaStatementRecordReader.getCurrentKey();
        RyaStatement stmt = RyaStatement.builder()
            .setSubject(value.getSubject())
            .setPredicate(value.getPredicate())
            .setObject(value.getObject())
            .setContext(value.getContext())
            .setQualifier(value.getQualifer())
            .setColumnVisibility(value.getColumnVisibility())
            .setValue(value.getValue())
            .build();
        results.add(stmt);

        System.out.println(text);
        System.out.println(value);
    }

    Assert.assertTrue(results.size() == 2);
    Assert.assertTrue(results.contains(input));
}
 
Example 11
Source File: GraphXInputFormatTest.java    From rya with Apache License 2.0
@Test
public void testInputFormat() throws Exception {
    final RyaStatement input = RyaStatement.builder()
        .setSubject(new RyaIRI("http://www.google.com"))
        .setPredicate(new RyaIRI("http://some_other_uri"))
        .setObject(new RyaIRI("http://www.yahoo.com"))
        .setColumnVisibility(new byte[0])
        .setValue(new byte[0])
        .build();

    apiImpl.add(input);

    final Job jobConf = Job.getInstance();

    GraphXInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    GraphXInputFormat.setConnectorInfo(jobConf, username, password);
    GraphXInputFormat.setInputTableName(jobConf, table);
    GraphXInputFormat.setInputTableName(jobConf, table);

    GraphXInputFormat.setScanIsolation(jobConf, false);
    GraphXInputFormat.setLocalIterators(jobConf, false);
    GraphXInputFormat.setOfflineTableScan(jobConf, false);

    final GraphXInputFormat inputFormat = new GraphXInputFormat();

    final JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());

    final List<InputSplit> splits = inputFormat.getSplits(context);

    Assert.assertEquals(1, splits.size());

    final TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));

    final RecordReader<Object, RyaTypeWritable> reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);

    final RyaStatementRecordReader ryaStatementRecordReader = (RyaStatementRecordReader)reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);

    final List<RyaType> results = new ArrayList<RyaType>();
    System.out.println("before while");
    while(ryaStatementRecordReader.nextKeyValue()) {
        System.out.println("in while");
        final RyaTypeWritable writable = ryaStatementRecordReader.getCurrentValue();
        final RyaType value = writable.getRyaType();
        final Object text = ryaStatementRecordReader.getCurrentKey();
        final RyaType type = new RyaType();
        final String validatedLanguage = LiteralLanguageUtils.validateLanguage(value.getLanguage(), value.getDataType());
        type.setData(value.getData());
        type.setDataType(value.getDataType());
        type.setLanguage(validatedLanguage);
        results.add(type);

        System.out.println(value.getData());
        System.out.println(value.getDataType());
        System.out.println(results);
        System.out.println(type);
        System.out.println(text);
        System.out.println(value);
    }
    System.out.println("after while");

    System.out.println(results.size());
    System.out.println(results);
//    Assert.assertTrue(results.size() == 2);
//    Assert.assertTrue(results.contains(input));
}
 
Example 12
Source File: MapReduceIndexerTool.java    From examples with Apache License 2.0
private String getJobInfo(Job job) {
  return "jobName: " + job.getJobName() + ", jobId: " + job.getJobID();
}
 
Example 13
Source File: QuasiMonteCarlo.java    From sequenceiq-samples with Apache License 2.0
/**
   * Run a map/reduce job for estimating Pi.
   *
   * @return the estimated value of Pi
   */
  public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints,
      Path tmpDir, Configuration conf
      ) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    //setup job conf
    job.setJobName(jobName);
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
    	fs.delete(tmpDir, true);
//      throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
//          + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
      throw new IOException("Cannot create input directory " + inDir);
    }

  //  try {
      //generate an input file for each map task
      for(int i=0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part"+i);
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer = SequenceFile.createWriter(
            fs, conf, file,
            LongWritable.class, LongWritable.class, CompressionType.NONE);
        try {
          writer.append(offset, size);
        } finally {
          writer.close();
        }
        System.out.println("Wrote input for Map #"+i);
      }
  
      //start a map/reduce job
      System.out.println("Starting Job");
      final long startTime = System.currentTimeMillis();
      job.submit();
//      final double duration = (System.currentTimeMillis() - startTime)/1000.0;
//      System.out.println("Job Finished in " + duration + " seconds");
      return job.getJobID();
    
//    } finally {
//      fs.delete(tmpDir, true);
//    }
  }
 
Example 14
Source File: HiveCassandraStandardColumnInputFormat.java    From Hive-Cassandra with Apache License 2.0
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  String ks = jobConf.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
  String cf = jobConf.get(AbstractColumnSerDe.CASSANDRA_CF_NAME);
  int slicePredicateSize = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
      AbstractColumnSerDe.DEFAULT_SLICE_PREDICATE_SIZE);
  int sliceRangeSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
      AbstractColumnSerDe.DEFAULT_RANGE_BATCH_SIZE);
  int splitSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
      AbstractColumnSerDe.DEFAULT_SPLIT_SIZE);
  String cassandraColumnMapping = jobConf.get(AbstractColumnSerDe.CASSANDRA_COL_MAPPING);
  int rpcPort = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_PORT, 9160);
  String host = jobConf.get(AbstractColumnSerDe.CASSANDRA_HOST);
  String partitioner = jobConf.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER);

  if (cassandraColumnMapping == null) {
    throw new IOException("cassandra.columns.mapping required for Cassandra Table.");
  }

  SliceRange range = new SliceRange();
  range.setStart(new byte[0]);
  range.setFinish(new byte[0]);
  range.setReversed(false);
  range.setCount(slicePredicateSize);
  SlicePredicate predicate = new SlicePredicate();
  predicate.setSlice_range(range);

  ConfigHelper.setInputRpcPort(jobConf, "" + rpcPort);
  ConfigHelper.setInputInitialAddress(jobConf, host);
  ConfigHelper.setInputPartitioner(jobConf, partitioner);
  ConfigHelper.setInputSlicePredicate(jobConf, predicate);
  ConfigHelper.setInputColumnFamily(jobConf, ks, cf);
  ConfigHelper.setRangeBatchSize(jobConf, sliceRangeSize);
  ConfigHelper.setInputSplitSize(jobConf, splitSize);

  Job job = new Job(jobConf);
  JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());

  Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
  List<org.apache.hadoop.mapreduce.InputSplit> splits = getSplits(jobContext);
  InputSplit[] results = new InputSplit[splits.size()];

  for (int i = 0; i < splits.size(); ++i) {
    HiveCassandraStandardSplit csplit = new HiveCassandraStandardSplit(
        (ColumnFamilySplit) splits.get(i), cassandraColumnMapping, tablePaths[0]);
    csplit.setKeyspace(ks);
    csplit.setColumnFamily(cf);
    csplit.setRangeBatchSize(sliceRangeSize);
    csplit.setSplitSize(splitSize);
    csplit.setHost(host);
    csplit.setPort(rpcPort);
    csplit.setSlicePredicateSize(slicePredicateSize);
    csplit.setPartitioner(partitioner);
    csplit.setColumnMapping(cassandraColumnMapping);
    results[i] = csplit;
  }
  return results;
}
 
Example 15
Source File: HBaseMapReduceIndexerTool.java    From hbase-indexer with Apache License 2.0
private static String getJobInfo(Job job) {
    return "jobName: " + job.getJobName() + ", jobId: " + job.getJobID();
}