org.apache.hadoop.mapreduce.Job Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.Job. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IcebergStorage.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the names of the schema columns of the table loaded from {@code location},
 * leaving out every column whose type id is MAP, LIST or STRUCT.
 *
 * @param location location passed to {@code load} to resolve the table
 * @param job job passed to {@code load} together with the location
 * @return names of the remaining columns, in the order the schema reports them
 * @throws IOException declared for callers; presumably raised by {@code load} — TODO confirm
 */
@Override
public List<String> getPredicateFields(String location, Job job) throws IOException {
  LOG.info("[{}]: getPredicateFields() -> {}", signature, location);

  List<String> predicateFields = Lists.newArrayList();
  for (Types.NestedField column : load(location, job).schema().columns()) {
    switch (column.type().typeId()) {
      case MAP:
      case LIST:
      case STRUCT:
        // Nested types are not reported as predicate fields.
        break;
      default:
        predicateFields.add(column.name());
        break;
    }
  }
  return predicateFields;
}
 
Example #2
Source File: CreateHTableJob.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Configures a job for an incremental load into {@code hbaseTableName} and writes the
 * resulting job configuration as XML to the file at {@code hbaseConfPath}.
 *
 * <p>The {@link HTable} handle opened for the configuration step is closed before this
 * method returns, whether or not writing the configuration succeeds.
 *
 * @param hbaseTableName name of the HBase table; also used as the job name
 * @throws IOException if creating the job, opening or closing the table, or writing the
 *         configuration file fails
 */
private void exportHBaseConfiguration(String hbaseTableName) throws IOException {
    Configuration hbaseConf = HBaseConnection.getCurrentHBaseConfiguration();
    HadoopUtil.healSickConfig(hbaseConf);
    Job job = Job.getInstance(hbaseConf, hbaseTableName);

    HTable table = new HTable(hbaseConf, hbaseTableName);
    try {
        HFileOutputFormat3.configureIncrementalLoadMap(job, table);

        logger.info("Saving HBase configuration to {}", hbaseConfPath);
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        FSDataOutputStream out = null;
        try {
            out = fs.create(new Path(hbaseConfPath));
            job.getConfiguration().writeXml(out);
        } finally {
            IOUtils.closeQuietly(out);
        }
    } finally {
        // Release the table handle; the original never closed it.
        table.close();
    }
}
 
Example #3
Source File: POMergeJoin.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Instantiates the right-side loader from {@code rightLoaderFuncSpec}, points it at
 * {@code rightInputFileName}, initializes it and asks it to seek near the given key.
 *
 * @param firstLeftKey key to seek near; used directly when it is already a {@code Tuple},
 *        otherwise wrapped into a new tuple via {@code mTupleFactory}
 * @throws IOException declared for callers; presumably raised by the loader calls — TODO confirm
 */
private void seekInRightStream(Object firstLeftKey) throws IOException{
    rightLoader = (LoadFunc)PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec);

    // Hand over the index file when one is set and the loader is the default indexable loader
    // (the original comment relates this to the hadoop distributed cache).
    if (indexFile != null && rightLoader instanceof DefaultIndexableLoader) {
        ((DefaultIndexableLoader)rightLoader).setIndexFile(indexFile);
    }

    // Pass this operator's signature on to the loader.
    rightLoader.setUDFContextSignature(signature);

    // The loader calls below work on a copy of the internal job conf.
    Job loaderJob = new Job(new Configuration(PigMapReduce.sJobConfInternal.get()));
    rightLoader.setLocation(rightInputFileName, loaderJob);

    IndexableLoadFunc indexableLoader = (IndexableLoadFunc)rightLoader;
    indexableLoader.initialize(loaderJob.getConfiguration());

    Tuple seekKey;
    if (firstLeftKey instanceof Tuple) {
        seekKey = (Tuple)firstLeftKey;
    } else {
        seekKey = mTupleFactory.newTuple(firstLeftKey);
    }
    indexableLoader.seekNear(seekKey);
}
 
Example #4
Source File: PostgreSQLCopyExportJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the superclass propagation, then copies option values from the context's
 * {@code SqoopOptions} into the job configuration under {@code postgresql.*} keys.
 *
 * @param job job whose configuration receives the settings
 */
protected void propagateOptionsToJob(Job job) {
  super.propagateOptionsToJob(job);

  final SqoopOptions options = context.getOptions();
  final Configuration jobConf = job.getConfiguration();

  // The null-string key is only written when a value is present.
  final String nullString = options.getNullStringValue();
  if (nullString != null) {
    jobConf.set("postgresql.null.string", nullString);
  }

  setDelimiter("postgresql.input.field.delim", options.getInputFieldDelim(), jobConf);
  setDelimiter("postgresql.input.record.delim", options.getInputRecordDelim(), jobConf);
  setDelimiter("postgresql.input.enclosedby", options.getInputEnclosedBy(), jobConf);
  setDelimiter("postgresql.input.escapedby", options.getInputEscapedBy(), jobConf);

  final boolean encloseRequired = options.isInputEncloseRequired();
  jobConf.setBoolean("postgresql.input.encloserequired", encloseRequired);
}
 
Example #5
Source File: AvgTemperature.java    From BigData-In-Practice with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the average-temperature job from two command-line paths, runs it, and exits
 * the JVM with 0 when the job completes successfully and 1 otherwise.
 *
 * @param args exactly two entries: input path, then output path; any other count prints
 *        a usage line to stderr and exits with -1
 * @throws IOException declared for the Hadoop job and path calls
 * @throws ClassNotFoundException declared for {@code waitForCompletion}
 * @throws InterruptedException declared for {@code waitForCompletion}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    final boolean wrongArgCount = args.length != 2;
    if (wrongArgCount) {
        System.err.println("Usage: AvgTemperature <input path> <output path>");
        System.exit(-1);
    }

    final Job avgJob = Job.getInstance();
    avgJob.setJarByClass(AvgTemperature.class);
    avgJob.setJobName("MapReduce实验-气象数据集-求气温平均值");

    // Input comes from the first argument, output goes to the second.
    FileInputFormat.addInputPath(avgJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(avgJob, new Path(args[1]));

    avgJob.setMapperClass(TemperatureMapper.class);
    avgJob.setReducerClass(AvgTemperatureReducer.class);
    avgJob.setOutputKeyClass(Text.class);
    avgJob.setOutputValueClass(IntWritable.class);

    final boolean succeeded = avgJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
 
Example #6
Source File: TestChainErrors.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a chain job whose reducer is {@code FailReduce} (presumably a reducer that
 * throws — TODO confirm) and asserts that the job is not reported as successful.
 *
 * @throws Exception if building or running the job fails unexpectedly
 */
public void testReducerFail() throws Exception {
  final Configuration jobConf = createJobConf();
  final Job chainJob = MapReduceTestUtil.createJob(jobConf, inDir, outDir, 1, 1, input);
  chainJob.setJobName("chain");

  // Chain layout: mapper -> FailReduce -> mapper.
  ChainMapper.addMapper(
      chainJob, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainReducer.setReducer(
      chainJob, FailReduce.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainReducer.addMapper(
      chainJob, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  chainJob.waitForCompletion(true);

  final boolean jobFailed = !chainJob.isSuccessful();
  assertTrue("Job Not failed", jobFailed);
}
 
Example #7
Source File: MapReduceJobConfiguration.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Stores {@code properties} in a temporary local file and passes that file to
 * {@code addSingleFile} with the target {@code embedded-configuration.properties}
 * under {@code classpath}. The temporary file is deleted afterwards. An
 * {@link IOException} is logged and not rethrown.
 *
 * @param jobId used as the temp-file prefix and forwarded to {@code addSingleFile}
 * @param job forwarded to {@code addSingleFile}
 * @param fs forwarded to {@code addSingleFile}
 * @param classpath parent path of the target properties file
 * @param properties the properties to store
 */
protected void writeProperties(String jobId, Job job, FileSystem fs, Path classpath, Properties properties) {
    File tempFile = null;
    try {
        tempFile = File.createTempFile(jobId, ".properties");
        try (FileOutputStream propsOut = new FileOutputStream(tempFile)) {
            properties.store(propsOut, "");
        }
        final Path target = new Path(classpath, "embedded-configuration.properties");
        addSingleFile(tempFile, target, jobId, job, fs);
    } catch (IOException e) {
        log.error(e.getMessage(), e);
    } finally {
        // Remove the temp file if it was created, whatever happened above.
        if (tempFile != null) {
            tempFile.delete();
        }
    }
}
 
Example #8
Source File: CredentialsTestJob.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the credentials test job: one map (set through {@code MRJobConfig.NUM_MAPS} on
 * the configuration returned by {@code getConf()}), one reduce task, the sleep-job input
 * format and partitioner, and no output ({@code NullOutputFormat}).
 *
 * <p>The job is created under the name {@code "test"} and then renamed to
 * {@code "test job"}. The input path {@code "ignored"} is a placeholder, judging by
 * its name.
 *
 * @return the configured, not yet submitted job
 * @throws IOException if creating the job or adding the input path fails
 */
public Job createJob() 
throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  // The original set the reduce-task count twice; once is enough.
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
 
Example #9
Source File: IntegrationTestLoadAndVerify.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the verification job over the given table and asserts that it completes and that
 * its {@code ROWS_WRITTEN} counter is zero.
 *
 * @param conf configuration the job is created from; also read for the scanner caching
 *        and reduce-task settings
 * @param tableDescriptor descriptor of the table to scan
 * @throws Exception if setting up or running the job fails
 */
protected void doVerify(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  final Path verifyOutput = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + verifyOutput);

  final Job verifyJob = Job.getInstance(conf);
  verifyJob.setJarByClass(this.getClass());
  verifyJob.setJobName(TEST_NAME + " Verification for " + tableDescriptor.getTableName());
  setJobScannerConf(verifyJob);

  // Map side: scan the table with VerifyMapper.
  final Scan tableScan = new Scan();
  final String tableName = tableDescriptor.getTableName().getNameAsString();
  TableMapReduceUtil.initTableMapperJob(
      tableName, tableScan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, verifyJob);
  TableMapReduceUtil.addDependencyJarsForClasses(
      verifyJob.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.setScannerCaching(
      verifyJob, conf.getInt("verify.scannercaching", SCANNER_CACHING));

  // Reduce side and output location.
  verifyJob.setReducerClass(VerifyReducer.class);
  verifyJob.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(verifyJob, verifyOutput);

  final boolean completed = verifyJob.waitForCompletion(true);
  assertTrue(completed);

  final long rowsWritten =
      verifyJob.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, rowsWritten);
}
 
Example #10
Source File: CreateHTableJob.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Configures a job for an incremental load into {@code hbaseTableName} and writes the
 * resulting job configuration as XML to the file at {@code hbaseConfPath}.
 *
 * <p>The {@link HTable} handle opened for the configuration step is closed before this
 * method returns, whether or not writing the configuration succeeds.
 *
 * @param hbaseTableName name of the HBase table; also used as the job name
 * @throws IOException if creating the job, opening or closing the table, or writing the
 *         configuration file fails
 */
private void exportHBaseConfiguration(String hbaseTableName) throws IOException {
    Configuration hbaseConf = HBaseConnection.getCurrentHBaseConfiguration();
    HadoopUtil.healSickConfig(hbaseConf);
    Job job = Job.getInstance(hbaseConf, hbaseTableName);

    HTable table = new HTable(hbaseConf, hbaseTableName);
    try {
        HFileOutputFormat3.configureIncrementalLoadMap(job, table);

        logger.info("Saving HBase configuration to {}", hbaseConfPath);
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        FSDataOutputStream out = null;
        try {
            out = fs.create(new Path(hbaseConfPath));
            job.getConfiguration().writeXml(out);
        } finally {
            IOUtils.closeQuietly(out);
        }
    } finally {
        // Release the table handle; the original never closed it.
        table.close();
    }
}
 
Example #11
Source File: FluoRowInputFormat.java    From fluo with Apache License 2.0 6 votes vote down vote up
/**
 * Stores in the job what is needed to connect to a Fluo application: an allocated
 * timestamp, the serialized {@code config}, and the Accumulo input settings
 * (ZooKeeper instance, connector info, input table and scan authorizations).
 *
 * @param conf job to configure
 * @param config use {@link org.apache.fluo.api.config.FluoConfiguration} to configure
 *        programmatically
 * @throws RuntimeException wrapping any exception raised while configuring
 */
public static void configure(Job conf, SimpleConfiguration config) {
  try {
    final FluoConfiguration fluoConfig = new FluoConfiguration(config);
    try (Environment env = new Environment(fluoConfig)) {
      // Allocate a timestamp and record it in the job configuration.
      final long timestamp =
          env.getSharedResources().getTimestampTracker().allocateTimestamp().getTxTimestamp();
      conf.getConfiguration().setLong(TIMESTAMP_CONF_KEY, timestamp);

      // Serialize the supplied configuration into the job as a UTF-8 string.
      final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
      config.save(buffer);
      final String serializedProps = new String(buffer.toByteArray(), StandardCharsets.UTF_8);
      conf.getConfiguration().set(PROPS_CONF_KEY, serializedProps);

      AccumuloInputFormat.setZooKeeperInstance(
          conf, fluoConfig.getAccumuloInstance(), fluoConfig.getAccumuloZookeepers());
      AccumuloInputFormat.setConnectorInfo(
          conf, fluoConfig.getAccumuloUser(), new PasswordToken(fluoConfig.getAccumuloPassword()));
      AccumuloInputFormat.setInputTableName(conf, env.getTable());
      AccumuloInputFormat.setScanAuthorizations(conf, env.getAuthorizations());
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #12
Source File: MatMulDriver.java    From MLHadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the matrix multiplication job, then exits the JVM with 0 when the
 * job completes successfully and 1 otherwise.
 *
 * <p>A is an m-by-n matrix; B is an n-by-p matrix.
 *
 * @param args at least five entries: {@code m}, {@code n}, {@code p}, the input path and
 *        the output path, in that order; with fewer, a usage line is printed to stderr
 *        and the JVM exits with -1
 * @throws IOException declared for the Hadoop job and path calls
 * @throws InterruptedException declared for {@code waitForCompletion}
 * @throws ClassNotFoundException declared for {@code waitForCompletion}
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
	// Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
	if (args.length < 5) {
		System.err.println("Usage: MatMulDriver <m> <n> <p> <input path> <output path>");
		System.exit(-1);
	}
	Configuration conf = new Configuration();
	// The dimensions are stored in the job configuration under the keys "m", "n" and "p".
	conf.set("m", args[0]);
	conf.set("n", args[1]);
	conf.set("p", args[2]);
	Job job = new Job(conf, "Matrix_Multiplication");
	job.setJarByClass(MatMulDriver.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);
	job.setMapperClass(MatMulMap.class);
	// No combiner is set. Original author's note: don't use a combiner if there is no
	// scope for combining the output, otherwise the job will get stuck.
	job.setReducerClass(MatMulReduce.class);
	// args[3] is the input path.
	FileInputFormat.addInputPath(job, new Path(args[3]));
	// args[4] is the output path.
	FileOutputFormat.setOutputPath(job, new Path(args[4]));
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #13
Source File: DBOutputFormat.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the reduce-part of the job with the appropriate output settings.
 *
 * <p>When the first field name is non-null, the names are stored on the
 * {@code DBConfiguration} returned by the two-argument overload. When it is null, only
 * the number of fields is passed on to the field-count overload.
 *
 * @param job The job
 * @param tableName The table to insert data into
 * @param fieldNames The field names in the table.
 * @throws IOException declared for callers; presumably raised by the overloads — TODO confirm
 * @throws IllegalArgumentException if no field names are given
 */
public static void setOutput(Job job, String tableName, 
    String... fieldNames) throws IOException {
  // Guard: at least one field (named or not) is required.
  if (fieldNames.length == 0) {
    throw new IllegalArgumentException(
      "Field names must be greater than 0");
  }

  final boolean firstNameMissing = fieldNames[0] == null;
  if (firstNameMissing) {
    setOutput(job, tableName, fieldNames.length);
    return;
  }

  DBConfiguration outputConf = setOutput(job, tableName);
  outputConf.setOutputFieldNames(fieldNames);
}
 
Example #14
Source File: PiEstimator.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the two arguments, prints them, and prints the value returned by
 * {@code estimate} for a newly created job.
 *
 * @param args {@code <nMaps> <nSamples>}: the number of maps and the samples per map
 * @return -1 when the argument count is not two (after printing usage), otherwise 0
 * @throws Exception if an argument is not a valid number or {@code estimate} fails
 */
public int run(String[] args) throws Exception {
  final boolean wrongArgCount = args.length != 2;
  if (wrongArgCount) {
    System.err.println("Usage: "+getClass().getName()+" <nMaps> <nSamples>");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }

  final int mapCount = Integer.parseInt(args[0]);
  final long samplesPerMap = Long.parseLong(args[1]);

  System.out.println("Number of Maps  = " + mapCount);
  System.out.println("Samples per Map = " + samplesPerMap);

  // The estimate runs on a job built from a fresh Configuration.
  final Job piJob = new Job(new Configuration(), "PiEstimatior");
  System.out.println("Estimated value of Pi is "
      + estimate(mapCount, samplesPerMap, piJob));
  return 0;
}
 
Example #15
Source File: MergeJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Configures {@code job} to merge Avro files: checks that both input paths have the same
 * non-null Avro schema, then sets the Avro input/output formats, the merge mapper and
 * reducer, and the output schema.
 *
 * @param conf configuration used to read the schemas
 * @param job job to configure
 * @param oldPath path of the old data
 * @param newPath path of the new data
 * @throws IOException if either schema is null or the two schemas differ
 */
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");

  final Schema oldSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newSchema = AvroUtil.getAvroSchema(newPath, conf);

  final boolean schemasMatch =
      oldSchema != null && newSchema != null && oldSchema.equals(newSchema);
  if (!schemasMatch) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldSchema + "]. Schema for new data: [" + newSchema + "]");
  }
  LOG.debug("Avro Schema:" + oldSchema);

  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  // Both schemas are equal at this point, so the old one serves as the output schema.
  AvroJob.setOutputSchema(job.getConfiguration(), oldSchema);
}
 
Example #16
Source File: TransformBaseRunner.java    From BigDataPlatform with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Creates the job named {@code jobName} and wires in the runner's mapper (through
 * {@code TableMapReduceUtil.initTableMapperJob}), reducer, output key/value classes and
 * output format.
 *
 * @param conf configuration the job is created from
 * @return the configured job
 * @throws IOException declared for callers; presumably raised by job creation or the
 *         table-mapper setup — TODO confirm
 */
protected Job initJob(Configuration conf) throws IOException {
    final Job transformJob = Job.getInstance(conf, this.jobName);
    transformJob.setJarByClass(this.runnerClass);

    // Notes carried over from the original author: passing false as the last argument
    // was marked "run locally"; the overload without the last argument was marked
    // "run on the cluster: local submission and packaged (jar) submission".
    // The call below passes true.
    TableMapReduceUtil.initTableMapperJob(
            initScans(transformJob),
            this.mapperClass,
            this.mapOutputKeyClass,
            this.mapOutputValueClass,
            transformJob,
            true);

    transformJob.setReducerClass(this.reducerClass);
    transformJob.setOutputKeyClass(this.outputKeyClass);
    transformJob.setOutputValueClass(this.outputValueClass);
    transformJob.setOutputFormatClass(this.outputFormatClass);
    return transformJob;
}
 
Example #17
Source File: IndexWritingTool.java    From rya with Apache License 2.0 6 votes vote down vote up
/**
 * Configures {@code job} to write {@code Text}/{@code Mutation} pairs through
 * {@code AccumuloOutputFormat}, with table creation enabled.
 *
 * @param instStr instance name
 * @param zooStr ZooKeeper string; the literal {@code "mock"} selects a mock instance
 * @param userStr user for the connector info
 * @param passStr password wrapped into a {@code PasswordToken}
 * @param job job to configure
 * @param tableName default output table name
 * @throws AccumuloSecurityException declared for callers; presumably raised when the
 *         connector info is set — TODO confirm
 */
private static void setAccumuloOutput(final String instStr, final String zooStr, final String userStr, final String passStr, final Job job, final String tableName)
        throws AccumuloSecurityException {

    final AuthenticationToken passwordToken = new PasswordToken(passStr);
    AccumuloOutputFormat.setConnectorInfo(job, userStr, passwordToken);
    AccumuloOutputFormat.setDefaultTableName(job, tableName);
    AccumuloOutputFormat.setCreateTables(job, true);

    //TODO best way to do this?
    final boolean useMockInstance = zooStr.equals("mock");
    if (!useMockInstance) {
        AccumuloOutputFormat.setZooKeeperInstance(job, instStr, zooStr);
    } else {
        AccumuloOutputFormat.setMockInstance(job, instStr);
    }

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);
}
 
Example #18
Source File: AbstractHadoopJob.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Walks {@code tmpDir} recursively and appends the path of every file found to one of
 * two comma-separated lists: files whose name ends in {@code .jar} go to
 * {@code jarList}, all others to {@code fileList}.
 *
 * @param job only passed along to the recursive calls
 * @param fs file system used to list and stat the entries
 * @param tmpDir directory to walk
 * @param jarList receives the jar paths
 * @param fileList receives the paths of all other files
 * @throws RuntimeException wrapping any {@link IOException} from the file system
 */
private void appendTmpDir(Job job, FileSystem fs, Path tmpDir, StringBuilder jarList, StringBuilder fileList) {
    try {
        for (FileStatus entry : fs.listStatus(tmpDir)) {
            final Path entryPath = entry.getPath();

            if (fs.getFileStatus(entryPath).isDirectory()) {
                // Descend into sub-directories.
                appendTmpDir(job, fs, entryPath, jarList, fileList);
            } else {
                final StringBuilder target;
                if (entryPath.getName().endsWith(".jar")) {
                    target = jarList;
                } else {
                    target = fileList;
                }

                // Separate entries with a comma, without a leading one.
                if (target.length() > 0) {
                    target.append(",");
                }
                target.append(fs.getFileStatus(entryPath).getPath().toString());
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
 
Example #19
Source File: ValueAggregatorJob.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Aggregate based map/reduce job from the arguments, runs it, and exits the
 * JVM with 0 when the job completes successfully and 1 otherwise.
 *
 * @param args the arguments used for job creation
 * @throws IOException declared for job creation and execution
 * @throws InterruptedException declared for {@code waitForCompletion}
 * @throws ClassNotFoundException declared for {@code waitForCompletion}
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Configuration conf = new Configuration();
  final Job aggregatorJob = ValueAggregatorJob.createValueAggregatorJob(conf, args);
  final boolean succeeded = aggregatorJob.waitForCompletion(true);
  System.exit(succeeded ? 0 : 1);
}
 
Example #20
Source File: MRCompactorAvroKeyDedupJobRunner.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Obtain the schema used for compaction, depending on the dedup key option.
 *
 * <ul>
 *   <li>{@code ALL}: {@code topicSchema} with its uncomparable fields removed.</li>
 *   <li>{@code KEY}: the key schema derived from {@code topicSchema} by the
 *       single-argument {@code getKeySchema} overload, with uncomparable fields
 *       removed.</li>
 *   <li>Otherwise, when a key schema file is specified: the schema parsed from that
 *       file. If parsing fails, or the parsed schema is not valid for
 *       {@code topicSchema}, the {@code KEY} behavior is used instead.</li>
 *   <li>Otherwise: the {@code KEY} behavior.</li>
 * </ul>
 *
 * @param job not used by this method
 * @param topicSchema schema of the records being compacted
 * @return the key schema to use for compaction
 * @throws IOException declared for callers; a failure to parse the key schema file is
 *         logged and handled here rather than propagated
 */
@VisibleForTesting
Schema getKeySchema(Job job, Schema topicSchema) throws IOException {
  Schema keySchema = null;
  DedupKeyOption dedupKeyOption = getDedupKeyOption();
  if (dedupKeyOption == DedupKeyOption.ALL) {
    LOG.info("Using all attributes in the schema (except Map, Array and Enum fields) for compaction");
    keySchema = AvroUtils.removeUncomparableFields(topicSchema).get();
  } else if (dedupKeyOption == DedupKeyOption.KEY) {
    LOG.info("Using key attributes in the schema for compaction");
    keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();
  } else if (keySchemaFileSpecified()) {
    Path keySchemaFile = getKeySchemaFile();
    LOG.info("Using attributes specified in schema file " + keySchemaFile + " for compaction");
    try {
      keySchema = AvroUtils.parseSchemaFromFile(keySchemaFile, this.fs);
    } catch (IOException e) {
      // Keep the cause in the log so the parse failure can be diagnosed.
      LOG.error("Failed to parse avro schema from " + keySchemaFile
          + ", using key attributes in the schema for compaction", e);
      keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();
    }
    // This check also runs on the fallback schema chosen in the catch block above.
    if (!isKeySchemaValid(keySchema, topicSchema)) {
      LOG.warn(String.format("Key schema %s is not compatible with record schema %s.", keySchema, topicSchema)
          + " Using key attributes in the schema for compaction");
      keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();
    }
  } else {
    LOG.info("Property " + COMPACTION_JOB_AVRO_KEY_SCHEMA_LOC
        + " not provided. Using key attributes in the schema for compaction");
    keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();
  }

  return keySchema;
}
 
Example #21
Source File: TestLineRecordReaderJobs.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an MR job that uses the base {@code Mapper} and {@code Reducer} classes, reads
 * from {@code inputDir}, writes to {@code outputDir}, and waits for it to finish. The
 * completion result is not checked here.
 *
 * @param conf configuration the job is created from
 * @throws IOException declared for job creation and execution
 * @throws InterruptedException declared for {@code waitForCompletion}
 * @throws ClassNotFoundException declared for {@code waitForCompletion}
 */
public void createAndRunJob(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException {
  final Job readerJob = Job.getInstance(conf);
  readerJob.setJarByClass(TestLineRecordReaderJobs.class);

  readerJob.setMapperClass(Mapper.class);
  readerJob.setReducerClass(Reducer.class);

  FileInputFormat.addInputPath(readerJob, inputDir);
  FileOutputFormat.setOutputPath(readerJob, outputDir);

  readerJob.waitForCompletion(true);
}
 
Example #22
Source File: EntityInputFormatIT.java    From accumulo-recipes with Apache License 2.0 5 votes vote down vote up
/**
 * Saves one entity, runs a map-only job that queries it back through
 * {@code EntityInputFormat}, and checks that exactly that entity reached
 * {@code TestMapper}.
 *
 * @throws Exception if storing the entity or running the job fails
 */
@Test
public void testQuery() throws Exception {

    Connector connector = accumuloMiniClusterDriver.getConnector();
    AccumuloEntityStore store = new AccumuloEntityStore(connector);
    entity = EntityBuilder.create("type", "id").attr(new Attribute("key1", "val1")).attr(new Attribute("key2", false)).build();
    store.save(singleton(entity));
    store.flush();

    Job job = Job.getInstance();
    job.setJarByClass(getClass());
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(EntityInputFormat.class);
    EntityInputFormat.setZooKeeperInstance(job,accumuloMiniClusterDriver.getClientConfiguration());
    EntityInputFormat.setInputInfo(job, "root", accumuloMiniClusterDriver.getRootPassword().getBytes(), new Authorizations());
    // Query for type "type" where key1 == "val1", which matches the entity saved above.
    EntityInputFormat.setQueryInfo(job, Collections.singleton("type"),
            QueryBuilder.create().eq("key1", "val1").build(), DEFAULT_SHARD_BUILDER, LEXI_TYPES);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.submit();
    job.waitForCompletion(true);

    // Expected value first, actual second, so failure messages read correctly.
    assertEquals(1, TestMapper.entities.size());
    assertEquals(entity.getId(), TestMapper.entities.get(0).getId());
    assertEquals(entity.getType(), TestMapper.entities.get(0).getType());
    assertEquals(new HashSet<Attribute>(entity.getAttributes()), new HashSet<Attribute>(TestMapper.entities.get(0).getAttributes()));

}
 
Example #23
Source File: SyncTable.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the peer cluster configuration for {@code zookeeper} and, when that
 * configuration's {@code hbase.security.authentication} is {@code kerberos}
 * (case-insensitive), initializes the job's credentials for that cluster.
 *
 * @param zookeeper cluster key passed to {@code HBaseConfiguration.createClusterConf}
 * @param job job whose configuration is the base and which receives the credentials
 * @throws IOException declared for callers; presumably raised by the HBase calls — TODO confirm
 */
private void initCredentialsForHBase(String zookeeper, Job job) throws IOException {
  final Configuration peerConf =
      HBaseConfiguration.createClusterConf(job.getConfiguration(), zookeeper);

  final String authentication = peerConf.get("hbase.security.authentication");
  final boolean kerberosEnabled = "kerberos".equalsIgnoreCase(authentication);
  if (!kerberosEnabled) {
    return;
  }
  TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
}
 
Example #24
Source File: WALPlayer.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the job from the arguments and waits for it to complete.
 *
 * @param args command-line arguments; with fewer than two, usage is printed and
 *        {@code System.exit(-1)} is called
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if creating or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
  final boolean tooFewArgs = args.length < 2;
  if (tooFewArgs) {
    usage("Wrong number of arguments: " + args.length);
    System.exit(-1);
  }

  final Job playerJob = createSubmittableJob(args);
  final boolean succeeded = playerJob.waitForCompletion(true);
  return succeeded ? 0 : 1;
}
 
Example #25
Source File: RowCounter.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up the actual job: a map-only scan of {@code tableName} with
 * {@code RowCounterMapper} and no output.
 *
 * <p>The scan has block caching disabled, is restricted to the configured columns and to
 * the time range {@code startTime}..{@code endTime}, and gets its filter from
 * {@code setScanFilter}. A non-negative {@code expectedCount} is written to {@code conf}
 * under {@code EXPECTED_COUNT_KEY}.
 *
 * @param conf  The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(Configuration conf) throws IOException {
  final String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName);
  final Job counterJob = Job.getInstance(conf, jobName);
  counterJob.setJarByClass(RowCounter.class);

  final Scan rowScan = new Scan();
  rowScan.setCacheBlocks(false);
  setScanFilter(rowScan, rowRangeList);

  // Each column is "family" or "family:qualifier"; a blank qualifier selects the family.
  for (String column : this.columns) {
    final byte[] family = Bytes.toBytes(StringUtils.substringBefore(column, ":"));
    final String qualifier = StringUtils.substringAfter(column, ":");
    if (!StringUtils.isBlank(qualifier)) {
      rowScan.addColumn(family, Bytes.toBytes(qualifier));
    } else {
      rowScan.addFamily(family);
    }
  }

  // Written to the conf that was passed in, not to the job's own configuration.
  if (this.expectedCount >= 0) {
    conf.setLong(EXPECTED_COUNT_KEY, this.expectedCount);
  }

  rowScan.setTimeRange(startTime, endTime);
  counterJob.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(
      tableName, rowScan, RowCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, counterJob);
  counterJob.setNumReduceTasks(0);
  return counterJob;
}
 
Example #26
Source File: TestBinaryTokenFile.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the job through the superclass, then swaps in {@code MySleepMapper} and sets up
 * the binary token file for the job.
 *
 * @return the job created by the superclass with the two changes applied
 * @throws IOException declared for callers; presumably raised by job creation or token
 *         setup — TODO confirm
 */
@Override
public Job createJob(int numMapper, int numReducer, 
    long mapSleepTime, int mapSleepCount, 
    long reduceSleepTime, int reduceSleepCount) 
throws IOException {
  final Job sleepJob = super.createJob(
      numMapper, numReducer,
      mapSleepTime, mapSleepCount,
      reduceSleepTime, reduceSleepCount);

  sleepJob.setMapperClass(MySleepMapper.class);

  // Tokens are populated here because security is disabled (original author's note).
  setupBinaryTokenFile(sleepJob);
  return sleepJob;
}
 
Example #27
Source File: AllLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the load function for the split's file, then creates and initializes that
 * function's record reader for the split.
 *
 * @param inputSplit the split to read; cast to {@code FileSplit}
 * @param taskAttemptContext supplies the configuration and the job name
 * @throws IOException if no load function can be determined for the file
 * @throws InterruptedException declared for callers; presumably raised by the reader
 *         calls — TODO confirm
 */
@SuppressWarnings("unchecked")
@Override
public void initialize(InputSplit inputSplit,
        TaskAttemptContext taskAttemptContext) throws IOException,
        InterruptedException {

    final FileSplit split = (FileSplit) inputSplit;
    path = split.getPath();
    final String location = path.toUri().toString();

    // Select the load function that matches this file.
    loadFuncHelper = new LoadFuncHelper(taskAttemptContext.getConfiguration());
    final FuncSpec chosenSpec = loadFuncHelper.determineFunction(location);
    if (chosenSpec == null) {
        throw new IOException("Cannot determine LoadFunc for "
                + location);
    }

    selectedLoadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(chosenSpec);
    selectedLoadFunc.setUDFContextSignature(udfSignature);

    // The location is set on a new Job built from the task's configuration and job name.
    final Job locationJob = new Job(
            taskAttemptContext.getConfiguration(),
            taskAttemptContext.getJobName());
    selectedLoadFunc.setLocation(location, locationJob);

    selectedReader = selectedLoadFunc.getInputFormat()
            .createRecordReader(split, taskAttemptContext);
    selectedReader.initialize(split, taskAttemptContext);

    LOG.info("Using LoadFunc " + selectedLoadFunc.getClass().getName()
            + " on " + location);

}
 
Example #28
Source File: TestReporter.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a map-only job with {@code StatusLimitMapper} over a single small input file and
 * asserts that the job succeeds.
 *
 * @throws IOException declared for the file system and job calls
 * @throws InterruptedException declared for {@code waitForCompletion}
 * @throws ClassNotFoundException declared for {@code waitForCompletion}
 */
@Test
public void testStatusLimit() throws IOException, InterruptedException,
    ClassNotFoundException {
  final Path testRoot = new Path(testRootTempDir, "testStatusLimit");
  final Configuration conf = new Configuration();
  final Path inDir = new Path(testRoot, "in");
  final Path outDir = new Path(testRoot, "out");
  final FileSystem fs = FileSystem.get(conf);

  // Start from an empty input directory that holds one file.
  if (fs.exists(inDir)) {
    fs.delete(inDir, true);
  }
  fs.mkdirs(inDir);
  final DataOutputStream inputFile = fs.create(new Path(inDir, "part-" + 0));
  inputFile.writeBytes("testStatusLimit");
  inputFile.close();

  // Remove any existing output directory.
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }

  final Job statusJob = Job.getInstance(conf, "testStatusLimit");
  statusJob.setMapperClass(StatusLimitMapper.class);
  statusJob.setNumReduceTasks(0);
  FileInputFormat.addInputPath(statusJob, inDir);
  FileOutputFormat.setOutputPath(statusJob, outDir);

  statusJob.waitForCompletion(true);

  final boolean succeeded = statusJob.isSuccessful();
  assertTrue("Job failed", succeeded);
}
 
Example #29
Source File: TestMapReduceJobControl.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises a {@code JobControl} whose first job gets killed: checks that a running job
 * rejects a new depending job, that suspend is reflected in the thread state, and that
 * the four controlled jobs end in the expected states after the kill.
 *
 * @throws Exception if setting up or driving the job control fails
 */
public void testJobControlWithKillJob() throws Exception {
  LOG.info("Starting testJobControlWithKillJob");

  final Configuration jobConf = createJobConf();
  cleanupData(jobConf);
  final Job killJob = MapReduceTestUtil.createKillJob(jobConf, outdir_1, indir);
  final JobControl control = createDependencies(jobConf, killJob);

  // Poll until the first controlled job is RUNNING; an interrupt ends the wait early.
  while (true) {
    if (cjob1.getJobState() == ControlledJob.State.RUNNING) {
      break;
    }
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      break;
    }
  }

  // Adding a depending job to a RUNNING job must fail.
  final boolean dependencyAdded = cjob1.addDependingJob(cjob2);
  assertFalse(dependencyAdded);

  // Suspend the job control, check its thread state, then resume it.
  control.suspend();
  final boolean suspended =
      control.getThreadState() == JobControl.ThreadState.SUSPENDED;
  assertTrue(suspended);
  control.resume();

  // Kill the first job and wait for all jobs to finish.
  cjob1.killJob();
  waitTillAllFinished(control);

  assertTrue(cjob1.getJobState() == ControlledJob.State.FAILED);
  assertTrue(cjob2.getJobState() == ControlledJob.State.SUCCESS);
  assertTrue(cjob3.getJobState() == ControlledJob.State.DEPENDENT_FAILED);
  assertTrue(cjob4.getJobState() == ControlledJob.State.DEPENDENT_FAILED);

  control.stop();
}
 
Example #30
Source File: DirectoryIngestMapperTest.java    From hadoop-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes a {@code DirectoryIngestMapper} fixture with {@code jobConf}, runs a job
 * built from that configuration, and checks the processed document count and that no
 * returned document string is null.
 *
 * @param expectedNumDocs number of documents the job is expected to process
 * @throws Exception if initializing the fixture or running the job fails
 */
private void doTest(int expectedNumDocs) throws Exception {
    new DirectoryIngestMapper().getFixture().init(jobConf);

    final Job ingestJob = Job.getInstance(jobConf);
    ingestJob.setOutputKeyClass(Text.class);
    ingestJob.setOutputValueClass(LWDocumentWritable.class);

    final List<String> docs = runJobSuccessfully(ingestJob, expectedNumDocs);
    assertNumDocsProcessed(ingestJob, expectedNumDocs);

    // Every returned document string must be present.
    for (String doc : docs) {
        assertNotNull(doc);
    }
}