org.apache.hadoop.mapred.JobConf Java Examples

The following examples show how to use org.apache.hadoop.mapred.JobConf. Each example is taken from an open source project; the source file, originating project, and license are noted above the code.
Example #1
Source File: SliveMapper.java    From hadoop with Apache License 2.0
@Override // MapReduceBase
public void configure(JobConf conf) {
  try {
    config = new ConfigExtractor(conf);
    ConfigExtractor.dumpOptions(config);
    filesystem = config.getBaseDirectory().getFileSystem(conf);
  } catch (Exception e) {
    LOG.error("Unable to setup slive " + StringUtils.stringifyException(e));
    throw new RuntimeException("Unable to setup slive configuration", e);
  }
  if(conf.get(MRJobConfig.TASK_ATTEMPT_ID) != null ) {
    this.taskId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID))
      .getTaskID().getId();
  } else {
    // So that branch-1/0.20 can run this same code as well
    this.taskId = TaskAttemptID.forName(conf.get("mapred.task.id"))
        .getTaskID().getId();
  }
}
 
Example #2
Source File: TestCombineFileInputFormat.java    From RDFS with Apache License 2.0
@Override
protected LocatedFileStatus[] listLocatedStatus(JobConf job) throws IOException {
  Path[] files = getInputPaths(job);
  LocatedFileStatus[] results = new LocatedFileStatus[files.length];
  for (int i = 0; i < files.length; i++) {
    Path p = files[i];
    FileSystem fs = p.getFileSystem(job);
    FileStatus stat = fs.getFileStatus(p);
    if (stat.isDir()) {
      results[i] = new LocatedFileStatus(stat, null);
    } else {
      results[i] = new LocatedFileStatus(stat,
          fs.getFileBlockLocations(stat, 0, stat.getLen()));
    }
  }
  return results;
}
 
Example #3
Source File: UtilsForTests.java    From hadoop-gpu with Apache License 2.0
static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  
  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      break;
    }
  }

  return job;
}
 
Example #4
Source File: TestCombineFileInputFormat.java    From RDFS with Apache License 2.0
private void splitRealFiles(String[] args) throws IOException {
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.get(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Wrong file system: " + fs.getClass().getName());
  }
  int blockSize = conf.getInt("dfs.block.size", 128 * 1024 * 1024);

  DummyInputFormat inFormat = new DummyInputFormat();
  for (int i = 0; i < args.length; i++) {
    inFormat.addInputPaths(conf, args[i]);
  }
  inFormat.setMinSplitSizeRack(blockSize);
  inFormat.setMaxSplitSize(10 * blockSize);

  InputSplit[] splits = inFormat.getSplits(conf, 1);
  System.out.println("Total number of splits " + splits.length);
  for (int i = 0; i < splits.length; ++i) {
    CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
    System.out.println("Split[" + i + "] " + fileSplit);
  }
}
 
Example #5
Source File: HadoopReduceCombineFunction.java    From flink with Apache License 2.0
/**
 * Maps two Hadoop Reducers (mapred API) to a combinable Flink GroupReduceFunction.
 *
 * @param hadoopReducer The Hadoop Reducer that is mapped to a GroupReduceFunction.
 * @param hadoopCombiner The Hadoop Reducer that is mapped to the combiner function.
 * @param conf The JobConf that is used to configure both Hadoop Reducers.
 */
public HadoopReduceCombineFunction(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopReducer,
							Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN> hadoopCombiner, JobConf conf) {
	if (hadoopReducer == null) {
		throw new NullPointerException("Reducer may not be null.");
	}
	if (hadoopCombiner == null) {
		throw new NullPointerException("Combiner may not be null.");
	}
	if (conf == null) {
		throw new NullPointerException("JobConf may not be null.");
	}

	this.reducer = hadoopReducer;
	this.combiner = hadoopCombiner;
	this.jobConf = conf;
}
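A minimal, self-contained sketch of how this wrapper is typically wired into a Flink DataSet groupReduce; the SumReducer class and the sample data are hypothetical and only illustrate the call pattern, not code from the Flink project:

import java.io.IOException;
import java.util.Iterator;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.mapred.HadoopReduceCombineFunction;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class HadoopReducerOnFlink {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Hypothetical (word, count) input.
        DataSet<Tuple2<Text, IntWritable>> words = env.fromElements(
            new Tuple2<>(new Text("hadoop"), new IntWritable(1)),
            new Tuple2<>(new Text("flink"), new IntWritable(1)),
            new Tuple2<>(new Text("hadoop"), new IntWritable(1)));

        // The same mapred Reducer serves as reducer and combiner; the JobConf is
        // handed to both through configure(JobConf).
        DataSet<Tuple2<Text, IntWritable>> counts = words
            .groupBy(0)
            .reduceGroup(new HadoopReduceCombineFunction<Text, IntWritable, Text, IntWritable>(
                new SumReducer(), new SumReducer(), new JobConf()));

        counts.print();
    }

    // Hypothetical reducer: sums the counts per word.
    public static class SumReducer implements Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void configure(JobConf job) { }

        @Override
        public void close() { }

        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }
}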
 
Example #6
Source File: HadoopUtils.java    From incubator-hivemall with Apache License 2.0
public static int getTaskId() {
    MapredContext ctx = MapredContextAccessor.get();
    if (ctx == null) {
        throw new IllegalStateException("MapredContext is not set");
    }
    JobConf jobconf = ctx.getJobConf();
    if (jobconf == null) {
        throw new IllegalStateException("JobConf is not set");
    }
    int taskid = jobconf.getInt("mapred.task.partition", -1);
    if (taskid == -1) {
        taskid = jobconf.getInt("mapreduce.task.partition", -1);
        if (taskid == -1) {
            throw new IllegalStateException(
                "Both mapred.task.partition and mapreduce.task.partition are not set: "
                        + toString(jobconf));
        }
    }
    return taskid;
}
 
Example #7
Source File: AvroAsJsonOutputFormat.java    From iow-hadoop-streaming with Apache License 2.0
static <K> void configureDataFileWriter(DataFileWriter<K> writer,
    JobConf job) throws UnsupportedEncodingException {

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ?
            CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
            DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String,String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                   URLDecoder.decode(e.getValue(), "ISO-8859-1")
                   .getBytes("ISO-8859-1"));
    }
}
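A hedged sketch of a JobConf that exercises the codec, sync-interval, and metadata branches before handing a writer to the method above. The concrete values are illustrative assumptions; the constants come from org.apache.avro.mapred (AvroJob, AvroOutputFormat) and org.apache.hadoop.mapred.FileOutputFormat:

// Hedged sketch; configureDataFileWriter is the method shown above.
static DataFileWriter<GenericRecord> newConfiguredWriter() throws IOException {
    JobConf job = new JobConf();
    FileOutputFormat.setCompressOutput(job, true);               // enables the codec branch
    job.set(AvroJob.OUTPUT_CODEC, "deflate");                    // matches DEFLATE_CODEC, so deflateCodec(level) is chosen
    job.setInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, 6);
    job.setInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, 64 * 1024);
    job.set(AvroJob.TEXT_PREFIX + "creator", "example");         // copied into the Avro file metadata

    DataFileWriter<GenericRecord> writer =
        new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
    configureDataFileWriter(writer, job);
    return writer;
}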
 
Example #8
Source File: GridmixJob.java    From hadoop with Apache License 2.0
@SuppressWarnings("deprecation")
protected static void configureTaskJVMOptions(Configuration originalJobConf,
                                              Configuration simulatedJobConf){
  // Get the heap related java opts used for the original job and set the 
  // same for the simulated job.
  //    set generic task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf, 
                                 JobConf.MAPRED_TASK_JAVA_OPTS);
  //  set map task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf, 
                                 MRJobConfig.MAP_JAVA_OPTS);

  //  set reduce task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf, 
                                 MRJobConfig.REDUCE_JAVA_OPTS);
}
 
Example #9
Source File: TokenUtils.java    From incubator-gobblin with Apache License 2.0
private static void getJtToken(Credentials cred) throws IOException {
  try {
    JobConf jobConf = new JobConf();
    JobClient jobClient = new JobClient(jobConf);
    LOG.info("Pre-fetching JT token from JobTracker");

    Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(getMRTokenRenewerInternal(jobConf));
    if (mrdt == null) {
      LOG.error("Failed to fetch JT token");
      throw new IOException("Failed to fetch JT token.");
    }
    LOG.info("Created JT token: " + mrdt.toString());
    LOG.info("Token kind: " + mrdt.getKind());
    LOG.info("Token id: " + Arrays.toString(mrdt.getIdentifier()));
    LOG.info("Token service: " + mrdt.getService());
    cred.addToken(mrdt.getService(), mrdt);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
 
Example #10
Source File: PipeReducer.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  // disable the auto increment of the counter. For streaming, the number of
  // processed records may be equal to or less than the number of input records.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #11
Source File: TestDatamerge.java    From big-c with Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
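The heart of the setup is the join expression built by CompositeInputFormat.compose. A minimal sketch of the same wiring outside the test harness, with hypothetical input paths:

// Hedged sketch; the two source paths are hypothetical.
static JobConf innerJoinConf() {
  JobConf job = new JobConf();
  job.set("mapreduce.join.expr",
      CompositeInputFormat.compose("inner", SequenceFileInputFormat.class,
          new Path("/data/src0"), new Path("/data/src1")));
  job.setInputFormat(CompositeInputFormat.class);
  // Map tasks then receive one TupleWritable per joined key, holding the
  // records from both sources.
  return job;
}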
 
Example #12
Source File: TestDFSIO.java    From big-c with Apache License 2.0
@Override // Mapper
public void configure(JobConf conf) {
  super.configure(conf);

  // grab compression
  String compression = getConf().get("test.io.compression.class", null);
  Class<? extends CompressionCodec> codec;

  // try to initialize codec
  try {
    codec = (compression == null) ? null : 
      Class.forName(compression).asSubclass(CompressionCodec.class);
  } catch(Exception e) {
    throw new RuntimeException("Compression codec not found: ", e);
  }

  if(codec != null) {
    compressionCodec = (CompressionCodec)
        ReflectionUtils.newInstance(codec, getConf());
  }
}
 
Example #13
Source File: DFSGeneralTest.java    From RDFS with Apache License 2.0
public void control(JobConf fsConfig, String fileName)
    throws IOException {
  String name = fileName;
  FileSystem fs = FileSystem.get(fsConfig);

  SequenceFile.Writer write = null;
  for (int i = 0; i < nmaps; i++) {
    try {
      Path controlFile = new Path(dfs_input, name + i);
      write = SequenceFile.createWriter(fs, fsConfig, controlFile,
          Text.class, Text.class, CompressionType.NONE);
      write.append(new Text(name + i), new Text(workdir));
    } finally {
      if (write != null)
        write.close();
      write = null;
    }
  }
}
 
Example #14
Source File: ReaderTextCellParallel.java    From systemds with Apache License 2.0
public ReadTask( InputSplit split, TextInputFormat informat, JobConf job, MatrixBlock dest, long rlen, long clen, boolean mm, FileFormatPropertiesMM mmProps ) {
	_split = split;
	_sparse = dest.isInSparseFormat();
	_informat = informat;
	_job = job;
	_dest = dest;
	_rlen = rlen;
	_clen = clen;
	_matrixMarket = mm;
	_mmProps = mmProps;
}
 
Example #15
Source File: MapTaskImpl.java    From big-c with Apache License 2.0
public MapTaskImpl(JobId jobId, int partition, EventHandler eventHandler,
    Path remoteJobConfFile, JobConf conf,
    TaskSplitMetaInfo taskSplitMetaInfo,
    TaskAttemptListener taskAttemptListener,
    Token<JobTokenIdentifier> jobToken,
    Credentials credentials, Clock clock,
    int appAttemptId, MRAppMetrics metrics, AppContext appContext) {
  super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile,
      conf, taskAttemptListener, jobToken, credentials, clock,
      appAttemptId, metrics, appContext);
  this.taskSplitMetaInfo = taskSplitMetaInfo;
}
 
Example #16
Source File: FrameReaderTextCSV.java    From systemds with Apache License 2.0
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names,
		long rlen, long clen)
	throws IOException, DMLRuntimeException 
{
	//prepare file access
	JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());	
	Path path = new Path( fname );
	FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
	FileInputFormat.addInputPath(job, path);
	
	//check existence and non-empty file
	checkValidInputFile(fs, path); 
	
	//compute size if necessary
	if( rlen <= 0 || clen <= 0 ) {
		Pair<Integer,Integer> size = computeCSVSize(path, job, fs);
		rlen = size.getKey();
		clen = size.getValue();
	}
	
	//allocate output frame block
	ValueType[] lschema = createOutputSchema(schema, clen);
	String[] lnames = createOutputNames(names, clen);
	FrameBlock ret = createOutputFrameBlock(lschema, lnames, rlen);

	//core read (sequential/parallel) 
	readCSVFrameFromHDFS(path, job, fs, ret, lschema, lnames, rlen, clen);
	
	return ret;
}
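A hedged usage sketch with a hypothetical file and schema; passing rlen = -1 exercises the size-computation branch above (in SystemDS the reader is normally obtained through a reader factory rather than constructed directly):

// Hedged sketch; path, schema, and column names are hypothetical.
static FrameBlock readExampleFrame() throws IOException {
	FrameReaderTextCSV reader = new FrameReaderTextCSV(new FileFormatPropertiesCSV());
	ValueType[] schema = {ValueType.STRING, ValueType.FP64};
	String[] names = {"id", "value"};
	return reader.readFrameFromHDFS("hdfs:/tmp/frame.csv", schema, names, -1, 2);
}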
 
Example #17
Source File: ParquetFileWriterFactory.java    From presto with Apache License 2.0
private static CompressionCodecName getCompression(JobConf configuration)
{
    String compressionName = configuration.get(ParquetOutputFormat.COMPRESSION);
    if (compressionName == null) {
        return CompressionCodecName.GZIP;
    }
    return CompressionCodecName.valueOf(compressionName);
}
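The key consulted is ParquetOutputFormat.COMPRESSION ("parquet.compression"); when it is unset, GZIP is the fallback. A hedged sketch exercising both branches from inside the same factory class:

// Hedged sketch; getCompression is the private helper above.
private static void compressionExamples()
{
    JobConf withSnappy = new JobConf();
    withSnappy.set(ParquetOutputFormat.COMPRESSION, "SNAPPY");
    CompressionCodecName snappy = getCompression(withSnappy);      // -> SNAPPY

    CompressionCodecName fallback = getCompression(new JobConf()); // -> GZIP (default)
}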
 
Example #18
Source File: CombineFileSplit.java    From RDFS with Apache License 2.0
public CombineFileSplit(JobConf job, Path[] files, long[] lengths) {
  long[] startoffset = new long[files.length];
  for (int i = 0; i < startoffset.length; i++) {
    startoffset[i] = 0;
  }
  String[] locations = new String[files.length];
  for (int i = 0; i < locations.length; i++) {
    locations[i] = "";
  }
  initSplit(job, files, startoffset, lengths, locations);
}
 
Example #19
Source File: KafkaInputFormat.java    From HiveKa with Apache License 2.0
private Set<String> getMoveToLatestTopicsSet(JobConf conf) {
	Set<String> topics = new HashSet<String>();

	String[] arr = getMoveToLatestTopics(conf);

	if (arr != null) {
		for (String topic : arr) {
			topics.add(topic);
		}
	}

	return topics;
}
 
Example #20
Source File: DistCpV1.java    From big-c with Apache License 2.0
public void setConf(Configuration conf) {
  if (conf instanceof JobConf) {
    this.conf = (JobConf) conf;
  } else {
    this.conf = new JobConf(conf);
  }
}
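This is the usual Configurable pattern for a Tool that wants a JobConf: reuse one when the caller already supplies it, otherwise wrap the plain Configuration. A minimal, hypothetical Tool showing the same idea end to end (MyJobTool is not part of the original project):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical Tool; ToolRunner calls setConf(...) before run(...), so the
// wrapping below guarantees run() always sees a JobConf.
public class MyJobTool implements Tool {
  private JobConf conf;

  @Override
  public void setConf(Configuration conf) {
    this.conf = (conf instanceof JobConf) ? (JobConf) conf : new JobConf(conf);
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public int run(String[] args) throws Exception {
    // Job setup would use this.conf here.
    return 0;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new MyJobTool(), args));
  }
}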
 
Example #21
Source File: TestCLI.java    From hadoop-gpu with Apache License 2.0
public void setUp() throws Exception {
  // Read the testConfig.xml file
  readTestConfigFile();
  
  // Start up the mini dfs cluster
  boolean success = false;
  conf = new Configuration();
  conf.setClass(PolicyProvider.POLICY_PROVIDER_CONFIG,
                HadoopPolicyProvider.class, PolicyProvider.class);
  conf.setBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, 
                  true);

  dfsCluster = new MiniDFSCluster(conf, 1, true, null);
  namenode = conf.get("fs.default.name", "file:///");
  clitestDataDir = new File(TEST_CACHE_DATA_DIR).
    toURI().toString().replace(' ', '+');
  username = System.getProperty("user.name");

  FileSystem fs = dfsCluster.getFileSystem();
  assertTrue("Not a HDFS: "+fs.getUri(),
             fs instanceof DistributedFileSystem);
  dfs = (DistributedFileSystem) fs;
  
   // Start up mini mr cluster
  JobConf mrConf = new JobConf(conf);
  mrCluster = new MiniMRCluster(1, dfsCluster.getFileSystem().getUri().toString(), 1, 
                         null, null, mrConf);
  jobtracker = mrCluster.createJobConf().get("mapred.job.tracker", "local");

  success = true;

  assertTrue("Error setting up Mini DFS & MR clusters", success);
}
 
Example #22
Source File: DummyInputFormat.java    From hadoop with Apache License 2.0
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  InputSplit[] splits = new InputSplit[numSplits];
  for (int i = 0; i < splits.length; ++i) {
    splits[i] = new EmptySplit();
  }
  return splits;
}
 
Example #23
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0
public void setupJob(JobConf conf) throws IOException {
	Path outputPath = FileOutputFormat.getOutputPath(conf);
	if (outputPath != null) {
		Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
		FileSystem fileSys = tmpDir.getFileSystem(conf);
		if (!fileSys.mkdirs(tmpDir)) {
			LOG.error("Mkdirs failed to create " + tmpDir.toString());
		}
	}
}
 
Example #24
Source File: GrokHelper.java    From hadoop-solr with Apache License 2.0
public static String readConfiguration(String path, JobConf conf) {
  String response = "";
  Path p = new Path(path);
  try {
    FileSystem fs = p.getFileSystem(conf);
    FSDataInputStream inputStream = fs.open(p);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    IOUtils.copyBytes(inputStream, out, conf);
    response = out.toString();
    fs.close();
  } catch (IOException e) {
    log.error("Unable to read " + path + " from HDFS", e);
  }
  return response;
}
 
Example #25
Source File: HiveExcelRowFileOutputFormat.java    From hadoopoffice with Apache License 2.0
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc,
		Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
		Progressable progress) throws IOException {
	FileSystem fs = finalOutPath.getFileSystem(jc);
	HiveExcelRowFileOutputFormat.setOutputPath(jc, finalOutPath);
    RecordWriter<?, ?> recordWriter = this.getRecordWriter(fs, jc, null, progress);
    return new HivePassThroughRecordWriter(recordWriter);
}
 
Example #26
Source File: TestMRAppWithCombiner.java    From big-c with Apache License 2.0
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
  JobConf conf = new JobConf(mrCluster.getConfig());
  int numMaps = 5;
  int numReds = 2;
  Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-in");
  Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-out");
  createInputOutPutFolder(in, out, numMaps);
  conf.setJobName("test-job-with-combiner");
  conf.setMapperClass(IdentityMapper.class);
  conf.setCombinerClass(MyCombinerToCheckReporter.class);
  //conf.setJarByClass(MyCombinerToCheckReporter.class);
  conf.setReducerClass(IdentityReducer.class);
  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, in);
  FileOutputFormat.setOutputPath(conf, out);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);
  
  runJob(conf);
}
 
Example #27
Source File: TestMiniCoronaRunJob.java    From RDFS with Apache License 2.0
private void runSleepJob(JobConf conf, int maps, int reduces)
    throws Exception {
  String[] args = {"-m", maps + "",
                   "-r", reduces + "",
                   "-mt", "1",
                   "-rt", "1" };
  ToolRunner.run(conf, new SleepJob(), args);
  // This sleep is here to wait for the JobTracker to go down completely
  TstUtils.reliableSleep(1000);
}
 
Example #28
Source File: WriterMatrixMarketParallel.java    From systemds with Apache License 2.0
public WriteMMTask(Path path, JobConf job, FileSystem fs, MatrixBlock src, int rl, int ru) {
	_path = path;
	_job = job;
	_fs = fs;
	_src = src;
	_rl = rl;
	_ru = ru;
}
 
Example #29
Source File: HiveTableSink.java    From flink with Apache License 2.0
public HiveTableSink(JobConf jobConf, ObjectPath tablePath, CatalogTable table) {
	this.jobConf = jobConf;
	this.tablePath = tablePath;
	this.catalogTable = table;
	hiveVersion = Preconditions.checkNotNull(jobConf.get(HiveCatalogValidator.CATALOG_HIVE_VERSION),
			"Hive version is not defined");
	tableSchema = table.getSchema();
}
 
Example #30
Source File: ReaderTextLIBSVMParallel.java    From systemds with Apache License 2.0
private void readLIBSVMMatrixFromHDFS(InputSplit[] splits, Path path, JobConf job, 
		MatrixBlock dest, long rlen, long clen, int blen) 
	throws IOException 
{
	FileInputFormat.addInputPath(job, path);
	TextInputFormat informat = new TextInputFormat();
	informat.configure(job);

	ExecutorService pool = CommonThreadPool.get(_numThreads);

	try 
	{
		// create read tasks for all splits
		ArrayList<LIBSVMReadTask> tasks = new ArrayList<>();
		int splitCount = 0;
		for (InputSplit split : splits) {
			tasks.add( new LIBSVMReadTask(split, _offsets, informat, job, dest, rlen, clen, splitCount++) );
		}
		pool.invokeAll(tasks);
		pool.shutdown();

		// check return codes and aggregate nnz
		long lnnz = 0;
		for (LIBSVMReadTask rt : tasks) {
			lnnz += rt.getPartialNnz();
			if (!rt.getReturnCode()) {
				Exception err = rt.getException();
				throw new IOException("Read task for libsvm input failed: "+ err.toString(), err);
			}
		}
		dest.setNonZeros(lnnz);
	} 
	catch (Exception e) {
		throw new IOException("Threadpool issue, while parallel read.", e);
	}
}