Java Code Examples for org.apache.hadoop.mapreduce.lib.input.FileInputFormat

The following examples show how to use org.apache.hadoop.mapreduce.lib.input.FileInputFormat. They are extracted from open source projects.
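Before the project examples, here is a minimal, self-contained sketch of the typical FileInputFormat wiring: FileInputFormat registers the input paths and computes the splits, while FileOutputFormat names the output directory. The class name, job name, and map-only setup below are placeholders chosen for illustration, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class FileInputFormatSketch {
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: FileInputFormatSketch <input path> <output path>");
            System.exit(-1);
        }

        Job job = Job.getInstance(new Configuration(), "FileInputFormatSketch");
        job.setJarByClass(FileInputFormatSketch.class);

        // Map-only identity job: the default Mapper passes records through unchanged.
        job.setNumReduceTasks(0);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // addInputPath appends one more input; setInputPaths replaces the whole list.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The same two calls recur throughout the examples below: addInputPath is additive (Example 2 calls it twice to read two directories), while setInputPaths overwrites any previously registered inputs (Examples 3 and 11).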
Example 1
Project: mumu-parquet   File: MapReduceParquetMapReducer.java
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new Job(getConf(), "Text to Parquet");
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TextToParquetMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    AvroParquetOutputFormat.setSchema(job, SCHEMA);
    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(Group.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 2
Project: Wikipedia-Index   File: TF_IDF.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("TF-IDFCount");
	job.setJarByClass(TF_IDF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TF_IDFMap.class);
	job.setReducerClass(TF_IDFReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileInputFormat.addInputPath(job, new Path(args[1]));
	FileOutputFormat.setOutputPath(job, new Path(args[2]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 3
Project: Hadoop-Codes   File: MaxTempDriver.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "maxtemp");
	
	job.setMapperClass(MaxTempMapper.class);
	job.setReducerClass(MaxTempReducer.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FloatWritable.class);

	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Example 4
Project: aliyun-maxcompute-data-collectors   File: TestImportJob.java
@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
 
Example 5
Project: hadoop   File: MapReduceTestUtil.java
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
 
Example 6
Project: hadoop   File: MapReduceTestUtil.java
/**
 * Creates a simple fail job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
 
Example 7
Project: hadoop   File: MapReduceTestUtil.java
public static Job createJob(Configuration conf, Path inDir, Path outDir, 
    int numInputFiles, int numReds, String input) throws IOException {
  Job job = Job.getInstance(conf);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (fs.exists(inDir)) {
    fs.delete(inDir, true);
  }
  fs.mkdirs(inDir);
  for (int i = 0; i < numInputFiles; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }    

  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  job.setNumReduceTasks(numReds);
  return job;
}
 
Example 8
Project: hadoop   File: TestLocalRunner.java
/**
 * Run a test with a misconfigured number of mappers.
 * Expect failure.
 */
@Test
public void testInvalidMultiMapParallelism() throws Exception {
  Job job = Job.getInstance();

  Path inputPath = createMultiMapsInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(StressMapper.class);
  job.setReducerClass(CountingReducer.class);
  job.setNumReduceTasks(1);
  LocalJobRunner.setLocalMaxRunningMaps(job, -6);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertFalse("Job succeeded somehow", success);
}
 
Example 9
Project: learn-to-hadoop   File: MaxTemperatureWithCombiner.java
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 10
Project: LDA   File: CalParamDriver.java
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "calculating parameter";
    conf.set("params",String.valueOf(params));

    Job job = new Job(conf, jobName);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(indexToCountWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(CalParamsMapper.class);
    job.setReducerClass(CalParamsReducer.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job,output);

    job.setJarByClass(LDADriver.class);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("calculating parameter failed");
    }
}
 
Example 11
Project: big-data-benchmark   File: HadoopWordCount.java
public static void main(String[] args) throws Exception {
    BasicConfigurator.configure();
    Configuration conf = new Configuration();
    conf.setQuietMode(true);

    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(HadoopWordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

    long t = System.currentTimeMillis();
    job.waitForCompletion(true);

    System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
 
Example 12
Project: DocIT   File: Cut.java
public static void cut(String name, String in) throws Exception {
	Job job = createJob(name, in);
	job.setNumReduceTasks(0);
	boolean success = job.waitForCompletion(true);

	if (success) {
		// MapReduce reads an input and writes the result to a new location.
		// Hence it cannot modify data in place, and we have to replace the
		// input with the output.
		Path output = FileOutputFormat.getOutputPath(job);
		FileSystem fs = output.getFileSystem(job.getConfiguration());
		Path[] inputs = FileInputFormat.getInputPaths(job);
		for (int i = 0; i < inputs.length; i++) {
			fs.delete(inputs[i], true);
		}
		fs.rename(output, inputs[0]);
	}
}
 
Example 13
Project: Wikipedia-Index   File: TF.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	conf.set("xmlinput.start", "<page>");
	conf.set("xmlinput.end", "</page>");
	
	Job job = Job.getInstance(conf);
	job.setJobName("TermFrequencyCount");
	job.setJarByClass(TF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TFMap.class);
	job.setReducerClass(TFReduce.class);
	
	job.setInputFormatClass(XmlInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 14
Project: Wikipedia-Index   File: ExtractPage.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	conf.set("xmlinput.start", "<page>");
	conf.set("xmlinput.end", "</page>");
	
	Job job = Job.getInstance(conf);
	job.setJobName("ExtractPages");
	job.setJarByClass(ExtractPage.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Text.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);
	
	job.setMapperClass(ExtractPageMap.class);
	job.setReducerClass(ExtractPageReduce.class);
	
	job.setInputFormatClass(XmlInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 15
Project: Wikipedia-Index   File: DF.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("DocumentFrequencyCount");
	job.setJarByClass(DF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	
	job.setMapperClass(DFMap.class);
	job.setReducerClass(DFReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 16
Project: Wikipedia-Index   File: MaxThreeLabel.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("MaxThreeLabel");
	job.setJarByClass(MaxThreeLabel.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);
	
	job.setMapperClass(MaxThreeLabelMap.class);
	job.setReducerClass(MaxThreeLabelReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 17
Project: Hadoop-Codes   File: testDriver.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "test");
	
	job.setMapperClass(testMapper.class);
	job.setPartitionerClass(testPartitioner.class);
	job.setReducerClass(testReducer.class);
	job.setNumReduceTasks(10);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	
	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Example 18
Project: aliyun-maxcompute-data-collectors   File: ExportJobBase.java
@Override
protected void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
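  // Only add a file input path when the export reads from files
  // (neither an HCatalog job nor an ODPS table source).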
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }
}
 
Example 19
Project: hadoop   File: CredentialsTestJob.java
public Job createJob() throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
 
Example 20
Project: hadoop   File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) 
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
 
Example 21
Project: hadoop   File: SleepJob.java
public Job createJob(int numMapper, int numReducer, 
                     long mapSleepTime, int mapSleepCount, 
                     long reduceSleepTime, int reduceSleepCount) 
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(SleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(SleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
 
Example 22
Project: hadoop   File: LocatedFileStatusFetcher.java
/**
 * @param conf configuration for the job
 * @param dirs the initial list of paths
 * @param recursive whether to traverse the paths recursively
 * @param inputFilter inputFilter to apply to the resulting paths
 * @param newApi whether using the mapred or mapreduce API
 * @throws InterruptedException
 * @throws IOException
 */
public LocatedFileStatusFetcher(Configuration conf, Path[] dirs,
    boolean recursive, PathFilter inputFilter, boolean newApi) throws InterruptedException,
    IOException {
  int numThreads = conf.getInt(FileInputFormat.LIST_STATUS_NUM_THREADS,
      FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS);
  rawExec = Executors.newFixedThreadPool(
      numThreads,
      new ThreadFactoryBuilder().setDaemon(true)
          .setNameFormat("GetFileInfo #%d").build());
  exec = MoreExecutors.listeningDecorator(rawExec);
  resultQueue = new LinkedBlockingQueue<List<FileStatus>>();
  this.conf = conf;
  this.inputDirs = dirs;
  this.recursive = recursive;
  this.inputFilter = inputFilter;
  this.newApi = newApi;
}
 
Example 23
Project: hadoop   File: ControlledJob.java
/**
 * Submit this job to mapred. The state becomes RUNNING if submission 
 * is successful, FAILED otherwise.  
 */
protected synchronized void submit() {
  try {
    Configuration conf = job.getConfiguration();
    if (conf.getBoolean(CREATE_DIR, false)) {
      FileSystem fs = FileSystem.get(conf);
      Path inputPaths[] = FileInputFormat.getInputPaths(job);
      for (int i = 0; i < inputPaths.length; i++) {
        if (!fs.exists(inputPaths[i])) {
          try {
            fs.mkdirs(inputPaths[i]);
          } catch (IOException e) {
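            // A failed mkdirs is deliberately swallowed; submission proceeds regardless.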

          }
        }
      }
    }
    job.submit();
    this.state = State.RUNNING;
  } catch (Exception ioe) {
    LOG.info(getJobName() + " got an error while submitting ", ioe);
    this.state = State.FAILED;
    this.message = StringUtils.stringifyException(ioe);
  }
}
 
Example 24
Project: hadoop   File: TestFileInputFormat.java
@Test
public void testListLocatedStatus() throws Exception {
  Configuration conf = getConfiguration();
  conf.setBoolean("fs.test.impl.disable.cache", false);
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  MockFileSystem mockFs =
      (MockFileSystem) new Path("test:///").getFileSystem(conf);
  Assert.assertEquals("listLocatedStatus already called",
      0, mockFs.numListLocatedStatusCalls);
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  InputSplit[] splits = fileInputFormat.getSplits(job, 1);
  Assert.assertEquals("Input splits are not correct", 2, splits.length);
  Assert.assertEquals("listLocatedStatuss calls",
      1, mockFs.numListLocatedStatusCalls);
  FileSystem.closeAll();
}
 
Example 25
Project: hadoop   File: TestFileInputFormat.java
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  FileSplit[] splits = (FileSplit[]) fileInputFormat.getSplits(job, 1);
  String[] locations = splits[0].getLocations();
  Assert.assertEquals(2, locations.length);
  SplitLocationInfo[] locationInfo = splits[0].getLocationInfo();
  Assert.assertEquals(2, locationInfo.length);
  SplitLocationInfo localhostInfo = locations[0].equals("localhost") ?
      locationInfo[0] : locationInfo[1];
  SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ?
      locationInfo[0] : locationInfo[1];
  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
 
Example 26
Project: hadoop   File: TestFileInputFormat.java
@Test
public void testListStatusSimple() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestSimple(conf, localFs);

  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
 
Example 27
Project: hadoop   File: TestFileInputFormat.java
@Test
public void testListStatusNestedRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestNestedRecursive(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
 
Example 28
Project: hadoop   File: TestFileInputFormat.java
@Test
public void testListStatusNestedNonRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestNestedNonRecursive(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
 
Example 29
Project: hadoop   File: TestFileInputFormat.java
@Test
public void testListStatusErrorOnNonExistantDir() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestErrorOnNonExistantDir(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  try {
    fif.listStatus(jobConf);
    Assert.fail("Expecting an IOException for a missing Input path");
  } catch (IOException e) {
    Path expectedExceptionPath = new Path(TEST_ROOT_DIR, "input2");
    expectedExceptionPath = localFs.makeQualified(expectedExceptionPath);
    Assert.assertTrue(e instanceof InvalidInputException);
    Assert.assertEquals(
        "Input path does not exist: " + expectedExceptionPath.toString(),
        e.getMessage());
  }
}
 
Example 30
Project: hadoop   File: WordStandardDeviation.java
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordstddev <in> <out>");
    return 0;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word stddev");
  job.setJarByClass(WordStandardDeviation.class);
  job.setMapperClass(WordStandardDeviationMapper.class);
  job.setCombinerClass(WordStandardDeviationReducer.class);
  job.setReducerClass(WordStandardDeviationReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);

  // read output and calculate standard deviation
  stddev = readAndCalcStdDev(outputpath, conf);

  return (result ? 0 : 1);
}
 
Example 31
Project: hadoop   File: WordCount.java
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount <in> [<in>...] <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job,
    new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 32
Project: hadoop   File: WordMean.java
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmean <in> <out>");
    return 0;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word mean");
  job.setJarByClass(WordMean.class);
  job.setMapperClass(WordMeanMapper.class);
  job.setCombinerClass(WordMeanReducer.class);
  job.setReducerClass(WordMeanReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);
  mean = readAndCalcMean(outputpath, conf);

  return (result ? 0 : 1);
}
 
Example 33
Project: hadoop   File: TeraValidate.java
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 1;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 34
Project: hadoop   File: CompressionEmulationUtil.java
/**
 * Configure the {@link Job} for enabling compression emulation.
 */
static void configure(final Job job) throws IOException, InterruptedException,
                                            ClassNotFoundException {
  // set the random text mapper
  job.setMapperClass(RandomTextDataMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(GenDataFormat.class);
  job.setJarByClass(GenerateData.class);

  // set the output compression true
  FileOutputFormat.setCompressOutput(job, true);
  try {
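    // Dummy input path: the data-generation input format synthesizes
    // records and never reads it.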
    FileInputFormat.addInputPath(job, new Path("ignored"));
  } catch (IOException e) {
    LOG.error("Error while adding input path ", e);
  }
}
 
Example 35
Project: hadoop   File: GenerateDistCacheData.java
@Override
public Job call() throws IOException, InterruptedException,
                         ClassNotFoundException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
     public Job run() throws IOException, ClassNotFoundException,
                             InterruptedException {
      job.setMapperClass(GenDCDataMapper.class);
      job.setNumReduceTasks(0);
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(GenDCDataFormat.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setJarByClass(GenerateDistCacheData.class);
      try {
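        // Dummy input path: GenDCDataFormat produces the distributed cache
        // data itself, so the path is never read.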
        FileInputFormat.addInputPath(job, new Path("ignored"));
      } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
      }
      job.submit();
      return job;
    }
  });
  return job;
}
 
Example 36
Project: learn-to-hadoop   File: MaxTemperature.java
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("Max temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 37
Project: LDA   File: InputDriver.java
public static void runJob(Configuration conf, Path inputPath, Path output)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf, "Input Driver running input:" + inputPath);
    log.info("start running InputDriver");
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(indexToWordWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(InputMapper.class);
    job.setReducerClass(InputReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(InputDriver.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
 
Example 38
Project: mapreduce-samples   File: SentimentAnalysis.java
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("dictionary", args[2]);

    Job job = Job.getInstance(configuration);
    job.setJarByClass(SentimentAnalysis.class);
    job.setMapperClass(SentimentSplit.class);
    job.setReducerClass(SentimentCollection.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}
 
Example 39
Project: Deep_learning_using_Java   File: Recommendation_program.java
private Job jobListFriends(String inputPath, String outputPath) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job();
    job.setJarByClass(WordCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);   // Need to change the import
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.waitForCompletion(true);

    return job;
}
 
Example 40
Project: Deep_learning_using_Java   File: Recommendation_program.java
private Job jobRecommendFriends(String inputPath, String outputPath) throws IOException, InterruptedException, ClassNotFoundException {
    Job job1 = new Job();
    job1.setJarByClass(WordCount.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
   
    job1.setMapperClass(MapRecommendation.class);
    job1.setReducerClass(ReduceRecommendation.class);
   
    job1.setOutputFormatClass(TextOutputFormat.class);
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    FileInputFormat.addInputPath(job1, new Path(inputPath));
    FileOutputFormat.setOutputPath(job1, new Path(outputPath));

    job1.waitForCompletion(true);

    return job1;
}