org.apache.hadoop.mapreduce.lib.output.TextOutputFormat Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.lib.output.TextOutputFormat. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: P1Q3.java    From IntroToHadoopAndMR__Udacity_Course with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the "P1Q3" MapReduce job, then exits with a status
 * that reflects whether the job succeeded.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public final static void main(final String[] args) throws Exception {
	final Configuration conf = new Configuration();

	final Job job = new Job(conf, "P1Q3");
	job.setJarByClass(P1Q3.class);

	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);

	job.setOutputKeyClass(IntWritable.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setMapperClass(P1Q3Map.class);
	// NOTE(review): combiner left disabled as in the original; presumably because the
	// reducer emits IntWritable keys while the map output key is Text -- confirm.
	//job.setCombinerClass(P1Q3Reduce.class);
	job.setReducerClass(P1Q3Reduce.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	// waitForCompletion returns false when the job fails; propagate that as a
	// non-zero exit status instead of always terminating with 0.
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #2
Source File: TopKDataGen.java    From sequenceiq-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the "TopK DataGen" DAG: a single "datagen" vertex running
 * {@code GenDataProcessor} with {@code numTasks} tasks, whose output is
 * written through {@code TextOutputFormat} to {@code outPath}.
 *
 * @param tezConf      configuration copied into the output sink
 * @param outPath      destination of the generated data
 * @param outSize      total requested output size; divided evenly (integer division) across tasks
 * @param extraColumns forwarded to {@code GenDataProcessor.createConfiguration}
 * @param numTasks     parallelism of the datagen vertex
 * @return the assembled DAG
 * @throws IOException declared by the original signature
 */
private DAG createDag(TezConfiguration tezConf, Path outPath, long outSize, int extraColumns, int numTasks)
        throws IOException {
    final long sizePerTask = outSize / numTasks;

    // Processor payload carries the per-task size and column count.
    final UserPayload payload = UserPayload.create(
            ByteBuffer.wrap(GenDataProcessor.createConfiguration(sizePerTask, extraColumns)));
    final ProcessorDescriptor generator =
            ProcessorDescriptor.create(GenDataProcessor.class.getName()).setUserPayload(payload);

    final Vertex dataGen = Vertex.create("datagen", generator, numTasks);
    dataGen.addDataSink(OUTPUT,
            MROutput.createConfigBuilder(
                    new Configuration(tezConf), TextOutputFormat.class, outPath.toUri().toString())
                    .build());

    final DAG topK = DAG.create("TopK DataGen");
    topK.addVertex(dataGen);
    return topK;
}
 
Example #3
Source File: HadoopWordCount2.java    From ignite with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the word-count task classes on the given job according to the supplied flags.
 *
 * @param job Job to configure.
 * @param setMapper Whether to set the mapper and input format classes.
 * @param setCombiner Whether to set the combiner class.
 * @param setReducer Whether to set the reducer and (text) output format classes.
 * @param outputCompression Whether to switch the output to a block-compressed sequence file
 *      using the Snappy codec. Applied last, so it replaces the output format chosen by
 *      {@code setReducer}.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner) {
        job.setCombinerClass(HadoopWordCount2Combiner.class);
    }

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (!outputCompression) {
        return;
    }

    // Compression settings: sequence-file output, block compression, Snappy codec.
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    final String codecName = SnappyCodec.class.getName();
    job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, codecName);
}
 
Example #4
Source File: LeftJoin.java    From BigData-In-Practice with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the "leftjoin" job.
 *
 * <p>Input directories, output directory and reducer count are read from the
 * configuration keys {@code input_dir}, {@code output_dir} and {@code reduce_num}
 * (default 1), after generic options in {@code args} have been applied.
 *
 * @param args command-line arguments, parsed by {@code GenericOptionsParser}
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();

    Job job = Job.getInstance(conf, "leftjoin");
    job.setJarByClass(LeftJoin.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path out = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(conf.getInt("reduce_num", 1));

    job.setMapperClass(LeftJoinMapper.class);
    job.setReducerClass(LeftJoinReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Job.getInstance takes its own copy of the Configuration, so a value set on
    // 'conf' at this point would never reach the job. Write the separator into
    // the job's configuration instead.
    job.getConfiguration().set("mapred.textoutputformat.separator", ",");

    return (job.waitForCompletion(true) ? 0 : 1);
}
 
Example #5
Source File: BigDiffHadoop.java    From secure-data-service with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the "bigdiff" MapReduce job over two text inputs and writes text output.
 *
 * @param inputPath1 first input path
 * @param inputPath2 second input path
 * @param outputPath output directory for the job
 * @throws IllegalStateException if the job runs but reports failure
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public void execute(String inputPath1, String inputPath2, String outputPath) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "bigdiff");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(inputPath1));
    FileInputFormat.addInputPath(job, new Path(inputPath2));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // waitForCompletion signals failure through its return value, not an exception;
    // without this check a failed diff would look like a successful one to the caller.
    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("bigdiff job failed for inputs '" + inputPath1
                + "' and '" + inputPath2 + "' (output '" + outputPath + "')");
    }
}
 
Example #6
Source File: CellCounter.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Builds (but does not submit) the cell-counting job.
 *
 * @param conf The configuration to use; the report separator is stored in it
 *             under {@code "ReportSeparator"} before the job is created.
 * @param args Command line: table name, output directory and an optional
 *             report separator (defaults to {@code ":"}).
 * @return The configured job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  final String table = args[0];
  final Path reportDir = new Path(args[1]);

  String separator = ":";
  if (args.length > 2) {
    separator = args[2];
  }
  conf.set("ReportSeparator", separator);

  final String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + table);
  final Job counterJob = Job.getInstance(conf, jobName);
  counterJob.setJarByClass(CellCounter.class);

  final Scan tableScan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(table, tableScan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, counterJob);

  // Map side: these calls come after initTableMapperJob, as in the original.
  counterJob.setMapOutputKeyClass(Text.class);
  counterJob.setMapOutputValueClass(IntWritable.class);

  // Reduce side: a single reducer summing the counts into a text report.
  counterJob.setNumReduceTasks(1);
  counterJob.setReducerClass(IntSumReducer.class);
  counterJob.setOutputKeyClass(Text.class);
  counterJob.setOutputValueClass(IntWritable.class);
  counterJob.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(counterJob, reportDir);

  return counterJob;
}
 
Example #7
Source File: KMeansDriver.java    From flink-perf with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a map-only job that reads the sequence file at {@code seqFilePath} and
 * writes it as text to {@code outputPath}. An existing output path is deleted
 * recursively first.
 *
 * @param conf        configuration used to create the job
 * @param fs          file system used to clear the output path
 * @param seqFilePath input sequence file
 * @param outputPath  output directory
 * @throws Exception if file-system access or the job itself fails with an exception
 */
public static void convertCentersSequenceFileToText (Configuration conf, FileSystem fs, String seqFilePath, String outputPath) throws Exception {
	final Path centersIn = new Path(seqFilePath);
	final Path textOut = new Path(outputPath);

	// Remove any previous output directory before submitting the job.
	final boolean staleOutput = fs.exists(textOut);
	if (staleOutput) {
		fs.delete(textOut, true);
	}

	final Job converter = Job.getInstance(conf);

	// Input / output formats and locations.
	converter.setInputFormatClass(SequenceFileInputFormat.class);
	converter.setOutputFormatClass(TextOutputFormat.class);
	FileInputFormat.addInputPath(converter, centersIn);
	FileOutputFormat.setOutputPath(converter, textOut);

	// Zero reduce tasks: the job is map-only even though a reducer class is registered.
	converter.setMapperClass(CenterSequenceToTextConverter.class);
	converter.setReducerClass(Reducer.class);
	converter.setNumReduceTasks(0);

	converter.setMapOutputKeyClass(LongWritable.class);
	converter.setMapOutputValueClass(Text.class);
	converter.setOutputKeyClass(LongWritable.class);
	converter.setOutputValueClass(Text.class);

	converter.waitForCompletion(true);
}
 
Example #8
Source File: ValueCooccurrencesTest.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: runs the value co-occurrences job.
 *
 * @param args generic Hadoop options followed by {@code configFile outputDir}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    final String[] remaining = parser.getRemainingArgs();
    if (remaining.length < 2) {
        System.err.println("Usage: ValueCooccurrencesTest configFile outputDir");
        System.exit(2);
    }
    final String configFile = remaining[0];
    final String outputDir = remaining[1];

    final Job job = Job.getInstance(conf);
    job.setJarByClass(ValueCooccurrencesTest.class);

    // Mapper side.
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ValueCooccurrencesMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Output side.
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    // Connector settings go into the job's own configuration.
    final Configuration jobConf = job.getConfiguration();
    jobConf.addResource(configFile);
    jobConf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);
    jobConf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
            ValueCooccurrencesFunction.class, ValueCooccurrences.class);

    final boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
 
Example #9
Source File: P1Q1.java    From IntroToHadoopAndMR__Udacity_Course with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the "P1Q1" MapReduce job, then exits with a status
 * that reflects whether the job succeeded.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public final static void main(final String[] args) throws Exception {
	final Configuration conf = new Configuration();

	final Job job = new Job(conf, "P1Q1");
	job.setJarByClass(P1Q1.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setMapperClass(P1Q1Map.class);
	// The reducer class is also registered as the combiner.
	job.setCombinerClass(P1Q1Reduce.class);
	job.setReducerClass(P1Q1Reduce.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	// waitForCompletion returns false when the job fails; propagate that as a
	// non-zero exit status instead of always terminating with 0.
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #10
Source File: ElemValueCooccurrencesTest.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: runs the element-value co-occurrences job.
 *
 * @param args generic Hadoop options followed by {@code configFile outputDir}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Both configFile (otherArgs[0]) and outputDir (otherArgs[1]) are read below,
    // so two remaining arguments are required; checking "< 1" let a single-argument
    // invocation through to an ArrayIndexOutOfBoundsException.
    if (otherArgs.length < 2) {
        System.err.println("Usage: ElemValueCooccurrencesTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ElemValueCooccurrencesTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ElemCooccurrencesMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Continue configuring through the job's own Configuration object.
    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        ElemValueCooccurrencesFunction.class, ElemValueCooccurrences.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #11
Source File: WordsTest.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: runs the words lexicon job.
 *
 * @param args generic Hadoop options followed by {@code configFile outputDir}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Both configFile (otherArgs[0]) and outputDir (otherArgs[1]) are read below,
    // so two remaining arguments are required; checking "< 1" let a single-argument
    // invocation through to an ArrayIndexOutOfBoundsException.
    if (otherArgs.length < 2) {
        System.err.println("Usage: WordsTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(WordsTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(WordsMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Continue configuring through the job's own Configuration object.
    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        Words.class, Words.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #12
Source File: ValueMatchTest.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: runs the value-match job.
 *
 * @param args generic Hadoop options followed by {@code configFile outputDir}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    final String[] remaining = parser.getRemainingArgs();
    if (remaining.length < 2) {
        System.err.println("Usage: ValueMatchTest configFile outputDir");
        System.exit(2);
    }
    final String configFile = remaining[0];
    final String outputDir = remaining[1];

    final Job job = Job.getInstance(conf);
    job.setJarByClass(ValueMatchTest.class);

    // Mapper side.
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ValueMatchMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Output side.
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    // Connector settings go into the job's own configuration.
    final Configuration jobConf = job.getConfiguration();
    jobConf.addResource(configFile);
    jobConf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);
    jobConf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
            ValueMatchFunction.class, ValueMatch.class);

    final boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
 
Example #13
Source File: P1.java    From IntroToHadoopAndMR__Udacity_Course with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the "P1" MapReduce job, then exits with a status
 * that reflects whether the job succeeded.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public final static void main(final String[] args) throws Exception {
	final Configuration conf = new Configuration();

	final Job job = new Job(conf, "P1");
	job.setJarByClass(P1.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setMapperClass(P1Map.class);
	// The reducer class is also registered as the combiner.
	job.setCombinerClass(P1Reduce.class);
	job.setReducerClass(P1Reduce.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	// waitForCompletion returns false when the job fails; propagate that as a
	// non-zero exit status instead of always terminating with 0.
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #14
Source File: ElementValueMatchTest.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: runs the element-value-match job.
 *
 * @param args generic Hadoop options followed by {@code configFile outputDir}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Both configFile (otherArgs[0]) and outputDir (otherArgs[1]) are read below,
    // so two remaining arguments are required; checking "< 1" let a single-argument
    // invocation through to an ArrayIndexOutOfBoundsException.
    if (otherArgs.length < 2) {
        System.err.println("Usage: ElementValueMatchTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ElementValueMatchTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ElementValueMatchMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Continue configuring through the job's own Configuration object.
    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        ElementValueMatchFunction.class, ElementValueMatch.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #15
Source File: ElementValuesTest.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: runs the element-values job.
 *
 * @param args generic Hadoop options followed by {@code configFile outputDir}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    final String[] remaining = parser.getRemainingArgs();
    if (remaining.length < 2) {
        System.err.println("Usage: ElementValuesTest configFile outputDir");
        System.exit(2);
    }
    final String configFile = remaining[0];
    final String outputDir = remaining[1];

    final Job job = Job.getInstance(conf);
    job.setJarByClass(ElementValuesTest.class);

    // Mapper side.
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ElementValueMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Output side.
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    // Connector settings go into the job's own configuration.
    final Configuration jobConf = job.getConfiguration();
    jobConf.addResource(configFile);
    jobConf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);
    jobConf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
            ElementValuesFunction.class, ElementValues.class);

    final boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
 
Example #16
Source File: LinkCountHDFS.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: counts links found in HDFS input and writes the totals as text.
 *
 * @param args generic Hadoop options followed by {@code inputDir outputDir}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Validate the arguments that remain AFTER generic options are stripped: those
    // are the ones indexed below. Checking the raw args length let invocations such
    // as "-D k=v inputDir" through to an ArrayIndexOutOfBoundsException.
    if (otherArgs.length < 2) {
        System.err.println("Usage: LinkCountHDFS inputDir outputDir");
        System.exit(2);
    }
    
    Job job = Job.getInstance(conf, "link count hdfs");
    job.setJarByClass(LinkCountHDFS.class);
    job.setInputFormatClass(HDFSInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example #17
Source File: FactDistinctColumnsReducerTest.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds one key (the text "100" with type byte 0) and a single empty value to
 * the reducer under test, configured with the task id {@code rowKeyColumns - 1},
 * and expects the driver to report no direct output records.
 *
 * @throws IOException if the reduce driver fails
 */
private void testNormalDim() throws IOException {
    setConfigurations();

    // Named outputs: column, dictionary, partition.
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_COLUMN, reduceDriver.getConfiguration(),
            SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_DICT, reduceDriver.getConfiguration(),
            SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_PARTITION, reduceDriver.getConfiguration(),
            TextOutputFormat.class, NullWritable.class, LongWritable.class);

    final int rowKeyColumnCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    setContextTaskId(rowKeyColumnCount - 1);

    // Encode the value through a scratch buffer and copy only the written prefix.
    final ByteBuffer scratch = ByteBuffer.allocate(4096);
    final String rawValue = "100";
    scratch.put(Bytes.toBytes(rawValue));

    final Text keyText = new Text();
    keyText.set(scratch.array(), 0, scratch.position());

    final SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
    sortableKey.init(keyText, (byte) 0);

    reduceDriver.setInput(sortableKey, ImmutableList.of(new Text()));
    final List<Pair<NullWritable, Text>> emitted = reduceDriver.run();
    assertEquals(0, emitted.size());
}
 
Example #18
Source File: JMatrixMultiplicationStep3.java    From RecommendationEngine with MIT License 5 votes vote down vote up
/**
 * Runs the matrix-multiplication step that reads {@code step9InputPath} and
 * writes comma-separated text to {@code step9OutputPath} (paths are looked up in
 * {@code ItemBasedCFDriver.path}). The output path is removed before the job
 * is submitted.
 *
 * @throws IOException            declared by the original signature
 * @throws ClassNotFoundException declared by the original signature
 * @throws InterruptedException   declared by the original signature
 */
public static void run() throws IOException, ClassNotFoundException,
		InterruptedException {
	final String source = ItemBasedCFDriver.path.get("step9InputPath");
	final String target = ItemBasedCFDriver.path.get("step9OutputPath");

	// The separator is set before the job is created from this configuration.
	final Configuration conf = new Configuration();
	conf.set("mapred.textoutputformat.separator", ",");
	final Job job = Job.getInstance(conf);

	// Clear output left by an earlier run.
	final HDFS fileSystem = new HDFS(conf);
	fileSystem.rmr(target);

	job.setJarByClass(JMatrixMultiplicationStep3.class);

	// The reducer class doubles as the combiner.
	job.setMapperClass(Step3_Mapper.class);
	job.setCombinerClass(Step3_Reducer.class);
	job.setReducerClass(Step3_Reducer.class);
	job.setNumReduceTasks(ItemBasedCFDriver.ReducerNumber);

	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setInputFormatClass(TextInputFormat.class);
	FileInputFormat.setInputPaths(job, new Path(source));

	job.setOutputFormatClass(TextOutputFormat.class);
	FileOutputFormat.setOutputPath(job, new Path(target));

	job.waitForCompletion(true);
}
 
Example #19
Source File: Phase3Step2DistinctDataJob.java    From dkpro-c4corpus with Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the distinct-data job.
 *
 * @param args {@code args[0]}: comma-separated input files; {@code args[1]}: output path
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Job distinctJob = Job.getInstance(getConf());
    distinctJob.setJarByClass(Phase3Step2DistinctDataJob.class);
    distinctJob.setJobName(Phase3Step2DistinctDataJob.class.getName());

    // map phase: Text keys, no values
    distinctJob.setMapperClass(RemoveRedundantDataMapper.class);
    distinctJob.setMapOutputKeyClass(Text.class);
    distinctJob.setMapOutputValueClass(NullWritable.class);

    // reduce phase: same key/value types as the map output
    distinctJob.setReducerClass(RemoveRedundantDataReducer.class);
    distinctJob.setOutputKeyClass(Text.class);
    distinctJob.setOutputValueClass(NullWritable.class);

    // command-line arguments
    final String inputs = args[0];
    final String destination = args[1];

    // formats; the text output format is registered through LazyOutputFormat
    distinctJob.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(distinctJob, TextOutputFormat.class);

    // locations
    FileInputFormat.addInputPaths(distinctJob, inputs);
    FileOutputFormat.setOutputPath(distinctJob, new Path(destination));

    final boolean succeeded = distinctJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
 
Example #20
Source File: ContentTypeAndSizeDistribution.java    From dkpro-c4corpus with Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the content-type and size distribution job over WARC input.
 *
 * @param args {@code args[0]}: comma-separated input files; {@code args[1]}: output path
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Job distributionJob = Job.getInstance(getConf());
    distributionJob.setJarByClass(ContentTypeAndSizeDistribution.class);
    distributionJob.setJobName(ContentTypeAndSizeDistribution.class.getName());

    // map phase and output types
    distributionJob.setMapperClass(ContentAndSizeMapper.class);
    distributionJob.setOutputKeyClass(Text.class);
    distributionJob.setOutputValueClass(LongWritable.class);

    // reduce phase
    distributionJob.setReducerClass(TextLongCountingReducer.class);

    // formats
    distributionJob.setInputFormatClass(WARCInputFormat.class);
    distributionJob.setOutputFormatClass(TextOutputFormat.class);

    // locations taken from the command line
    final String inputs = args[0];
    final String destination = args[1];
    FileInputFormat.addInputPaths(distributionJob, inputs);
    FileOutputFormat.setOutputPath(distributionJob, new Path(destination));

    final boolean succeeded = distributionJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
 
Example #21
Source File: BlockMatrixMultiplicationStep1.java    From RecommendationEngine with MIT License 5 votes vote down vote up
/**
 * Runs the first block-matrix multiplication step: reads the two inputs
 * {@code step7InputPath1} and {@code step7InputPath2} and writes comma-separated
 * text to {@code step7OutputPath} (paths are looked up in
 * {@code ItemBasedCFDriver.path}). The output path is removed before the job
 * is submitted. Only a mapper class is registered.
 *
 * @throws IOException            declared by the original signature
 * @throws ClassNotFoundException declared by the original signature
 * @throws InterruptedException   declared by the original signature
 */
public static void run() throws IOException, ClassNotFoundException,
        InterruptedException {
    final String firstInput = ItemBasedCFDriver.path.get("step7InputPath1");
    final String secondInput = ItemBasedCFDriver.path.get("step7InputPath2");
    final String target = ItemBasedCFDriver.path.get("step7OutputPath");

    // The separator is set before the job is created from this configuration.
    final Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    final Job job = Job.getInstance(conf);

    // Clear output left by an earlier run.
    final HDFS fileSystem = new HDFS(conf);
    fileSystem.rmr(target);

    job.setJarByClass(BlockMatrixMultiplicationStep1.class);
    job.setMapperClass(BlockMatrixStep1_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(firstInput), new Path(secondInput));

    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(target));

    job.waitForCompletion(true);
}
 
Example #22
Source File: TextToSentencesSplitter.java    From dkpro-c4corpus with Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the sentence-splitting job over WARC input.
 *
 * @param args generic Hadoop options followed by the comma-separated input
 *             files and the output path
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Configuration conf = getConf();
    final GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    final String[] remaining = parser.getRemainingArgs();

    final Job splitterJob = Job.getInstance(conf);
    splitterJob.setJarByClass(TextToSentencesSplitter.class);
    splitterJob.setJobName(TextToSentencesSplitter.class.getName());

    // map phase
    splitterJob.setInputFormatClass(WARCInputFormat.class);
    splitterJob.setMapperClass(TextToSentencesSplitter.MapperClass.class);

    // reduce phase: no key, text values
    splitterJob.setReducerClass(ReducerClass.class);
    splitterJob.setOutputFormatClass(TextOutputFormat.class);
    splitterJob.setOutputKeyClass(NullWritable.class);
    splitterJob.setOutputValueClass(Text.class);

    // locations taken from the remaining (non-generic) arguments
    final String inputs = remaining[0];
    final String destination = remaining[1];
    FileInputFormat.addInputPaths(splitterJob, inputs);
    FileOutputFormat.setOutputPath(splitterJob, new Path(destination));

    final boolean succeeded = splitterJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
 
Example #23
Source File: CommunityCompression.java    From distributed-graph-analytics with Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the "CommunityCompression" job from {@code inputPath} to
 * {@code outputPath}, after copying every system property held by
 * {@code dgaConfiguration} into the MapReduce configuration.
 *
 * @param args not read by this method
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    final Configuration jobSettings = this.getConf();

    // Overlay the DGA system properties onto the MapReduce configuration.
    for (java.util.Map.Entry<String, String> property : dgaConfiguration.getSystemProperties().entrySet()) {
        final String key = property.getKey();
        final String value = property.getValue();
        jobSettings.set(key, value);
    }

    final Job compression = Job.getInstance(jobSettings);
    compression.setJarByClass(CommunityCompression.class);

    final Path source = new Path(inputPath);
    final Path target = new Path(outputPath);
    FileInputFormat.setInputPaths(compression, source);
    FileOutputFormat.setOutputPath(compression, target);
    compression.setJobName("CommunityCompression");

    compression.setInputFormatClass(TextInputFormat.class);
    compression.setOutputFormatClass(TextOutputFormat.class);

    compression.setMapOutputKeyClass(Text.class);
    compression.setMapOutputValueClass(LouvainVertexWritable.class);

    compression.setMapperClass(CommunityCompression.Map.class);
    compression.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", compression);

    // 'false': wait without the verbose progress reporting.
    final boolean succeeded = compression.waitForCompletion(false);
    if (succeeded) {
        return 0;
    }
    return 1;
}
 
Example #24
Source File: GrepDriver.java    From flink-perf with Apache License 2.0 5 votes vote down vote up
/**
 * Runs one map-only grep job per pattern.
 *
 * <p>Each job reads the same input and writes to {@code <output>_<pattern>}.
 * Jobs run sequentially; the first failing job aborts the run.
 *
 * @param args {@code args[0]}: input path; {@code args[1]}: output path prefix;
 *             {@code args[2..]}: patterns to search for (may be empty)
 * @throws RuntimeException if any grep job reports failure
 * @throws Exception if a job cannot be configured or submitted
 */
public static void main(String [] args) throws Exception {
    // Input and output are mandatory; without this guard a short command line
    // failed with an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: GrepDriver <input> <output> [pattern ...]");
        System.exit(2);
    }

    final String in = args[0];
    final String out = args[1];
    System.err.println("Using input=" + in);
    System.err.println("Using output=" + out);

    // Everything after the two paths is a pattern.
    final String[] patterns = Arrays.copyOfRange(args, 2, args.length);
    System.err.println("Using patterns: " + Arrays.toString(patterns));

    for (int i = 0; i < patterns.length; i++) {
        final String pattern = patterns[i];

        // The pattern is placed in the configuration before the job is created from it.
        final Configuration conf = new Configuration();
        conf.set("pattern", pattern);

        final Job job = Job.getInstance(conf, "Grep for " + pattern);
        job.setJarByClass(Grep.class);
        job.setMapperClass(Grep.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(in));
        FileOutputFormat.setOutputPath(job, new Path(out + "_" + pattern));

        if (!job.waitForCompletion(true)) {
            throw new RuntimeException("Grep job " + i + " failed");
        }
    }
}
 
Example #25
Source File: PerformanceEvaluation.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the evaluation as a MapReduce job.
 * An input file is written first via {@code writeInputFile}; the job output goes
 * to an "outputs" directory next to the input directory.
 * @param cmd Command to run; its class name is stored in the configuration.
 * @throws IOException declared by the original signature
 * @throws InterruptedException declared by the original signature
 * @throws ClassNotFoundException declared by the original signature
 */
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Configuration settings = getConf();
  final Path inputLocation = writeInputFile(settings);

  // Record the command and the evaluation class before the job is created.
  settings.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  settings.set(EvaluationMapTask.PE_KEY, getClass().getName());

  final Job evaluation = Job.getInstance(settings);
  evaluation.setJarByClass(PerformanceEvaluation.class);
  evaluation.setJobName("HBase Performance Evaluation");

  // Input side.
  evaluation.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(evaluation, inputLocation);

  // Processing: evaluation mapper, one summing reducer.
  evaluation.setMapperClass(EvaluationMapTask.class);
  evaluation.setReducerClass(LongSumReducer.class);
  evaluation.setNumReduceTasks(1);
  evaluation.setOutputKeyClass(LongWritable.class);
  evaluation.setOutputValueClass(LongWritable.class);

  // Output side.
  final Path outputLocation = new Path(inputLocation.getParent(), "outputs");
  evaluation.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(evaluation, outputLocation);

  TableMapReduceUtil.addDependencyJars(evaluation);
  TableMapReduceUtil.initCredentials(evaluation);
  evaluation.waitForCompletion(true);
}
 
Example #26
Source File: JsonStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the output format used to store the JSON records.
 *
 * @return a new {@code TextOutputFormat} keyed by {@code LongWritable} with {@code Text} values
 * @throws IOException declared by the overridden method
 */
@Override
public OutputFormat getOutputFormat() throws IOException {
    // Plain-text output: the key is not used, and each value is a Text
    // holding the JSON data.
    final TextOutputFormat<LongWritable, Text> jsonText = new TextOutputFormat<LongWritable, Text>();
    return jsonText;
}
 
Example #27
Source File: AbstractReasoningTool.java    From rya with Apache License 2.0 5 votes vote down vote up
/**
 * Configure the job's output for producing a schema (TBox): sequence-file
 * output of {@code SchemaWritable} values at the schema path, wrapped in
 * {@code LazyOutputFormat}, plus the named outputs "schemaobj" (sequence file)
 * and {@code MRReasoningUtils.DEBUG_OUT} (text), with counters enabled.
 */
protected void configureSchemaOutput() {
    final Path schemaLocation = MRReasoningUtils.getSchemaPath(job.getConfiguration());

    // Main output: types, format and location.
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, schemaLocation);

    // Registered after the plain output format above, as in the original ordering.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Named outputs: the schema object itself and a text channel for debugging.
    MultipleOutputs.addNamedOutput(job, "schemaobj",
        SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
 
Example #28
Source File: JobContextImpl.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Look up the {@link OutputFormat} class configured for the job, falling back
 * to {@link TextOutputFormat} when {@code OUTPUT_FORMAT_CLASS_ATTR} is not set.
 * 
 * @return the {@link OutputFormat} class for the job.
 * @throws ClassNotFoundException declared by the original signature
 */
@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?,?>> getOutputFormatClass() 
   throws ClassNotFoundException {
  final Class<?> configured =
    conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class);
  return (Class<? extends OutputFormat<?,?>>) configured;
}
 
Example #29
Source File: SimpleTextSearch.java    From dkpro-c4corpus with Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the regex text-search job over WARC input.
 *
 * @param args generic Hadoop options followed by: the comma-separated input
 *             files, the output path, and the regex to search for
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args)
        throws Exception
{
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Build the job from the tool's configuration. The no-argument
    // Job.getInstance() starts from a fresh Configuration, which dropped both the
    // settings returned by getConf() and the generic options just parsed into it.
    Job job = Job.getInstance(conf);
    job.setJarByClass(SimpleTextSearch.class);

    job.setJobName(SimpleTextSearch.class.getName());

    // mapper
    job.setMapperClass(TextSearchMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // combiner + reducer
    job.setCombinerClass(TextLongCountingReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    // regex with a phrase to be searched for; stored in the job's configuration
    String regex = otherArgs[2];
    job.getConfiguration().set(MAPREDUCE_MAP_REGEX, regex);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example #30
Source File: TestRecovery.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a fixed sequence of key/value records (including null and
 * {@code NullWritable} combinations) through a {@code TextOutputFormat} record
 * writer for the given attempt, then commits the task with the committer of
 * the output format class reported by the task context.
 *
 * @param attempt task attempt whose id identifies the output
 * @param conf    configuration for the task context and output format
 * @throws Exception if writing or committing fails
 */
private void writeOutput(TaskAttempt attempt, Configuration conf)
  throws Exception {
  final TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf, 
      TypeConverter.fromYarn(attempt.getID()));

  // Raw types are kept deliberately: the writer is fed mixed key/value types.
  TextOutputFormat<?, ?> textFormat = new TextOutputFormat();
  RecordWriter writer = textFormat.getRecordWriter(taskContext);

  final NullWritable empty = NullWritable.get();
  try {
    writer.write(key1, val1);
    writer.write(null, empty);
    writer.write(null, val1);
    writer.write(empty, val2);
    writer.write(key2, empty);
    writer.write(key1, null);
    writer.write(null, null);
    writer.write(key2, val2);
  } finally {
    // Always close the writer, even when a write fails.
    writer.close(taskContext);
  }

  // Commit via the output format class that the task context reports.
  OutputFormat committingFormat = ReflectionUtils.newInstance(
      taskContext.getOutputFormatClass(), conf);
  OutputCommitter taskCommitter = committingFormat.getOutputCommitter(taskContext);
  taskCommitter.commitTask(taskContext);
}