Java Code Examples for org.apache.hadoop.mapred.JobConf#set()

The following examples show how to use org.apache.hadoop.mapred.JobConf#set(). Each example is taken from an open source project; the source file, project, and license are noted above it.
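Before the project examples, here is a minimal, self-contained sketch (not drawn from any of the projects below; the property names are purely illustrative) showing the basic pattern: JobConf#set() stores an arbitrary string property, typed setters such as setInt() and setBoolean() cover non-string values, and get() reads a value back, optionally with a default.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // set(key, value) stores an arbitrary string property;
    // "example.custom.property" is an illustrative key, not a real Hadoop key
    conf.set("example.custom.property", "some-value");

    // typed setters avoid manual string conversion
    conf.setInt("example.custom.retries", 3);
    conf.setBoolean("example.custom.enabled", true);

    // get(key) returns the stored value; get(key, default) falls back if the key is unset
    System.out.println(conf.get("example.custom.property"));
    System.out.println(conf.get("missing.key", "default-value"));
  }
}
 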
Example 1
Source File: TestTaskTrackerInstrumentation.java    From RDFS with Apache License 2.0
@Test
public void testCreateInstrumentationWithMultipleClasses() {
  // Set up configuration to create two dummy instrumentation objects
  JobConf conf = new JobConf();
  String dummyClass = DummyTaskTrackerInstrumentation.class.getName();
  String classList = dummyClass + "," + dummyClass;
  conf.set("mapred.tasktracker.instrumentation", classList);
  TaskTracker tracker = new TaskTracker();

  // Check that a composite instrumentation object is created
  TaskTrackerInstrumentation inst =
    TaskTracker.createInstrumentation(tracker, conf);
  assertEquals(CompositeTaskTrackerInstrumentation.class.getName(),
      inst.getClass().getName());
  
  // Check that each member of the composite is a dummy instrumentation
  CompositeTaskTrackerInstrumentation comp =
    (CompositeTaskTrackerInstrumentation) inst;
  List<TaskTrackerInstrumentation> insts = comp.getInstrumentations();
  assertEquals(2, insts.size());
  assertEquals(DummyTaskTrackerInstrumentation.class.getName(),
      insts.get(0).getClass().getName());
  assertEquals(DummyTaskTrackerInstrumentation.class.getName(),
      insts.get(1).getClass().getName());
}
 
Example 2
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException {
	JobConf job = new JobConf(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2013encrypt.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	FileInputFormat.setInputPaths(job, file);
	// set the locale to the one used by the test data
	job.set("hadoopoffice.read.locale.bcp47", "de");
	// low footprint
	job.set("hadoopoffice.read.lowFootprint", "true");

	job.set("hadoopoffice.read.lowFootprint.parser", "stax");
	// for decryption simply set the password

	job.set("hadoopoffice.read.security.crypt.password", "test2");
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	format.configure(job);
	InputSplit[] inputSplits = format.getSplits(job, 1);
	assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNull(reader, "Null record reader implies invalid password");
}
 
Example 3
Source File: TestMRAppMaster.java    From big-c with Apache License 2.0
@Test
public void testMRAppMasterMissingStaging() throws IOException,
    InterruptedException {
  String applicationAttemptIdStr = "appattempt_1317529182569_0004_000002";
  String containerIdStr = "container_1317529182569_0004_000002_1";
  String userName = "TestAppMasterUser";
  JobConf conf = new JobConf();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  ApplicationAttemptId applicationAttemptId = ConverterUtils
      .toApplicationAttemptId(applicationAttemptIdStr);

  //Delete the staging directory
  File dir = new File(stagingDir);
  if(dir.exists()) {
    FileUtils.deleteDirectory(dir);
  }
  
  ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
  MRAppMaster appMaster =
      new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
          System.currentTimeMillis(), false, false);
  boolean caught = false;
  try {
    MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
  } catch (IOException e) {
    //The IO Exception is expected
    LOG.info("Caught expected Exception", e);
    caught = true;
  }
  assertTrue(caught);
  assertTrue(appMaster.errorHappenedShutDown);
  //Copying the history file is disabled, but it is not really visible from 
  //here
  assertEquals(JobStateInternal.ERROR, appMaster.forcedState);
  appMaster.stop();
}
 
Example 4
Source File: AbstractMROldApiSaveTest.java    From elasticsearch-hadoop with Apache License 2.0
@Test
public void testUpsertScript() throws Exception {
    JobConf conf = createJobConf();
    conf.set(ConfigurationOptions.ES_RESOURCE, resource("mroldapi-upsert-script", "data", clusterInfo.getMajorVersion()));
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, "yes");
    conf.set(ConfigurationOptions.ES_WRITE_OPERATION, "upsert");
    conf.set(ConfigurationOptions.ES_MAPPING_ID, "number");
    conf.set(ConfigurationOptions.ES_UPDATE_SCRIPT_INLINE, "counter = 1");

    runJob(conf);
}
 
Example 5
Source File: HoodieMergeOnReadTestUtils.java    From hudi with Apache License 2.0
private static void setPropsForInputFormat(FileInputFormat inputFormat, JobConf jobConf, Schema schema,
                                           String basePath) {
  List<Schema.Field> fields = schema.getFields();
  String names = fields.stream().map(Schema.Field::name).collect(Collectors.joining(","));
  String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
  Configuration conf = HoodieTestUtils.getDefaultHadoopConf();

  String hiveColumnNames = fields.stream().filter(field -> !field.name().equalsIgnoreCase("datestr"))
      .map(Schema.Field::name).collect(Collectors.joining(","));
  hiveColumnNames = hiveColumnNames + ",datestr";

  String hiveColumnTypes = HoodieAvroUtils.addMetadataColumnTypes(HoodieTestDataGenerator.TRIP_HIVE_COLUMN_TYPES);
  hiveColumnTypes = hiveColumnTypes + ",string";
  jobConf.set(hive_metastoreConstants.META_TABLE_COLUMNS, hiveColumnNames);
  jobConf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypes);
  jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
  jobConf.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "datestr");
  conf.set(hive_metastoreConstants.META_TABLE_COLUMNS, hiveColumnNames);
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
  conf.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "datestr");
  conf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypes);

  // Hoodie Input formats are also configurable
  Configurable configurable = (Configurable)inputFormat;
  configurable.setConf(conf);
  jobConf.addResource(conf);
}
 
Example 6
Source File: TestReduceProcessor.java    From tez with Apache License 2.0
public void setUpJobConf(JobConf job) {
  job.set(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDir.toString());
  job.set(MRConfig.LOCAL_DIR, workDir.toString());
  job.setClass(
      Constants.TEZ_RUNTIME_TASK_OUTPUT_MANAGER,
      TezTaskOutputFiles.class,
      TezTaskOutput.class);
  job.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, MRPartitioner.class.getName());
  job.setNumReduceTasks(1);
  job.setInt(MRJobConfig.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
}
 
Example 7
Source File: TestKeyFieldBasedPartitioner.java    From hadoop with Apache License 2.0
@Test
public void testMultiConfigure() {
  KeyFieldBasedPartitioner<Text, Text> kfbp =
    new KeyFieldBasedPartitioner<Text, Text>();
  JobConf conf = new JobConf();
  conf.set(KeyFieldBasedPartitioner.PARTITIONER_OPTIONS, "-k1,1");
  kfbp.setConf(conf);
  Text key = new Text("foo\tbar");
  Text val = new Text("val");
  int partNum = kfbp.getPartition(key, val, 4096);
  kfbp.configure(conf);
  assertEquals(partNum, kfbp.getPartition(key,val, 4096));
}
 
Example 8
Source File: ValueAggregatorJob.java    From hadoop-gpu with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job
    , Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  //specify the aggregator descriptors
  for (int i = 0; i < descriptors.length; i++) {
    job.set("aggregator.descriptor." + i, "UserDefined," + descriptors[i].getName());
  }    
}
 
Example 9
Source File: TestMRAppMaster.java    From big-c with Apache License 2.0
@Test
public void testMRAppMasterMidLock() throws IOException,
    InterruptedException {
  String applicationAttemptIdStr = "appattempt_1317529182569_0004_000002";
  String containerIdStr = "container_1317529182569_0004_000002_1";
  String userName = "TestAppMasterUser";
  JobConf conf = new JobConf();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  ApplicationAttemptId applicationAttemptId = ConverterUtils
      .toApplicationAttemptId(applicationAttemptIdStr);
  JobId jobId =  TypeConverter.toYarn(
      TypeConverter.fromYarn(applicationAttemptId.getApplicationId()));
  Path start = MRApps.getStartJobCommitFile(conf, userName, jobId);
  FileSystem fs = FileSystem.get(conf);
  //Create the file, but no end file so we should unregister with an error.
  fs.create(start).close();
  ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
  MRAppMaster appMaster =
      new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
          System.currentTimeMillis(), false, false);
  boolean caught = false;
  try {
    MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
  } catch (IOException e) {
    //The IO Exception is expected
    LOG.info("Caught expected Exception", e);
    caught = true;
  }
  assertTrue(caught);
  assertTrue(appMaster.errorHappenedShutDown);
  assertEquals(JobStateInternal.ERROR, appMaster.forcedState);
  appMaster.stop();

  // verify the final status is FAILED
  verifyFailedStatus((MRAppMasterTest)appMaster, "FAILED");
}
 
Example 10
Source File: AbstractMROldApiSaveTest.java    From elasticsearch-hadoop with Apache License 2.0
@Test(expected = IOException.class)
public void testUpdateWithoutUpsert() throws Exception {
    JobConf conf = createJobConf();
    conf.set(ConfigurationOptions.ES_WRITE_OPERATION, "update");
    conf.set(ConfigurationOptions.ES_MAPPING_ID, "number");
    conf.set(ConfigurationOptions.ES_RESOURCE, resource("mroldapi-updatewoupsert", "data", clusterInfo.getMajorVersion()));

    runJob(conf);
}
 
Example 11
Source File: AbstractMROldApiSaveTest.java    From elasticsearch-hadoop with Apache License 2.0
@Test
public void testCreateWithId() throws Exception {
    JobConf conf = createJobConf();
    conf.set(ConfigurationOptions.ES_WRITE_OPERATION, "create");
    conf.set(ConfigurationOptions.ES_MAPPING_ID, "number");
    conf.set(ConfigurationOptions.ES_RESOURCE, resource("mroldapi-createwithid", "data", clusterInfo.getMajorVersion()));

    runJob(conf);
}
 
Example 12
Source File: LinkRank.java    From anthelion with Apache License 2.0
/**
 * Runs the link analysis job. The link analysis job applies the link rank
 * formula to create a score per url and stores that score in the NodeDb.
 * 
 * Typically the link analysis job is run a number of times to allow the link
 * rank scores to converge.
 * 
 * @param nodeDb The node database from which we are getting previous link
 * rank scores.
 * @param inverted The inverted inlinks
 * @param output The link analysis output.
 * @param iteration The current iteration number.
 * @param numIterations The total number of link analysis iterations
 * 
 * @throws IOException If an error occurs during link analysis.
 */
private void runAnalysis(Path nodeDb, Path inverted, Path output,
  int iteration, int numIterations, float rankOne)
  throws IOException {

  JobConf analyzer = new NutchJob(getConf());
  analyzer.set("link.analyze.iteration", String.valueOf(iteration + 1));
  analyzer.setJobName("LinkAnalysis Analyzer, iteration " + (iteration + 1)
    + " of " + numIterations);
  FileInputFormat.addInputPath(analyzer, nodeDb);
  FileInputFormat.addInputPath(analyzer, inverted);
  FileOutputFormat.setOutputPath(analyzer, output);
  analyzer.set("link.analyze.rank.one", String.valueOf(rankOne));
  analyzer.setMapOutputKeyClass(Text.class);
  analyzer.setMapOutputValueClass(ObjectWritable.class);
  analyzer.setInputFormat(SequenceFileInputFormat.class);
  analyzer.setMapperClass(Analyzer.class);
  analyzer.setReducerClass(Analyzer.class);
  analyzer.setOutputKeyClass(Text.class);
  analyzer.setOutputValueClass(Node.class);
  analyzer.setOutputFormat(MapFileOutputFormat.class);
  analyzer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  LOG.info("Starting analysis job");
  try {
    JobClient.runJob(analyzer);
  }
  catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished analysis job.");
}
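
The keys written with set() above (link.analyze.iteration, link.analyze.rank.one) are read back on the task side. A minimal, hypothetical sketch of that counterpart, using plain Hadoop types in place of Nutch's Analyzer and Node classes, might look like this:

import java.io.IOException;

import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Illustrative only: shows how values stored with JobConf#set() are retrieved
// in an old-API mapper's configure() method.
public class AnalyzerSketch extends MapReduceBase
    implements Mapper<Text, ObjectWritable, Text, ObjectWritable> {

  private int iteration;
  private float rankOne;

  @Override
  public void configure(JobConf conf) {
    // getInt/getFloat parse the string values that were stored via conf.set(...)
    iteration = conf.getInt("link.analyze.iteration", 0);
    rankOne = conf.getFloat("link.analyze.rank.one", 0.0f);
  }

  @Override
  public void map(Text key, ObjectWritable value,
      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
      throws IOException {
    // pass values through unchanged; the real Analyzer applies the link rank formula here
    output.collect(key, value);
  }
}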
 
Example 13
Source File: SparkUtil.java    From spork with Apache License 2.0
public static JobConf newJobConf(PigContext pigContext) throws IOException {
    JobConf jobConf = new JobConf(
            ConfigurationUtil.toConfiguration(pigContext.getProperties()));
    jobConf.set("pig.pigContext", ObjectSerializer.serialize(pigContext));
    UDFContext.getUDFContext().serialize(jobConf);
    jobConf.set("udf.import.list",
            ObjectSerializer.serialize(PigContext.getPackageImportList()));
    return jobConf;
}
 
Example 14
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetAllLowFootPrint() throws IOException {
	JobConf job = new JobConf(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2013testmultisheet.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	FileInputFormat.setInputPaths(job, file);
	// set the locale to the one used by the test data
	job.set("hadoopoffice.read.locale.bcp47", "de");
	// low footprint
	job.set("hadoopoffice.read.lowFootprint", "true");

	job.set("hadoopoffice.read.lowFootprint.parser", "sax");
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	format.configure(job);
	InputSplit[] inputSplits = format.getSplits(job, 1);
	assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNotNull(reader, "Format returned  null RecordReader");
	Text spreadSheetKey = new Text();
	ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 1 (first sheet)");
	assertEquals("[excel2013testmultisheet.xlsx]Sheet1!A1", spreadSheetKey.toString(),
			"Input Split for Excel file has keyname == \"[excel2013testmultisheet.xlsx]Sheet1!A1\"");
	assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns");
	assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
	assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
			"Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
	assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
			"Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
	assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
			"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
	assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
			"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
	assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(),
			"Input Split for Excel file contains row 1 with cell 4 == \"test4\"");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 2 (first sheet)");
	assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column");
	assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 2 with cell 1 == \"4\"");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 3 (first sheet)");
	assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns");
	assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\"");
	assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
			"Input Split for Excel file contains row 3 with cell 2 == \"5\"");
	assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null");
	assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null");
	assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(),
			"Input Split for Excel file contains row 3 with cell 5 == \"null\"");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 4 (first sheet)");
	assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column");
	assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 4 with cell 1 == \"1\"");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 5 (first sheet)");
	assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns");
	assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 5 with cell 1 == \"2\"");
	assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
			"Input Split for Excel file contains row 5 with cell 2== \"6\"");
	assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
			"Input Split for Excel file contains row 5 with cell 3== \"10\"");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 6 (first sheet)");
	assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns");
	assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 6 with cell 1 == \"3\"");
	assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
			"Input Split for Excel file contains row 6 with cell 2== \"4\"");
	assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
			"Input Split for Excel file contains row 6 with cell 3== \"15\"");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 7 (second sheet)");
	assertEquals("8", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 7 with cell 1 == \"8\"");
	assertEquals("99", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
			"Input Split for Excel file contains row 7 with cell 2 == \"99\"");
	assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 7 with 2 columns");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 8 (second sheet)");
	assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 8 with 1 column");
	assertEquals("test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 8 with cell 1 == \"test\"");
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
			"Input Split for Excel file contains row 9 (second sheet)");
	assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 9 with 3 columns");
	assertNull(spreadSheetValue.get()[0], "Input Split for Excel file contains row 9 with cell 1 == null");
	assertNull(spreadSheetValue.get()[1], "Input Split for Excel file contains row 9 with cell 2 == null");
	assertEquals("seven", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
			"Input Split for Excel file contains row 9 with cell 3 == \"seven\"");
}
 
Example 15
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0
@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(1, 2);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(
      appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress()).thenReturn(
      new InetSocketAddress("localhost", 0));

  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });

  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(resource.getMemory()).thenReturn(1024);

  TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler,
      jobFile, 1, splits, jobConf, taListener,
      new Token(), new Credentials(), new SystemClock(), appCtx);

  NodeId nid = NodeId.newInstance("127.0.0.1", 0);
  ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");
  taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId,
      "Task got killed"));
  assertFalse(
      "InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task",
      eventHandler.internalError);
}
 
Example 16
Source File: AbstractExtraMRTests.java    From elasticsearch-hadoop with Apache License 2.0
private void runJob(JobConf conf) throws Exception {
    String string = conf.get(ConfigurationOptions.ES_RESOURCE);
    string = indexPrefix + (string.startsWith("/") ? string.substring(1) : string);
    conf.set(ConfigurationOptions.ES_RESOURCE, string);
    JobClient.runJob(conf);
}
 
Example 17
Source File: CloudBurst.java    From emr-sample-apps with Apache License 2.0
public static RunningJob alignall(String refpath, 
		                          String qrypath,
		                          String outpath,
		                          int MIN_READ_LEN,
		                          int MAX_READ_LEN,
		                          int K,
		                          int ALLOW_DIFFERENCES,
		                          boolean FILTER_ALIGNMENTS,
		                          int NUM_MAP_TASKS,
		                          int NUM_REDUCE_TASKS,
		                          int BLOCK_SIZE,
		                          int REDUNDANCY) throws IOException, Exception
{
	int SEED_LEN   = MIN_READ_LEN / (K+1);
	int FLANK_LEN  = MAX_READ_LEN-SEED_LEN+K;
	
	System.out.println("refath: "            + refpath);
	System.out.println("qrypath: "           + qrypath);
	System.out.println("outpath: "           + outpath);
	System.out.println("MIN_READ_LEN: "      + MIN_READ_LEN);
	System.out.println("MAX_READ_LEN: "      + MAX_READ_LEN);
	System.out.println("K: "                 + K);
	System.out.println("SEED_LEN: "          + SEED_LEN);
	System.out.println("FLANK_LEN: "         + FLANK_LEN);
	System.out.println("ALLOW_DIFFERENCES: " + ALLOW_DIFFERENCES);
	System.out.println("FILTER_ALIGNMENTS: " + FILTER_ALIGNMENTS);
	System.out.println("NUM_MAP_TASKS: "     + NUM_MAP_TASKS);
	System.out.println("NUM_REDUCE_TASKS: "  + NUM_REDUCE_TASKS);
	System.out.println("BLOCK_SIZE: "        + BLOCK_SIZE);
	System.out.println("REDUNDANCY: "        + REDUNDANCY);
	
	JobConf conf = new JobConf(MerReduce.class);
	conf.setJobName("CloudBurst");
	conf.setNumMapTasks(NUM_MAP_TASKS);
	conf.setNumReduceTasks(NUM_REDUCE_TASKS);
	
	FileInputFormat.addInputPath(conf, new Path(refpath));
	FileInputFormat.addInputPath(conf, new Path(qrypath));

	conf.set("refpath",           refpath);
	conf.set("qrypath",           qrypath);
	conf.set("MIN_READ_LEN",      Integer.toString(MIN_READ_LEN));
	conf.set("MAX_READ_LEN",      Integer.toString(MAX_READ_LEN));
	conf.set("K",                 Integer.toString(K));
	conf.set("SEED_LEN",          Integer.toString(SEED_LEN));
	conf.set("FLANK_LEN",         Integer.toString(FLANK_LEN));
	conf.set("ALLOW_DIFFERENCES", Integer.toString(ALLOW_DIFFERENCES));
	conf.set("BLOCK_SIZE",        Integer.toString(BLOCK_SIZE));
	conf.set("REDUNDANCY",        Integer.toString(REDUNDANCY));
	conf.set("FILTER_ALIGNMENTS", (FILTER_ALIGNMENTS ? "1" : "0"));
	
	conf.setMapperClass(MapClass.class);
	
	conf.setInputFormat(SequenceFileInputFormat.class);			
	conf.setMapOutputKeyClass(BytesWritable.class);
	conf.setMapOutputValueClass(BytesWritable.class);
	
	conf.setReducerClass(ReduceClass.class);		
	conf.setOutputKeyClass(IntWritable.class);
	conf.setOutputValueClass(BytesWritable.class);
	conf.setOutputFormat(SequenceFileOutputFormat.class);

	Path oPath = new Path(outpath);
	FileOutputFormat.setOutputPath(conf, oPath);
	System.err.println("  Removing old results");
	FileSystem.get(conf).delete(oPath);
	
	RunningJob rj = JobClient.runJob(conf);
	System.err.println("CloudBurst Finished");
	return rj;
}
 
Example 18
Source File: TestPipeApplication.java    From hadoop with Apache License 2.0
/**
 * Tests org.apache.hadoop.mapred.pipes.Application.
 * Exercises internal message types: MessageType.REGISTER_COUNTER, INCREMENT_COUNTER, STATUS, PROGRESS, ...
 *
 * @throws Throwable
 */

@Test
public void testApplication() throws Throwable {
  JobConf conf = new JobConf();

  RecordReader<FloatWritable, NullWritable> rReader = new Reader();

  // client for test
  File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationStub");

  TestTaskReporter reporter = new TestTaskReporter();

  File[] psw = cleanTokenPasswordFile();
  try {

    conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);
    conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());

    // token for authorization
    Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>(
            "user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
            "service"));

    TokenCache.setJobToken(token, conf.getCredentials());
    FakeCollector output = new FakeCollector(new Counters.Counter(),
            new Progress());
    FileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);
    Writer<IntWritable, Text> wr = new Writer<IntWritable, Text>(conf, fs.create(
            new Path(workSpace.getAbsolutePath() + File.separator + "outfile")),
            IntWritable.class, Text.class, null, null, true);
    output.setWriter(wr);
    conf.set(Submitter.PRESERVE_COMMANDFILE, "true");

    initStdOut(conf);

    Application<WritableComparable<IntWritable>, Writable, IntWritable, Text> application = new Application<WritableComparable<IntWritable>, Writable, IntWritable, Text>(
            conf, rReader, output, reporter, IntWritable.class, Text.class);
    application.getDownlink().flush();

    application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));

    application.getDownlink().flush();

    application.waitForFinish();

    wr.close();

    // test getDownlink().mapItem();
    String stdOut = readStdOut(conf);
    assertTrue(stdOut.contains("key:3"));
    assertTrue(stdOut.contains("value:txt"));

    // the reporter's counter and status should have been sent
    // test MessageType.REGISTER_COUNTER and INCREMENT_COUNTER
    assertEquals(1.0, reporter.getProgress(), 0.01);
    assertNotNull(reporter.getCounter("group", "name"));
    // test status MessageType.STATUS
    assertEquals(reporter.getStatus(), "PROGRESS");
    stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator
            + "outfile"));
    // check MessageType.PROGRESS
    assertEquals(0.55f, rReader.getProgress(), 0.001);
    application.getDownlink().close();
    // test MessageType.OUTPUT
    Entry<IntWritable, Text> entry = output.getCollect().entrySet()
            .iterator().next();
    assertEquals(123, entry.getKey().get());
    assertEquals("value", entry.getValue().toString());
    try {
      // try to abort
      application.abort(new Throwable());
      fail();
    } catch (IOException e) {
      // abort works ?
      assertEquals("pipe child exception", e.getMessage());
    }
  } finally {
    if (psw != null) {
      // remove password files
      for (File file : psw) {
        file.deleteOnExit();
      }
    }
  }
}
 
Example 19
Source File: TestPipesNonJavaInputFormat.java    From hadoop with Apache License 2.0
/**
 * Tests PipesNonJavaInputFormat.
 */

@Test
public void testFormat() throws IOException {

  PipesNonJavaInputFormat inputFormat = new PipesNonJavaInputFormat();
  JobConf conf = new JobConf();

  Reporter reporter= mock(Reporter.class);
  RecordReader<FloatWritable, NullWritable> reader = inputFormat
      .getRecordReader(new FakeSplit(), conf, reporter);
  assertEquals(0.0f, reader.getProgress(), 0.001);

  // input and output files
  File input1 = new File(workSpace + File.separator + "input1");
  if (!input1.getParentFile().exists()) {
    Assert.assertTrue(input1.getParentFile().mkdirs());
  }

  if (!input1.exists()) {
    Assert.assertTrue(input1.createNewFile());
  }

  File input2 = new File(workSpace + File.separator + "input2");
  if (!input2.exists()) {
    Assert.assertTrue(input2.createNewFile());
  }
  // set data for splits
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      StringUtils.escapeString(input1.getAbsolutePath()) + ","
          + StringUtils.escapeString(input2.getAbsolutePath()));
  InputSplit[] splits = inputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);

  PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader = new PipesNonJavaInputFormat.PipesDummyRecordReader(
      conf, splits[0]);
  // empty dummyRecordReader
  assertNull(dummyRecordReader.createKey());
  assertNull(dummyRecordReader.createValue());
  assertEquals(0, dummyRecordReader.getPos());
  assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
   // test method next
  assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
  assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
  dummyRecordReader.close();
}
 
Example 20
Source File: Submitter.java    From big-c with Apache License 2.0
/**
 * Set the configuration, if it doesn't already have a value for the given
 * key.
 * @param conf the configuration to modify
 * @param key the key to set
 * @param value the new "default" value to set
 */
private static void setIfUnset(JobConf conf, String key, String value) {
  if (conf.get(key) == null) {
    conf.set(key, value);
  }
}