org.apache.hadoop.mapred.TaskAttemptID Java Examples

The following examples show how to use org.apache.hadoop.mapred.TaskAttemptID. All of them come from open-source projects; the source file, project, and license are listed above each example.
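As a quick orientation before the examples, the sketch below is not taken from any of the projects listed; the jobtracker identifier "jt" and the numeric ids are placeholder values. It constructs a TaskAttemptID, serializes it, and parses it back with forName():

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdSketch {
    public static void main(String[] args) {
        // Placeholder values: jobtracker id "jt", job 3, map task 5, first attempt.
        TaskAttemptID attempt = new TaskAttemptID("jt", 3, TaskType.MAP, 5, 0);

        // The canonical string form, e.g. attempt_jt_0003_m_000005_0.
        String text = attempt.toString();

        // forName() parses the string back; getTaskID() and getJobID() recover the parent ids.
        TaskAttemptID parsed = TaskAttemptID.forName(text);
        System.out.println(parsed.getTaskID() + " belongs to " + parsed.getJobID());

        // Several examples below also store the id in the job configuration for the task runtime to pick up.
        JobConf conf = new JobConf();
        conf.set("mapreduce.task.attempt.id", text);
    }
}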
Example #1
Source File: HadoopCfgUtils.java    From elasticsearch-hadoop with Apache License 2.0
public static TaskID getTaskID(Configuration cfg) {
    // first try with the attempt since some Hadoop versions mix the two
    String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg);
    if (StringUtils.hasText(taskAttemptId)) {
        try {
            return TaskAttemptID.forName(taskAttemptId).getTaskID();
        } catch (IllegalArgumentException ex) {
            // the task attempt is invalid (Tez in particular uses the wrong string - see #346)
            // try to fallback to task id
            return parseTaskIdFromTaskAttemptId(taskAttemptId);
        }
    }
    String taskIdProp = HadoopCfgUtils.getTaskId(cfg);
    // double-check task id bug in Hadoop 2.5.x
    if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) {
        return TaskID.forName(taskIdProp);
    }
    return null;
}
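For context on the parsing above: a serialized attempt id has the shape attempt_<jtIdentifier>_<jobNumber>_<typeChar>_<taskNumber>_<attemptNumber>, where the type character is m for map tasks and r for reduce tasks. A minimal round trip (placeholder values, not part of elasticsearch-hadoop) looks like this:

// Placeholder attempt id string; forName() throws IllegalArgumentException on malformed input,
// which is exactly the case the fallback above handles.
String text = "attempt_200707121733_0003_m_000005_0";
TaskAttemptID attempt = TaskAttemptID.forName(text);
TaskID task = attempt.getTaskID();      // task_200707121733_0003_m_000005
int attemptNumber = attempt.getId();    // 0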
 
Example #2
Source File: ContentIndexingColumnBasedHandlerTest.java    From datawave with Apache License 2.0
@Before
public void setUp() throws Exception {
    
    conf = new Configuration();
    conf.addResource("config/all-config.xml");
    ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ctx.getConfiguration().setInt(ContentIndexingColumnBasedHandler.NUM_SHARDS, 131);
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_TNAME, "shard");
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_GIDX_TNAME, "shardIndex");
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_GRIDX_TNAME, "shardIndex");
    ctx.getConfiguration().set(TypeRegistry.INGEST_DATA_TYPES, "test");
    ctx.getConfiguration().set("data.name", "test");
    ctx.getConfiguration().set("test.data.auth.id.mode", "NEVER");
    ctx.getConfiguration().set("test" + BaseIngestHelper.DEFAULT_TYPE, LcNoDiacriticsType.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.HANDLER_CLASSES, TestContentIndexingColumnBasedHandler.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.RAW_READER, TestEventRecordReader.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.INGEST_HELPER, TestContentBaseIngestHelper.class.getName());
    ctx.getConfiguration().set(TypeRegistry.EXCLUDED_HANDLER_CLASSES, "FAKE_HANDLER_CLASS"); // it will die if this field is not faked
    
    helper = new TestContentBaseIngestHelper();
    colVis = new ColumnVisibility("");
}
 
Example #3
Source File: SplitBasedHashPartitionerTest.java    From datawave with Apache License 2.0
private TaskInputOutputContextImpl getTaskInputOutputContext(final String testFilePath, final Configuration conf) {
    return new TaskInputOutputContextImpl(conf, new TaskAttemptID(), null, null, null) {
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            throw new UnsupportedOperationException();
        }
        
        @Override
        public Object getCurrentKey() throws IOException, InterruptedException {
            throw new UnsupportedOperationException();
        }
        
        @Override
        public Object getCurrentValue() throws IOException, InterruptedException {
            throw new UnsupportedOperationException();
        }
        
        @Deprecated
        public Path[] getLocalCacheFiles() throws IOException {
            return new Path[] {new Path(testFilePath)};
        }
    };
}
 
Example #4
Source File: HadoopSource.java    From twister2 with Apache License 2.0
@Override
public boolean hasNext() {
  if (currentReader != null) {
    try {
      boolean current = currentReader.nextKeyValue();
      while (!current && consumingSplit < assignedSplits.size() - 1) {
        TaskID taskID = new TaskID(context.getId(), context.getIndex(),
            TaskType.MAP, context.getIndex());
        TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
        consumingSplit++;
        TaskAttemptContextImpl taskAttemptContext =
            new TaskAttemptContextImpl(jconf, taskAttemptID);
        currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
            taskAttemptContext);
        currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
        current = currentReader.nextKeyValue();
      }
      return current;
    } catch (IOException | InterruptedException e) {
      throw new RuntimeException("Failed to read the next key vale", e);
    }
  }
  return false;
}
 
Example #5
Source File: HadoopV1OutputCollector.java    From ignite with Apache License 2.0
/**
 * @param jobConf Job configuration.
 * @param taskCtx Task context.
 * @param directWrite Direct write flag.
 * @param fileName File name.
 * @throws IOException In case of IO exception.
 */
HadoopV1OutputCollector(JobConf jobConf, HadoopTaskContext taskCtx, boolean directWrite,
    @Nullable String fileName, TaskAttemptID attempt) throws IOException {
    this.jobConf = jobConf;
    this.taskCtx = taskCtx;
    this.attempt = attempt;

    if (directWrite) {
        jobConf.set("mapreduce.task.attempt.id", attempt.toString());

        OutputFormat outFormat = jobConf.getOutputFormat();

        writer = outFormat.getRecordWriter(null, jobConf, fileName, Reporter.NULL);
    }
    else
        writer = null;
}
 
Example #6
Source File: HadoopSourceWithMap.java    From twister2 with Apache License 2.0
@Override
public boolean hasNext() {
  if (currentReader != null) {
    try {
      boolean current = currentReader.nextKeyValue();
      while (!current && consumingSplit < assignedSplits.size() - 1) {
        TaskID taskID = new TaskID(context.getId(), context.getIndex(),
            TaskType.MAP, context.getIndex());
        TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
        consumingSplit++;
        TaskAttemptContextImpl taskAttemptContext =
            new TaskAttemptContextImpl(jconf, taskAttemptID);
        currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
            taskAttemptContext);
        currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
        current = currentReader.nextKeyValue();
      }
      return current;
    } catch (IOException | InterruptedException e) {
      throw new RuntimeException("Failed to read the next key vale", e);
    }
  }
  return false;
}
 
Example #7
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0
public void commitTask(JobConf conf, TaskAttemptID taskAttemptID)
	throws IOException {
	Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID);
	if (taskOutputPath != null) {
		FileSystem fs = taskOutputPath.getFileSystem(conf);
		if (fs.exists(taskOutputPath)) {
			Path jobOutputPath = taskOutputPath.getParent().getParent();
			// Move the task outputs to their final place
			moveTaskOutputs(conf,taskAttemptID, fs, jobOutputPath, taskOutputPath);
			// Delete the temporary task-specific output directory
			if (!fs.delete(taskOutputPath, true)) {
				LOG.info("Failed to delete the temporary output" +
					" directory of task: " + taskAttemptID + " - " + taskOutputPath);
			}
			LOG.info("Saved output of task '" + taskAttemptID + "' to " +
				jobOutputPath);
		}
	}
}
 
Example #8
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0
public boolean needsTaskCommit(JobConf conf, TaskAttemptID taskAttemptID)
	throws IOException {
	try {
		Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID);
		if (taskOutputPath != null) {
			// Get the file-system for the task output directory
			FileSystem fs = taskOutputPath.getFileSystem(conf);
			// since task output path is created on demand,
			// if it exists, task needs a commit
			if (fs.exists(taskOutputPath)) {
				return true;
			}
		}
	} catch (IOException  ioe) {
		throw ioe;
	}
	return false;
}
 
Example #9
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
	Path outputPath = FileOutputFormat.getOutputPath(conf);
	if (outputPath != null) {
		Path p = new Path(outputPath,
			(FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR +
				"_" + taskAttemptID.toString()));
		try {
			FileSystem fs = p.getFileSystem(conf);
			return p.makeQualified(fs);
		} catch (IOException ie) {
			LOG.warn(StringUtils.stringifyException(ie));
			return p;
		}
	}
	return null;
}
 
Example #10
Source File: TestStreamingStatus.java    From big-c with Apache License 2.0
void validateTaskStderr(StreamJob job, TaskType type)
    throws IOException {
  TaskAttemptID attemptId =
      new TaskAttemptID(new TaskID(job.jobId_, type, 0), 0);

  String log = MapReduceTestUtil.readTaskLog(TaskLog.LogName.STDERR,
      attemptId, false);

  // trim() is called on expectedStderr here because the method
  // MapReduceTestUtil.readTaskLog() returns trimmed String.
  assertTrue(log.equals(expectedStderr.trim()));
}
 
Example #11
Source File: HadoopOutputFormatBase.java    From flink with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		if (Integer.toString(taskNumber + 1).length() > 6) {
			throw new IOException("Task id too large.");
		}

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
				+ String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
				+ Integer.toString(taskNumber + 1)
				+ "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
	}
}
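The string Flink assembles above zero-pads the task number to six digits so that the result is a well-formed mapred attempt id with an empty jobtracker identifier (splitting on underscores still yields the six fields forName() expects). A hypothetical helper with the same effect, shown only as a simplification and not part of Flink:

// Hypothetical helper: produces e.g. "attempt__0000_r_000007_0" for taskNumber = 6.
static String reduceAttemptId(int taskNumber) {
    return String.format("attempt__0000_r_%06d_0", taskNumber + 1);
}

The same construction appears again in Example #14 and relies on TaskAttemptID.forName() accepting the padded string.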
 
Example #12
Source File: TestEventFetcher.java    From big-c with Apache License 2.0
private MapTaskCompletionEventsUpdate getMockedCompletionEventsUpdate(
    int startIdx, int numEvents) {
  ArrayList<TaskCompletionEvent> tceList =
      new ArrayList<TaskCompletionEvent>(numEvents);
  for (int i = 0; i < numEvents; ++i) {
    int eventIdx = startIdx + i;
    TaskCompletionEvent tce = new TaskCompletionEvent(eventIdx,
        new TaskAttemptID("12345", 1, TaskType.MAP, eventIdx, 0),
        eventIdx, true, TaskCompletionEvent.Status.SUCCEEDED,
        "http://somehost:8888");
    tceList.add(tce);
  }
  TaskCompletionEvent[] events = {};
  return new MapTaskCompletionEventsUpdate(tceList.toArray(events), false);
}
 
Example #13
Source File: TestShuffleScheduler.java    From big-c with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void testTipFailed() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);

  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }

    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();

  TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE,
      0, 0);
  ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status,
      reduceId, null, progress, null, null, null);

  JobID jobId = new JobID();
  TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1);
  scheduler.tipFailed(taskId1);

  Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(),
      0.0f);
  Assert.assertFalse(scheduler.waitUntilDone(1));

  TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0);
  scheduler.tipFailed(taskId0);
  Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(),
      0.0f);
  Assert.assertTrue(scheduler.waitUntilDone(1));
}
 
Example #14
Source File: HadoopOutputFormatBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		if (Integer.toString(taskNumber + 1).length() > 6) {
			throw new IOException("Task id too large.");
		}

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
				+ String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
				+ Integer.toString(taskNumber + 1)
				+ "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
	}
}
 
Example #15
Source File: HadoopSource.java    From twister2 with Apache License 2.0
@Override
public void prepare(TSetContext ctx) {
  this.context = ctx;
  Configuration hadoopConf = this.wrappedConfiguration.getConfiguration();
  jconf = new JobConf(hadoopConf);
  try {
    format = inputClazz.newInstance();
    JobContext jobContext = new JobContextImpl(hadoopConf, new JobID(context.getId(),
        context.getIndex()));
    List<InputSplit> splits = format.getSplits(jobContext);

    for (int i = 0; i < splits.size(); i++) {
      if (i % context.getParallelism() == context.getIndex()) {
        assignedSplits.add(splits.get(i));
      }
    }

    if (assignedSplits.size() > 0) {
      TaskID taskID = new TaskID(context.getId(), context.getIndex(),
          TaskType.MAP, context.getIndex());
      TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
      TaskAttemptContextImpl taskAttemptContext =
          new TaskAttemptContextImpl(jconf, taskAttemptID);
      currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
          taskAttemptContext);
      currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
    }
  } catch (InstantiationException | IllegalAccessException
      | InterruptedException | IOException e) {
    throw new RuntimeException("Failed to initialize hadoop input", e);
  }
}
 
Example #16
Source File: HadoopSourceWithMap.java    From twister2 with Apache License 2.0
@Override
public void prepare(TSetContext ctx) {
  this.context = ctx;
  Configuration hadoopConf = this.wrappedConfiguration.getConfiguration();
  jconf = new JobConf(hadoopConf);
  try {
    format = inputClazz.newInstance();
    JobContext jobContext = new JobContextImpl(hadoopConf, new JobID(context.getId(),
        context.getIndex()));
    List<InputSplit> splits = format.getSplits(jobContext);

    for (int i = 0; i < splits.size(); i++) {
      if (i % context.getParallelism() == context.getIndex()) {
        assignedSplits.add(splits.get(i));
      }
    }

    if (assignedSplits.size() > 0) {
      TaskID taskID = new TaskID(context.getId(), context.getIndex(),
          TaskType.MAP, context.getIndex());
      TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
      TaskAttemptContextImpl taskAttemptContext =
          new TaskAttemptContextImpl(jconf, taskAttemptID);
      currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
          taskAttemptContext);
      currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
    }
  } catch (InstantiationException | IllegalAccessException
      | InterruptedException | IOException e) {
    throw new RuntimeException("Failed to initialize hadoop input", e);
  }
}
 
Example #17
Source File: TestPipeApplication.java    From hadoop with Apache License 2.0
/**
 * clean previous std error and outs
 */

private void initStdOut(JobConf configuration) {
  TaskAttemptID taskId = TaskAttemptID.forName(configuration
          .get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);
  File stdErr = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDERR);
  // prepare folder
  if (!stdOut.getParentFile().exists()) {
    stdOut.getParentFile().mkdirs();
  } else { // clean logs
    stdOut.deleteOnExit();
    stdErr.deleteOnExit();
  }
}
 
Example #18
Source File: TestPipeApplication.java    From hadoop with Apache License 2.0
private String readStdOut(JobConf conf) throws Exception {
  TaskAttemptID taskId = TaskAttemptID.forName(conf
          .get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);

  return readFile(stdOut);

}
 
Example #19
Source File: TestEventFetcher.java    From hadoop with Apache License 2.0
private MapTaskCompletionEventsUpdate getMockedCompletionEventsUpdate(
    int startIdx, int numEvents) {
  ArrayList<TaskCompletionEvent> tceList =
      new ArrayList<TaskCompletionEvent>(numEvents);
  for (int i = 0; i < numEvents; ++i) {
    int eventIdx = startIdx + i;
    TaskCompletionEvent tce = new TaskCompletionEvent(eventIdx,
        new TaskAttemptID("12345", 1, TaskType.MAP, eventIdx, 0),
        eventIdx, true, TaskCompletionEvent.Status.SUCCEEDED,
        "http://somehost:8888");
    tceList.add(tce);
  }
  TaskCompletionEvent[] events = {};
  return new MapTaskCompletionEventsUpdate(tceList.toArray(events), false);
}
 
Example #20
Source File: TestShuffleScheduler.java    From hadoop with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void testTipFailed() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);

  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }

    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();

  TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE,
      0, 0);
  ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status,
      reduceId, null, progress, null, null, null);

  JobID jobId = new JobID();
  TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1);
  scheduler.tipFailed(taskId1);

  Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(),
      0.0f);
  Assert.assertFalse(scheduler.waitUntilDone(1));

  TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0);
  scheduler.tipFailed(taskId0);
  Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(),
      0.0f);
  Assert.assertTrue(scheduler.waitUntilDone(1));
}
 
Example #21
Source File: MneMapredChunkDataTest.java    From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws Exception {
  m_workdir = new Path(
          System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();
  unsafe = Utils.getUnsafe();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
          MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.CHUNK});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
          MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.CHUNK});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{});
}
 
Example #22
Source File: MneMapredLongDataTest.java    From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
          System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "long-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
          MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.LONG});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
          MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 2);
  MneConfigHelper.setDurableTypes(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.LONG});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{});
}
 
Example #23
Source File: MneMapredPersonDataTest.java    From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
          System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "person-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
          MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.DURABLE});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
          MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{PersonListEFProxy.class});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.DURABLE});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{PersonListEFProxy.class});
}
 
Example #24
Source File: TestVCFOutputFormat.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException {
    test_vcf_output = File.createTempFile("test_vcf_output", "");
    test_vcf_output.delete();
    writable = new VariantContextWritable();
    Configuration conf = new Configuration();
    conf.set("hadoopbam.vcf.output-format", "VCF");
    KeyIgnoringVCFOutputFormat<Long> outputFormat = new KeyIgnoringVCFOutputFormat<Long>(conf);
    outputFormat.setHeader(readHeader());
    taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    writer = outputFormat.getRecordWriter(taskAttemptContext, new Path("file://" + test_vcf_output));
}
 
Example #25
Source File: RunningJobProxyV2.java    From pentaho-hadoop-shims with Apache License 2.0
/**
 * Retrieve the diagnostic messages for a given task attempt.
 *
 * @param taskAttemptId Identifier of the task
 * @return an array of diagnostic messages for the task attempt with the id provided.
 * @throws java.io.IOException
 */
@Override public String[] getTaskDiagnostics( Object taskAttemptId ) throws IOException {
  TaskAttemptID id = (TaskAttemptID) taskAttemptId;
  try {
    return delegateJob.getTaskDiagnostics( id );
  } catch ( InterruptedException e ) {
    throw new RuntimeException( e );
  }
}
 
Example #26
Source File: HadoopOutputFormatWrapper.java    From stratosphere with Apache License 2.0
/**
 * commit the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
	this.recordWriter.close(new DummyHadoopReporter());
	if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf, TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))) {
		this.fileOutputCommitterWrapper.commitTask(this.jobConf, TaskAttemptID.forName(this.jobConf.get("mapred.task.id")));
	}
//TODO: commitjob when all the tasks are finished
}
 
Example #27
Source File: TaskCompletionEventProxyTest.java    From pentaho-hadoop-shims with Apache License 2.0
@Test
public void getTaskAttemptId() {
  final TaskAttemptID id = new TaskAttemptID( new TaskID(), 0 );
  org.apache.hadoop.mapred.TaskCompletionEvent delegate = new org.apache.hadoop.mapred.TaskCompletionEvent() {
    public org.apache.hadoop.mapred.TaskAttemptID getTaskAttemptId() {
      return id;
    }
  };
  TaskCompletionEventProxy proxy = new TaskCompletionEventProxy( delegate );

  assertEquals( id, proxy.getTaskAttemptId() );
}
 
Example #28
Source File: HadoopOutputFormatWrapper.java    From stratosphere with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	this.fileOutputCommitterWrapper.setupJob(this.jobConf);
	if (Integer.toString(taskNumber + 1).length() <= 6) {
		this.jobConf.set("mapred.task.id", "attempt__0000_r_" + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s"," ").replace(" ", "0") + Integer.toString(taskNumber + 1) + "_0");
		//compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
		this.jobConf.set("mapreduce.task.output.dir", this.fileOutputCommitterWrapper.getTempTaskOutputPath(this.jobConf,TaskAttemptID.forName(this.jobConf.get("mapred.task.id"))).toString());
	} else {
		throw new IOException("task id too large");
	}
	this.recordWriter = this.hadoopOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new DummyHadoopProgressable());
}
 
Example #29
Source File: MRInputBase.java    From incubator-tez with Apache License 2.0
public List<Event> initialize() throws IOException {
  getContext().requestInitialMemory(0l, null); // mandatory call
  MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload =
      MRHelpers.parseMRInputPayload(getContext().getUserPayload());
  Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
      "Split information not expected in " + this.getClass().getName());
  Configuration conf = MRHelpers.createConfFromByteString(mrUserPayload.getConfigurationBytes());

  this.jobConf = new JobConf(conf);
  // Add tokens to the jobConf - in case they are accessed within the RR / IF
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

  TaskAttemptID taskAttemptId = new TaskAttemptID(
      new TaskID(
          Long.toString(getContext().getApplicationId().getClusterTimestamp()),
          getContext().getApplicationId().getId(), TaskType.MAP,
          getContext().getTaskIndex()),
      getContext().getTaskAttemptNumber());

  jobConf.set(MRJobConfig.TASK_ATTEMPT_ID,
      taskAttemptId.toString());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
      getContext().getDAGAttemptNumber());

  this.inputRecordCounter = getContext().getCounters().findCounter(
      TaskCounter.INPUT_RECORDS_PROCESSED);

  useNewApi = this.jobConf.getUseNewMapper();
  return null;
}