org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo. Each example comes from an open-source project; the source file, originating project, and license are noted above each snippet.
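Before the individual examples, here is a minimal, self-contained sketch of the most common pattern: read the split meta info that the job client wrote to the job submission directory, then inspect each task's preferred locations. This is illustrative only; SplitMetaInfoExample and printSplitLocations are hypothetical names, while the SplitMetaInfoReader.readSplitMetaInfo call matches the usage in the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;

public class SplitMetaInfoExample {
  public static void printSplitLocations(JobID jobId, Configuration conf,
      String jobSubmitDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // Reads the split meta info file written during job submission.
    TaskSplitMetaInfo[] splits =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
            new Path(jobSubmitDir));
    for (TaskSplitMetaInfo split : splits) {
      // Each entry carries the split's size and its preferred hosts.
      System.out.println("input length = " + split.getInputDataLength()
          + ", locations = " + String.join(",", split.getLocations()));
    }
  }
}
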
Example #1
Source File: YARNRunner.java    From tez with Apache License 2.0
private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId,
    FileSystem fs, Configuration conf,
    String jobSubmitDir) throws IOException {
  TaskSplitMetaInfo[] splitsInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
          new Path(jobSubmitDir));
  int splitsCount = splitsInfo.length;
  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splitsCount);
  for (int i = 0; i < splitsCount; ++i) {
    TaskLocationHint locationHint =
        TaskLocationHint.createTaskLocationHint(
            new HashSet<String>(
                Arrays.asList(splitsInfo[i].getLocations())), null
        );
    locationHints.add(locationHint);
  }
  return locationHints;
}
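In Tez's YARNRunner these hints are subsequently attached to the map vertex so the scheduler can place tasks near their input data. A hedged sketch of that wiring, with mapVertex assumed to be in scope and imports elided as in the examples here (verify VertexLocationHint.create against your Tez version):

// Illustrative wiring: one location hint per input split.
List<TaskLocationHint> hints =
    getMapLocationHintsFromInputSplits(jobId, fs, conf, jobSubmitDir);
mapVertex.setLocationHint(VertexLocationHint.create(hints));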
 
Example #2
Source File: JobImpl.java    From big-c with Apache License 2.0
private void createMapTasks(JobImpl job, long inputLength,
                            TaskSplitMetaInfo[] splits) {
  for (int i=0; i < job.numMapTasks; ++i) {
    TaskImpl task =
        new MapTaskImpl(job.jobId, i,
            job.eventHandler, 
            job.remoteJobConfFile, 
            job.conf, splits[i], 
            job.taskAttemptListener, 
            job.jobToken, job.jobCredentials,
            job.clock,
            job.applicationAttemptId.getAttemptId(),
            job.metrics, job.appContext);
    job.addTask(task);
  }
  LOG.info("Input size for job " + job.jobId + " = " + inputLength
      + ". Number of splits = " + splits.length);
}
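For context, job.numMapTasks is derived from the split meta info earlier in JobImpl's init transition, so this loop creates exactly one MapTaskImpl per input split. A paraphrased sketch of that setup (not a verbatim excerpt):

// Paraphrased from JobImpl.InitTransition: the map count comes straight
// from the split meta info read out of the job submission directory.
TaskSplitMetaInfo[] taskSplitMetaInfo = createSplits(job, job.jobId);
job.numMapTasks = taskSplitMetaInfo.length;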
 
Example #3
Source File: YARNRunner.java    From incubator-tez with Apache License 2.0
private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId,
    FileSystem fs, Configuration conf,
    String jobSubmitDir) throws IOException {
  TaskSplitMetaInfo[] splitsInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
          new Path(jobSubmitDir));
  int splitsCount = splitsInfo.length;
  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splitsCount);
  for (int i = 0; i < splitsCount; ++i) {
    TaskLocationHint locationHint =
        new TaskLocationHint(
            new HashSet<String>(
                Arrays.asList(splitsInfo[i].getLocations())), null);
    locationHints.add(locationHint);
  }
  return locationHints;
}
 
Example #4
Source File: TestMRHelpers.java    From incubator-tez with Apache License 2.0
private void verifyLocationHints(Path inputSplitsDir,
    List<TaskLocationHint> actual) throws Exception {
  JobID jobId = new JobID("dummy", 1);
  TaskSplitMetaInfo[] splitsInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs,
          conf, inputSplitsDir);
  int splitsCount = splitsInfo.length;
  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splitsCount);
  for (int i = 0; i < splitsCount; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splitsInfo[i].getLocations())), null));
  }

  Assert.assertEquals(locationHints, actual);
}
 
Example #5
Source File: SplitMetaInfoReaderTez.java    From tez with Apache License 2.0
public static TaskSplitMetaInfo[] readSplitMetaInfo(Configuration conf,
    FileSystem fs) throws IOException {
  FSDataInputStream in = null;
  try {
    in = getFSDataIS(conf, fs);
    final String jobSplitFile = MRJobConfig.JOB_SPLIT;
    final String basePath = conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, ".");
    int numSplits = WritableUtils.readVInt(in); // TODO: check for insane values
    JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = new JobSplit.TaskSplitMetaInfo[numSplits];
    for (int i = 0; i < numSplits; i++) {
      JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
      splitMetaInfo.readFields(in);
      JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
          new Path(basePath, jobSplitFile)
              .toUri().toString(), splitMetaInfo.getStartOffset());
      allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
          splitMetaInfo.getLocations(), splitMetaInfo.getInputDataLength());
    }
    return allSplitMetaInfo;
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
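The TODO above notes that numSplits is a length read straight from the stream. A defensive variant might bound-check the vint before allocating; the following guard is hypothetical, not the actual Tez fix:

// Hypothetical guard: fail fast on a corrupt or hostile split meta file
// instead of allocating an array from an unchecked vint.
int numSplits = WritableUtils.readVInt(in);
final int MAX_SPLITS = 10_000_000; // arbitrary sanity cap, not a Hadoop constant
if (numSplits < 0 || numSplits > MAX_SPLITS) {
  throw new IOException("Corrupt split meta info: numSplits = " + numSplits);
}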
 
Example #6
Source File: JobImpl.java    From hadoop with Apache License 2.0
protected TaskSplitMetaInfo[] createSplits(JobImpl job, JobId jobId) {
  TaskSplitMetaInfo[] allTaskSplitMetaInfo;
  try {
    allTaskSplitMetaInfo = SplitMetaInfoReader.readSplitMetaInfo(
        job.oldJobId, job.fs, 
        job.conf, 
        job.remoteJobSubmitDir);
  } catch (IOException e) {
    throw new YarnRuntimeException(e);
  }
  return allTaskSplitMetaInfo;
}
 
Example #7
Source File: MapTaskImpl.java    From hadoop with Apache License 2.0
public MapTaskImpl(JobId jobId, int partition, EventHandler eventHandler,
    Path remoteJobConfFile, JobConf conf,
    TaskSplitMetaInfo taskSplitMetaInfo,
    TaskAttemptListener taskAttemptListener,
    Token<JobTokenIdentifier> jobToken,
    Credentials credentials, Clock clock,
    int appAttemptId, MRAppMetrics metrics, AppContext appContext) {
  super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile,
      conf, taskAttemptListener, jobToken, credentials, clock,
      appAttemptId, metrics, appContext);
  this.taskSplitMetaInfo = taskSplitMetaInfo;
}
 
Example #8
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0
@Test
public void testSingleRackRequest() throws Exception {
  TaskAttemptImpl.RequestContainerTransition rct =
      new TaskAttemptImpl.RequestContainerTransition(false);

  EventHandler eventHandler = mock(EventHandler.class);
  String[] hosts = new String[3];
  hosts[0] = "host1";
  hosts[1] = "host2";
  hosts[2] = "host3";
  TaskSplitMetaInfo splitInfo =
      new TaskSplitMetaInfo(hosts, 0, 128 * 1024 * 1024L);

  TaskAttemptImpl mockTaskAttempt =
      createMapTaskAttemptImplForTest(eventHandler, splitInfo);
  TaskAttemptEvent mockTAEvent = mock(TaskAttemptEvent.class);

  rct.transition(mockTaskAttempt, mockTAEvent);

  ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
  verify(eventHandler, times(2)).handle(arg.capture());
  if (!(arg.getAllValues().get(1) instanceof ContainerRequestEvent)) {
    Assert.fail("Second Event not of type ContainerRequestEvent");
  }
  ContainerRequestEvent cre =
      (ContainerRequestEvent) arg.getAllValues().get(1);
  String[] requestedRacks = cre.getRacks();
  // Only a single occurrence of /DefaultRack
  assertEquals(1, requestedRacks.length);
}
 
Example #9
Source File: TestRecovery.java    From hadoop with Apache License 2.0
private MapTaskImpl getMockMapTask(long clusterTimestamp, EventHandler eh) {

    ApplicationId appId = ApplicationId.newInstance(clusterTimestamp, 1);
    JobId jobId = MRBuilderUtils.newJobId(appId, 1);

    int partitions = 2;

    Path remoteJobConfFile = mock(Path.class);
    JobConf conf = new JobConf();
    TaskAttemptListener taskAttemptListener = mock(TaskAttemptListener.class);
    Token<JobTokenIdentifier> jobToken =
        (Token<JobTokenIdentifier>) mock(Token.class);
    Credentials credentials = null;
    Clock clock = new SystemClock();
    int appAttemptId = 3;
    MRAppMetrics metrics = mock(MRAppMetrics.class);
    Resource minContainerRequirements = mock(Resource.class);
    when(minContainerRequirements.getMemory()).thenReturn(1000);

    ClusterInfo clusterInfo = mock(ClusterInfo.class);
    AppContext appContext = mock(AppContext.class);
    when(appContext.getClusterInfo()).thenReturn(clusterInfo);

    TaskSplitMetaInfo taskSplitMetaInfo = mock(TaskSplitMetaInfo.class);
    MapTaskImpl mapTask = new MapTaskImpl(jobId, partitions,
        eh, remoteJobConfFile, conf,
        taskSplitMetaInfo, taskAttemptListener, jobToken, credentials, clock,
        appAttemptId, metrics, appContext);
    return mapTask;
  }
 
Example #10
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0
@Test
public void testHostResolveAttempt() throws Exception {
  TaskAttemptImpl.RequestContainerTransition rct =
      new TaskAttemptImpl.RequestContainerTransition(false);

  EventHandler eventHandler = mock(EventHandler.class);
  String[] hosts = new String[3];
  hosts[0] = "192.168.1.1";
  hosts[1] = "host2";
  hosts[2] = "host3";
  TaskSplitMetaInfo splitInfo =
      new TaskSplitMetaInfo(hosts, 0, 128 * 1024 * 1024L);

  TaskAttemptImpl mockTaskAttempt =
      createMapTaskAttemptImplForTest(eventHandler, splitInfo);
  TaskAttemptImpl spyTa = spy(mockTaskAttempt);
  when(spyTa.resolveHost(hosts[0])).thenReturn("host1");
  spyTa.dataLocalHosts = spyTa.resolveHosts(splitInfo.getLocations());

  TaskAttemptEvent mockTAEvent = mock(TaskAttemptEvent.class);
  rct.transition(spyTa, mockTAEvent);
  verify(spyTa).resolveHost(hosts[0]);
  ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
  verify(eventHandler, times(2)).handle(arg.capture());
  if (!(arg.getAllValues().get(1) instanceof ContainerRequestEvent)) {
    Assert.fail("Second Event not of type ContainerRequestEvent");
  }
  Map<String, Boolean> expected = new HashMap<String, Boolean>();
  expected.put("host1", true);
  expected.put("host2", true);
  expected.put("host3", true);
  ContainerRequestEvent cre =
      (ContainerRequestEvent) arg.getAllValues().get(1);
  String[] requestedHosts = cre.getHosts();
  for (String h : requestedHosts) {
    expected.remove(h);
  }
  assertEquals(0, expected.size());
}
 
Example #11
Source File: TestTaskImpl.java    From hadoop with Apache License 2.0
@Before 
@SuppressWarnings("unchecked")
public void setup() {
  dispatcher = new InlineDispatcher();
  
  ++startCount;
  
  conf = new JobConf();
  taskAttemptListener = mock(TaskAttemptListener.class);
  jobToken = (Token<JobTokenIdentifier>) mock(Token.class);
  remoteJobConfFile = mock(Path.class);
  credentials = null;
  clock = new SystemClock();
  metrics = mock(MRAppMetrics.class);  
  dataLocations = new String[1];
  
  appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);

  jobId = Records.newRecord(JobId.class);
  jobId.setId(1);
  jobId.setAppId(appId);
  appContext = mock(AppContext.class);

  taskSplitMetaInfo = mock(TaskSplitMetaInfo.class);
  when(taskSplitMetaInfo.getLocations()).thenReturn(dataLocations); 
  
  taskAttempts = new ArrayList<MockTaskAttemptImpl>();    
}
 
Example #12
Source File: MRApp.java    From hadoop with Apache License 2.0
@Override
protected TaskSplitMetaInfo[] createSplits(JobImpl job, JobId jobId) {
  TaskSplitMetaInfo[] splits = new TaskSplitMetaInfo[maps];
  for (int i = 0; i < maps ; i++) {
    splits[i] = new TaskSplitMetaInfo();
  }
  return splits;
}
 
Example #13
Source File: LocalJobRunner.java    From hadoop with Apache License 2.0
public MapTaskRunnable(TaskSplitMetaInfo info, int taskId, JobID jobId,
    Map<TaskAttemptID, MapOutputFile> mapOutputFiles) {
  this.info = info;
  this.taskId = taskId;
  this.mapOutputFiles = mapOutputFiles;
  this.jobId = jobId;
  this.localConf = new JobConf(job);
}
 
Example #14
Source File: LocalJobRunner.java    From hadoop with Apache License 2.0
/**
 * Create Runnables to encapsulate map tasks for use by the executor
 * service.
 * @param taskInfo Info about the map task splits
 * @param jobId the job id
 * @param mapOutputFiles a mapping from task attempts to output files
 * @return a List of Runnables, one per map task.
 */
protected List<RunnableWithThrowable> getMapTaskRunnables(
    TaskSplitMetaInfo [] taskInfo, JobID jobId,
    Map<TaskAttemptID, MapOutputFile> mapOutputFiles) {

  int numTasks = 0;
  ArrayList<RunnableWithThrowable> list =
      new ArrayList<RunnableWithThrowable>();
  for (TaskSplitMetaInfo task : taskInfo) {
    list.add(new MapTaskRunnable(task, numTasks++, jobId,
        mapOutputFiles));
  }

  return list;
}
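LocalJobRunner hands these runnables to a thread pool. A simplified sketch of that driving loop, with numWorkers and mapRunnables assumed to be in scope and waiting/error propagation elided (the real code also collects each runnable's stored Throwable):

// Run the map-task runnables built above on a fixed-size pool.
ExecutorService pool = Executors.newFixedThreadPool(numWorkers);
for (Runnable r : mapRunnables) {
  pool.submit(r);
}
pool.shutdown(); // already-submitted tasks still run to completion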
 
Example #15
Source File: MapTaskAttemptImpl.java    From big-c with Apache License 2.0
public MapTaskAttemptImpl(TaskId taskId, int attempt, 
    EventHandler eventHandler, Path jobFile, 
    int partition, TaskSplitMetaInfo splitInfo, JobConf conf,
    TaskAttemptListener taskAttemptListener, 
    Token<JobTokenIdentifier> jobToken,
    Credentials credentials, Clock clock,
    AppContext appContext) {
  super(taskId, attempt, eventHandler, 
      taskAttemptListener, jobFile, partition, conf, splitInfo.getLocations(),
      jobToken, credentials, clock, appContext);
  this.splitInfo = splitInfo;
}
 
Example #16
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0
private TaskAttemptImpl createMapTaskAttemptImplForTest(
    EventHandler eventHandler, TaskSplitMetaInfo taskSplitMetaInfo, Clock clock) {
  ApplicationId appId = ApplicationId.newInstance(1, 1);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  Path jobFile = mock(Path.class);
  JobConf jobConf = new JobConf();
  TaskAttemptImpl taImpl =
      new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1,
          taskSplitMetaInfo, jobConf, taListener, null,
          null, clock, null);
  return taImpl;
}
 
Example #17
Source File: TestJobImpl.java    From big-c with Apache License 2.0
private static InitTransition getInitTransition(final int numSplits) {
  InitTransition initTransition = new InitTransition() {
    @Override
    protected TaskSplitMetaInfo[] createSplits(JobImpl job, JobId jobId) {
      TaskSplitMetaInfo[] splits = new TaskSplitMetaInfo[numSplits];
      for (int i = 0; i < numSplits; ++i) {
        splits[i] = new TaskSplitMetaInfo();
      }
      return splits;
    }
  };
  return initTransition;
}
 
Example #18
Source File: MRInput.java    From incubator-tez with Apache License 2.0
@Private
void initializeInternal() throws IOException {
  // Primarily for visibility
  rrLock.lock();
  try {
    
    if (splitInfoViaEvents) {
      if (useNewApi) {
        mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(), inputRecordCounter,
            getContext().getApplicationId().getClusterTimestamp(), getContext()
                .getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext()
                .getTaskIndex(), getContext().getTaskAttemptNumber());
      } else {
        mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter);
      }
    } else {
      TaskSplitMetaInfo[] allMetaInfo = MRInputUtils.readSplits(jobConf);
      TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()];
      TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(),
          thisTaskMetaInfo.getStartOffset());
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
            .getNewSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters()
                .findCounter(TaskCounter.SPLIT_RAW_BYTES));
        mrReader = new MRReaderMapReduce(jobConf, newInputSplit, getContext().getCounters(),
            inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(),
            getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
            getContext().getTaskIndex(), getContext().getTaskAttemptNumber());
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
            .getOldSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters()
                .findCounter(TaskCounter.SPLIT_RAW_BYTES));
        mrReader = new MRReaderMapred(jobConf, oldInputSplit, getContext().getCounters(), inputRecordCounter);
      }
    }
  } finally {
    rrLock.unlock();
  }
  LOG.info("Initialzed MRInput: " + getContext().getSourceVertexName());
}
 
Example #19
Source File: SplitMetaInfoReaderTez.java    From tez with Apache License 2.0
/**
 * Get the split meta info for the task with a specific index. This method
 * reduces the overhead of creating meta objects below the index of the task.
 *
 * @param conf job configuration.
 * @param fs FileSystem.
 * @param index the index of the task.
 * @return split meta info object of the task.
 * @throws IOException
 */
public static TaskSplitMetaInfo getSplitMetaInfo(Configuration conf,
    FileSystem fs, int index) throws IOException {
  FSDataInputStream in = null;
  try {
    in = getFSDataIS(conf, fs);
    final String jobSplitFile = MRJobConfig.JOB_SPLIT;
    final String basePath =
        conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, ".");
    final int numSplits = WritableUtils.readVInt(in); // TODO: check for insane values
    if (numSplits <= index) {
      throw new IOException("Index is larger than the number of splits");
    }
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    int iter = 0;
    while (iter++ <= index) {
      splitMetaInfo.readFields(in);
    }
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        new Path(basePath, jobSplitFile)
            .toUri().toString(), splitMetaInfo.getStartOffset());
    return new JobSplit.TaskSplitMetaInfo(splitIndex,
        splitMetaInfo.getLocations(), splitMetaInfo.getInputDataLength());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
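A short usage sketch of this indexed variant, assuming conf, fs, and taskIndex are already available in the task:

// Read only this task's split meta info instead of materializing all of them.
TaskSplitMetaInfo myInfo =
    SplitMetaInfoReaderTez.getSplitMetaInfo(conf, fs, taskIndex);
String[] preferredHosts = myInfo.getLocations();
long inputLength = myInfo.getInputDataLength();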