/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.tez.mapreduce.processor;

import java.io.IOException;
import java.net.URI;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.crypto.SecretKey;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.mapreduce.task.ReduceContextImpl;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.tez.common.MRFrameworkConfigs;
import org.apache.tez.common.TezRuntimeFrameworkConfigs;
import org.apache.tez.common.TezTaskStatus.State;
import org.apache.tez.common.TezUtils;
import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.common.security.JobTokenIdentifier;
import org.apache.tez.common.security.TokenCache;
import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.mapreduce.hadoop.DeprecatedKeys;
import org.apache.tez.mapreduce.hadoop.IDConverter;
import org.apache.tez.mapreduce.hadoop.MRConfig;
import org.apache.tez.mapreduce.hadoop.MRJobConfig;
import org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl;
import org.apache.tez.mapreduce.hadoop.mapreduce.JobContextImpl;
import org.apache.tez.mapreduce.output.MROutputLegacy;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.TezProcessorContext;
import org.apache.tez.runtime.library.common.Constants;
import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator;

@SuppressWarnings("deprecation")
public abstract class MRTask {

  static final Log LOG = LogFactory.getLog(MRTask.class);

  protected JobConf jobConf;
  protected JobContext jobContext;
  protected TaskAttemptContext taskAttemptContext;
  protected OutputCommitter committer;

  // Current counters
  transient TezCounters counters;
  protected TezProcessorContext processorContext;
  protected TaskAttemptID taskAttemptId;
  protected Progress progress = new Progress();
  protected SecretKey jobTokenSecret;
  
  LogicalInput input;
  LogicalOutput output;

  boolean isMap;

  /* flag to track whether task is done */
  AtomicBoolean taskDone = new AtomicBoolean(false);

  /** Construct output file names so that, when an output directory listing is
   * sorted lexicographically, positions correspond to output partitions.*/
  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
  static {
    NUMBER_FORMAT.setMinimumIntegerDigits(5);
    NUMBER_FORMAT.setGroupingUsed(false);
  }

  protected MRTaskReporter mrReporter;
  protected boolean useNewApi;

  public MRTask(boolean isMap) {
    this.isMap = isMap;
  }

  // TODO how to update progress
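  /**
   * Initializes the task from the Tez processor context: builds the MapReduce
   * task attempt id, reconstructs the JobConf from the user payload, applies
   * the MR-specific settings (task attempt id, application attempt id, vertex
   * name) and then delegates to configureMRTask().
   */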
  public void initialize(TezProcessorContext context) throws IOException,
  InterruptedException {

    DeprecatedKeys.init();

    processorContext = context;
    counters = context.getCounters();
    this.taskAttemptId = new TaskAttemptID(
        new TaskID(
            Long.toString(context.getApplicationId().getClusterTimestamp()),
            context.getApplicationId().getId(),
            (isMap ? TaskType.MAP : TaskType.REDUCE),
            context.getTaskIndex()),
          context.getTaskAttemptNumber());

    byte[] userPayload = context.getUserPayload();
    Configuration conf = TezUtils.createConfFromUserPayload(userPayload);
    if (conf instanceof JobConf) {
      this.jobConf = (JobConf)conf;
    } else {
      this.jobConf = new JobConf(conf);
    }
    jobConf.set(Constants.TEZ_RUNTIME_TASK_ATTEMPT_ID,
        taskAttemptId.toString());
    jobConf.set(MRJobConfig.TASK_ATTEMPT_ID,
      taskAttemptId.toString());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
        context.getDAGAttemptNumber());

    LOG.info("MRTask.inited: taskAttemptId = " + taskAttemptId.toString());

    // TODO Post MRR
    // A single file per vertex will likely be a better solution. It does not
    // require translation - the client can take care of this. It will work
    // independently of whether the configuration is for intermediate tasks or
    // not, but has the overhead of localizing multiple files per job - i.e.
    // the client would need to write these files to HDFS and add them as
    // local resources per vertex. A solution like this may be more practical
    // once it is possible to submit configuration parameters to the AM (and,
    // effectively, to tasks) via RPC.

    jobConf.set(MRJobConfig.VERTEX_NAME, processorContext.getTaskVertexName());

    if (LOG.isDebugEnabled() && userPayload != null) {
      Iterator<Entry<String, String>> iter = jobConf.iterator();
      String taskIdStr = taskAttemptId.getTaskID().toString();
      while (iter.hasNext()) {
        Entry<String, String> confEntry = iter.next();
        LOG.debug("TaskConf Entry"
            + ", taskId=" + taskIdStr
            + ", key=" + confEntry.getKey()
            + ", value=" + confEntry.getValue());
      }
    }

    configureMRTask();
  }

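  /**
   * Propagates the current user's credentials into the JobConf, derives the
   * job token secret (if a session token is present), configures the local
   * directories and sets up the DistributedCache related configuration.
   */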
  private void configureMRTask()
      throws IOException, InterruptedException {

    Credentials credentials = UserGroupInformation.getCurrentUser()
        .getCredentials();
    jobConf.setCredentials(credentials);
    // TODO Can this be avoided altogether? Have the MRTezOutputCommitter use
    // the Tez parameter.
    // TODO This could be fetched from the env if YARN is setting it for all
    // containers.
    // Set it in the conf so that it can be used by the OutputCommitter.

    // Not needed. This is probably being set via the source/consumer meta
    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
    if (jobToken != null) {
      // Will MR ever run without a job token?
      SecretKey sk = JobTokenSecretManager.createSecretKey(jobToken
          .getPassword());
      this.jobTokenSecret = sk;
    } else {
      LOG.warn("No job token set");
    }

    configureLocalDirs();

    // Set up the DistributedCache related configs
    setupDistributedCacheConfig(jobConf);
  }

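  /**
   * Points the Tez and MR local-dir settings at the processor's work
   * directories, then locates (or creates) the shared "work" directory and
   * records it as the job-local directory.
   */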
  private void configureLocalDirs() throws IOException {
    // TODO NEWTEZ Is most of this functionality required?
    jobConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, processorContext.getWorkDirs());
    if (jobConf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR) == null) {
      jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, System.getenv(Environment.PWD.name()));
    }

    jobConf.setStrings(MRConfig.LOCAL_DIR, processorContext.getWorkDirs());

    LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    Path workDir = null;
    // First, try to find the JOB_LOCAL_DIR on this host.
    try {
      workDir = lDirAlloc.getLocalPathToRead("work", jobConf);
    } catch (DiskErrorException e) {
      // DiskErrorException means dir not found. If not found, it will
      // be created below.
    }
    if (workDir == null) {
      // JOB_LOCAL_DIR doesn't exist on this host -- Create it.
      workDir = lDirAlloc.getLocalPathForWrite("work", jobConf);
      FileSystem lfs = FileSystem.getLocal(jobConf).getRaw();
      boolean madeDir = false;
      try {
        madeDir = lfs.mkdirs(workDir);
      } catch (FileAlreadyExistsException e) {
        // Since all tasks will be running in their own JVM, the race condition
        // exists where multiple tasks could be trying to create this directory
        // at the same time. If this task loses the race, it's okay because
        // the directory already exists.
        madeDir = true;
        workDir = lDirAlloc.getLocalPathToRead("work", jobConf);
      }
      if (!madeDir) {
          throw new IOException("Mkdirs failed to create "
              + workDir.toString());
      }
    }
    // TODO NEWTEZ Is this required?
    jobConf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString());
    jobConf.set(MRJobConfig.JOB_LOCAL_DIR, workDir.toString());
  }

  /**
   * Set up the DistributedCache related configs to make
   * {@link DistributedCache#getLocalCacheFiles(Configuration)} and
   * {@link DistributedCache#getLocalCacheArchives(Configuration)} work.
   *
   * @param job the job configuration to update
   * @throws IOException
   */
  private static void setupDistributedCacheConfig(final JobConf job)
      throws IOException {

    // All symlinks are created in the current work-dir
    String localWorkDir = job.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR);

    // Update the configuration object with localized archives.
    URI[] cacheArchives = DistributedCache.getCacheArchives(job);
    if (cacheArchives != null) {
      List<String> localArchives = new ArrayList<String>();
      for (int i = 0; i < cacheArchives.length; ++i) {
        URI u = cacheArchives[i];
        Path p = new Path(u);
        Path name = new Path((null == u.getFragment()) ? p.getName()
            : u.getFragment());
        String linkName = name.toUri().getPath();
        localArchives.add(new Path(localWorkDir, linkName).toUri().getPath());
      }
      if (!localArchives.isEmpty()) {
        job.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils
            .arrayToString(localArchives.toArray(new String[localArchives
                .size()])));
      }
    }

    // Update the configuration object with localized files.
    URI[] cacheFiles = DistributedCache.getCacheFiles(job);
    if (cacheFiles != null) {
      List<String> localFiles = new ArrayList<String>();
      for (int i = 0; i < cacheFiles.length; ++i) {
        URI u = cacheFiles[i];
        Path p = new Path(u);
        Path name = new Path((null == u.getFragment()) ? p.getName()
            : u.getFragment());
        String linkName = name.toUri().getPath();
        localFiles.add(new Path(localWorkDir, linkName).toUri().getPath());
      }
      if (!localFiles.isEmpty()) {
        job.set(MRJobConfig.CACHE_LOCALFILES, StringUtils
            .arrayToString(localFiles.toArray(new String[localFiles.size()])));
      }
    }
  }

  public TezProcessorContext getUmbilical() {
    return this.processorContext;
  }

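  /**
   * Wires up the already-initialized output: records its committer (for
   * MROutputLegacy), creates the reporter as well as the job and task attempt
   * contexts, and localizes the configuration for this attempt.
   */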
  public void initTask(LogicalOutput output) throws IOException,
                                InterruptedException {
    // By this time output has been initialized
    this.output = output;
    if (output instanceof MROutputLegacy) {
      committer = ((MROutputLegacy)output).getOutputCommitter();
    }
    this.mrReporter = new MRTaskReporter(processorContext);
    this.useNewApi = jobConf.getUseNewMapper();
    TezDAGID dagId = IDConverter.fromMRTaskAttemptId(taskAttemptId).getTaskID()
        .getVertexID().getDAGId();

    this.jobContext = new JobContextImpl(jobConf, dagId, mrReporter);
    this.taskAttemptContext =
        new TaskAttemptContextImpl(jobConf, taskAttemptId, mrReporter);

    if (getState() == State.UNASSIGNED) {
      setState(State.RUNNING);
    }

    localizeConfiguration(jobConf);
  }

  public MRTaskReporter getMRReporter() {
    return mrReporter;
  }

  public void setState(State state) {
    // TODO Auto-generated method stub

  }

  public State getState() {
    // TODO Auto-generated method stub
    return null;
  }

  public TezCounters getCounters() { return counters; }

  public void setConf(JobConf jobConf) {
    this.jobConf = jobConf;
  }

  public JobConf getConf() {
    return this.jobConf;
  }

  /**
   * Gets a handle to the Statistics instances based on the scheme associated
   * with the path.
   *
   * @param path the path.
   * @param conf the configuration to extract the scheme from if not part of
   *   the path.
   * @return a list of Statistics instances matching the path's scheme; empty
   *   if none are found.
   */
  @Private
  public static List<Statistics> getFsStatistics(Path path, Configuration conf) throws IOException {
    List<Statistics> matchedStats = new ArrayList<FileSystem.Statistics>();
    path = path.getFileSystem(conf).makeQualified(path);
    String scheme = path.toUri().getScheme();
    for (Statistics stats : FileSystem.getAllStatistics()) {
      if (stats.getScheme().equals(scheme)) {
        matchedStats.add(stats);
      }
    }
    return matchedStats;
  }

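  /**
   * Returns the conventional MapReduce output file name, e.g. part-00000, for
   * this task's partition.
   */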
  @Private
  public synchronized String getOutputName() {
    return "part-" + NUMBER_FORMAT.format(taskAttemptId.getTaskID().getId());
  }

  public void waitBeforeCompletion(MRTaskReporter reporter) throws IOException,
      InterruptedException {
  }

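  /**
   * Marks the task as done: commits the output via the AM-coordinated commit
   * protocol if a commit is required, then sends the final status update.
   */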
  public void done() throws IOException, InterruptedException {

    LOG.info("Task:" + taskAttemptId + " is done."
        + " And is in the process of committing");
    // TODO change this to use the new context
    // TODO TEZ Interaction between Commit and OutputReady. Merge?
    if (output instanceof MROutputLegacy) {
      MROutputLegacy sOut = (MROutputLegacy)output;
      if (sOut.isCommitRequired()) {
        //wait for commit approval and commit
        // TODO EVENTUALLY - Commit is not required for map tasks.
        // skip a couple of RPCs before exiting.
        commit(sOut);
      }
    }
    taskDone.set(true);
    sendLastUpdate();
  }

  /**
   * Send a status update to the task tracker
   * @throws IOException
   */
  public void statusUpdate() throws IOException, InterruptedException {
    // TODO call progress update here if not being called within Map/Reduce
  }

  /**
   * Sends last status update before sending umbilical.done();
   */
  private void sendLastUpdate()
      throws IOException, InterruptedException {
    statusUpdate();
  }

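  /**
   * Polls the AM through canCommit() until the commit is approved (I/O
   * failures are retried up to three times), then commits the output. If the
   * commit itself fails, the output is discarded and the exception rethrown.
   */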
  private void commit(MROutputLegacy output) throws IOException {
    int retries = 3;
    while (true) {
      // This will loop until the AM allows the commit or asks for the task to
      // be killed outright, as opposed to the AM sending the task a signal to
      // kill itself gracefully.
      try {
        if (processorContext.canCommit()) {
          break;
        }
        Thread.sleep(1000);
      } catch(InterruptedException ie) {
        //ignore
      } catch (IOException ie) {
        LOG.warn("Failure sending canCommit: "
            + StringUtils.stringifyException(ie));
        if (--retries == 0) {
          throw ie;
        }
      }
    }

    // task can Commit now
    try {
      LOG.info("Task " + taskAttemptId + " is allowed to commit now");
      output.commit();
      return;
    } catch (IOException iee) {
      LOG.warn("Failure committing: " +
          StringUtils.stringifyException(iee));
      // if the commit did not succeed, delete the output
      discardOutput(output);
      throw iee;
    }
  }

  private void discardOutput(MROutputLegacy output) {
    try {
      output.abort();
    } catch (IOException ioe)  {
      LOG.warn("Failure cleaning up: " +
               StringUtils.stringifyException(ioe));
    }
  }

  public static String normalizeStatus(String status, Configuration conf) {
    // Check to see if the status string is too long
    // and truncate it if needed.
    int progressStatusLength = conf.getInt(
        MRConfig.PROGRESS_STATUS_LEN_LIMIT_KEY,
        MRConfig.PROGRESS_STATUS_LEN_LIMIT_DEFAULT);
    if (status.length() > progressStatusLength) {
      LOG.warn("Task status: \"" + status + "\" truncated to max limit ("
          + progressStatusLength + " characters)");
      status = status.substring(0, progressStatusLength);
    }
    return status;
  }

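  /**
   * Adapts a Tez {@link TezRawKeyValueIterator} to the MapReduce
   * {@link RawKeyValueIterator} interface and wraps it in a new-API
   * Reducer.Context via {@link ReduceContextImpl} and {@link WrappedReducer}.
   */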
  protected static <INKEY,INVALUE,OUTKEY,OUTVALUE>
  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
  createReduceContext(org.apache.hadoop.mapreduce.Reducer
                        <INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
                      Configuration job,
                      TaskAttemptID taskId,
                      final TezRawKeyValueIterator rIter,
                      org.apache.hadoop.mapreduce.Counter inputKeyCounter,
                      org.apache.hadoop.mapreduce.Counter inputValueCounter,
                      org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output,
                      org.apache.hadoop.mapreduce.OutputCommitter committer,
                      org.apache.hadoop.mapreduce.StatusReporter reporter,
                      RawComparator<INKEY> comparator,
                      Class<INKEY> keyClass, Class<INVALUE> valueClass
  ) throws IOException, InterruptedException {
    RawKeyValueIterator r =
        new RawKeyValueIterator() {

          @Override
          public boolean next() throws IOException {
            return rIter.next();
          }

          @Override
          public DataInputBuffer getValue() throws IOException {
            return rIter.getValue();
          }

          @Override
          public Progress getProgress() {
            return rIter.getProgress();
          }

          @Override
          public DataInputBuffer getKey() throws IOException {
            return rIter.getKey();
          }

          @Override
          public void close() throws IOException {
            rIter.close();
          }
        };
    org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE>
    reduceContext =
      new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
          job,
          taskId,
          r,
          inputKeyCounter,
          inputValueCounter,
          output,
          committer,
          reporter,
          comparator,
          keyClass,
          valueClass);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Using key class: " + keyClass
          + ", valueClass: " + valueClass);
    }

    org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
        reducerContext =
          new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>().getReducerContext(
              reduceContext);

    return reducerContext;
  }

  public void taskCleanup()
      throws IOException, InterruptedException {
    // set phase for this task
    statusUpdate();
    LOG.info("Running cleanup for the task");
    // do the cleanup
    if (output instanceof MROutputLegacy) {
      ((MROutputLegacy) output).abort();
    }
  }

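  /**
   * Publishes the task identity (task id, attempt id, partition, job id, map
   * flag) into the given JobConf and sets the work output path, using the
   * committer's task attempt path when a FileOutputCommitter is in use.
   */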
  public void localizeConfiguration(JobConf jobConf)
      throws IOException, InterruptedException {
    jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.setInt(JobContext.TASK_PARTITION,
        taskAttemptId.getTaskID().getId());
    jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
    
    jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);
    
    Path outputPath = FileOutputFormat.getOutputPath(jobConf);
    if (outputPath != null) {
      if ((committer instanceof FileOutputCommitter)) {
        FileOutputFormat.setWorkOutputPath(jobConf, 
          ((FileOutputCommitter)committer).getTaskAttemptPath(taskAttemptContext));
      } else {
        FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
      }
    }
  }

  public org.apache.hadoop.mapreduce.TaskAttemptContext getTaskAttemptContext() {
    return taskAttemptContext;
  }

  public JobContext getJobContext() {
    return jobContext;
  }

  public TaskAttemptID getTaskAttemptId() {
    return taskAttemptId;
  }

}