Java source code of TestGroupedSplits

Project: incubator-tez (GitHub Link)

incubator-tez-master
- NOTICE.txt
- tez-mapreduce
  - src
    - main
      - proto
        MRRuntimeProtos.proto
      - resources
        META-INF
        services
        org.apache.hadoop.mapreduce.protocol.ClientProtocolProvider
      - java
        org
        apache
        hadoop
        mapred
        LocalClientProtocolProviderTez.java
        LocalJobRunnerTez.java
        LocalJobRunnerMetricsTez.java
        split
        TezGroupedSplit.java
        TezMapredSplitsGrouper.java
        TezGroupedSplitsInputFormat.java
        mapreduce
        split
        TezMapReduceSplitsGrouper.java
        SplitMetaInfoReaderTez.java
        TezGroupedSplit.java
        TezGroupedSplitsInputFormat.java
        tez
        common
        MRFrameworkConfigs.java
        TezTaskStatus.java
        mapreduce
        committer
        MROutputCommitter.java
        processor
        MRTaskReporter.java
        MRTask.java
        reduce
        ReduceProcessor.java
        map
        MapProcessor.java
        SimpleMRProcessor.java
        hadoop
        MRHelpers.java
        MRJobConfig.java
        TezTypeConverters.java
        MRConfig.java
        IDConverter.java
        mapred
        MRCounters.java
        JobContextImpl.java
        MRReporter.java
        TaskAttemptContextImpl.java
        InputSplitInfoDisk.java
        MultiStageMRConfToTezTranslator.java
        MultiStageMRConfigUtil.java
        MRTaskStatus.java
        InputSplitInfo.java
        DeprecatedKeys.java
        mapreduce
        JobContextImpl.java
        MapContextImpl.java
        TezNullOutputCommitter.java
        TaskInputOutputContextImpl.java
        TaskAttemptContextImpl.java
        InputSplitInfoMem.java
        client
        ClientServiceDelegate.java
        YARNRunner.java
        ResourceMgrDelegate.java
        YarnTezClientProtocolProvider.java
        ClientCache.java
        NotRunningJob.java
        DAGJobStatus.java
        partition
        MRPartitioner.java
        combine
        MRCombiner.java
        output
        MROutputLegacy.java
        MROutput.java
        common
        Utils.java
        MRInputAMSplitGenerator.java
        MRInputSplitDistributor.java
        lib
        MRInputUtils.java
        MRReaderMapred.java
        MRReader.java
        MRReaderMapReduce.java
        input
        base
        MRInputBase.java
        MRInputLegacy.java
        MRInput.java
        MultiMRInput.java
    - test
      - resources
        log4j.properties
      - java
        org
        apache
        hadoop
        mapred
        split
        TestGroupedSplits.java
        tez
        mapreduce
        processor
        reduce
        TestReduceProcessor.java
        map
        TestMapProcessor.java
        MapUtils.java
        hadoop
        TestMRHelpers.java
        TestDeprecatedKeys.java
        TestConfigTranslationMRToTez.java
        TezTestUtils.java
        common
        TestMRInputSplitDistributor.java
        TestUmbilical.java
        input
        TestMultiMRInput.java
  - pom.xml
  - findbugs-exclude.xml
- tez-tests
  - src
    - test
      - java
        org
        apache
        tez
        test
        FaultToleranceTestRunner.java
        TestDAGRecovery2.java
        TestSecureShuffle.java
        dag
        ThreeLevelsFailingDAG.java
        SixLevelsFailingDAG.java
        TwoLevelsFailingDAG.java
        SimpleReverseVTestDAG.java
        SimpleVTestDAG.java
        MultiAttemptDAG.java
        TestDAGRecovery.java
        SimpleTestDAG.java
        TestDriver.java
        TestTezJobs.java
        TestFaultTolerance.java
        TestInput.java
        TestProcessor.java
        TestOutput.java
        SimpleTestDAG3Vertices.java
        MiniTezCluster.java
        mapreduce
        TestMRRJobsDAGApi.java
        TestMRRJobs.java
  - pom.xml
- CHANGES.txt
- tez-runtime-internals
  - src
    - main
      - proto
        Events.proto
      - java
        org
        apache
        tez
        common
        ProtoConverters.java
        TezLocalResource.java
        TezConverterUtils.java
        TezTaskUmbilicalProtocol.java
        ContainerTask.java
        ContainerContext.java
        runtime
        RuntimeTask.java
        InputReadyTracker.java
        api
        impl
        TezHeartbeatResponse.java
        TezHeartbeatRequest.java
        EventType.java
        TezCountersDelegate.java
        TezProcessorContextImpl.java
        TezEvent.java
        EventMetaData.java
        GroupInputSpec.java
        TaskSpec.java
        TezOutputContextImpl.java
        TezUmbilical.java
        TezInputContextImpl.java
        InputSpec.java
        OutputSpec.java
        TezTaskContextImpl.java
        events
        TaskAttemptCompletedEvent.java
        TaskAttemptFailedEvent.java
        TaskStatusUpdateEvent.java
        common
        security
        JobTokenSelector.java
        resources
        MemoryDistributor.java
        ScalingAllocator.java
        objectregistry
        ObjectRegistryModule.java
        ObjectRegistryImpl.java
        LogicalIOProcessorRuntimeTask.java
        metrics
        FileSystemStatisticUpdater.java
        TaskCounterUpdater.java
        GcTimeUpdater.java
    - test
      - java
        org
        apache
        tez
        runtime
        common
        resources
        TestMemoryDistributor.java
        objectregistry
        TestObjectRegistry.java
        TestLogicalIOProcessorRuntimeTask.java
        TestReflectionUtils.java
        TestInputReadyTracker.java
  - pom.xml
  - findbugs-exclude.xml
- tez-dag
  - src
    - main
      - proto
        HistoryEvents.proto
      - resources
        tez-container-log4j.properties
      - java
        org
        apache
        tez
        dag
        utils
        ProtoUtils.java
        TezBuilderUtils.java
        JavaProfilerOptions.java
        Graph.java
        EnvironmentUpdateUtils.java
        TezRuntimeChildJVM.java
        RelocalizationUtils.java
        api
        client
        VertexStatusBuilder.java
        ProgressBuilder.java
        DAGClientHandler.java
        rpc
        DAGClientAMProtocolBlockingPBServerImpl.java
        DAGClientServer.java
        DAGStatusBuilder.java
        oldrecords
        TaskAttemptState.java
        AMInfo.java
        TaskReport.java
        TaskState.java
        TaskAttemptReport.java
        history
        utils
        DAGUtils.java
        ATSConstants.java
        HistoryEvent.java
        DAGHistoryEvent.java
        HistoryEventType.java
        SummaryEvent.java
        recovery
        RecoveryService.java
        HistoryEventHandler.java
        logging
        impl
        HistoryEventJsonConversion.java
        SimpleHistoryLoggingService.java
        EntityTypes.java
        HistoryLoggingService.java
        events
        DAGCommitStartedEvent.java
        VertexStartedEvent.java
        AMLaunchedEvent.java
        ContainerStoppedEvent.java
        TaskStartedEvent.java
        VertexGroupCommitFinishedEvent.java
        VertexDataMovementEventsGeneratedEvent.java
        VertexGroupCommitStartedEvent.java
        ContainerLaunchedEvent.java
        VertexCommitStartedEvent.java
        VertexInitializedEvent.java
        VertexFinishedEvent.java
        TaskFinishedEvent.java
        DAGFinishedEvent.java
        TaskAttemptStartedEvent.java
        DAGSubmittedEvent.java
        DAGStartedEvent.java
        TaskAttemptFinishedEvent.java
        DAGInitializedEvent.java
        VertexParallelismUpdatedEvent.java
        AMStartedEvent.java
        app
        security
        authorize
        TezAMPolicyProvider.java
        package-info.java
        RecoveryParser.java
        DAGAppMaster.java
        dag
        impl
        TaskImpl.java
        RootInputLeafOutputDescriptor.java
        TezRootInputInitializerContextImpl.java
        OneToOneEdgeManager.java
        DAGSchedulerNaturalOrder.java
        DAGImpl.java
        BroadcastEdgeManager.java
        Edge.java
        TaskReportImpl.java
        RootInputVertexManager.java
        ImmediateStartVertexManager.java
        OutputCommitterContextImpl.java
        ScatterGatherEdgeManager.java
        package-info.java
        VertexManager.java
        TaskAttemptImplHelpers.java
        TaskAttemptImpl.java
        VertexStats.java
        VertexImpl.java
        DAGSchedulerMRR.java
        Task.java
        DAGState.java
        VertexTerminationCause.java
        DAGTerminationCause.java
        Vertex.java
        RootInputInitializerManager.java
        TaskStateInternal.java
        package-info.java
        DAGReport.java
        event
        VertexEventTaskAttemptCompleted.java
        VertexEventSourceVertexStarted.java
        TaskAttemptEventContainerTerminating.java
        DAGAppMasterEvent.java
        VertexEventRootInputFailed.java
        TaskAttemptEvent.java
        TaskEvent.java
        DAGEvent.java
        TaskEventRecoverTask.java
        DAGAppMasterEventDAGFinished.java
        DAGAppMasterEventType.java
        TaskAttemptEventStartedRemotely.java
        DAGEventSchedulerUpdate.java
        TaskAttemptEventNodeFailed.java
        DAGEventRecoverEvent.java
        DiagnosableEvent.java
        TaskAttemptEventSchedule.java
        VertexEventRecoverVertex.java
        VertexEventSourceTaskAttemptCompleted.java
        TaskAttemptEventContainerTerminated.java
        VertexEventOneToOneSourceSplit.java
        TaskAttemptEventContainerTerminatedBySystem.java
        TaskEventType.java
        DAGEventVertexReRunning.java
        DAGEventStartDag.java
        DAGEventSchedulerUpdateTAAssigned.java
        TaskAttemptEventFailRequest.java
        DAGEventCounterUpdate.java
        TaskAttemptEventDiagnosticsUpdate.java
        TaskAttemptEventType.java
        VertexEventRouteEvent.java
        DAGEventType.java
        DAGEventVertexCompleted.java
        package-info.java
        TaskEventAddTezEvent.java
        VertexEventRootInputInitialized.java
        VertexEventTermination.java
        VertexEvent.java
        VertexEventSourceVertexRecovered.java
        DAGEventDiagnosticsUpdate.java
        TaskAttemptEventOutputFailed.java
        TaskAttemptEventKillRequest.java
        VertexEventTaskReschedule.java
        TaskAttemptEventStatusUpdate.java
        VertexEventType.java
        TaskEventTermination.java
        VertexEventNullEdgeInitialized.java
        VertexEventTaskCompleted.java
        TaskAttemptEventAttemptFailed.java
        TaskEventTAUpdate.java
        DAG.java
        TaskTerminationCause.java
        DAGScheduler.java
        TaskAttemptStateInternal.java
        VertexState.java
        TaskAttempt.java
        TaskAttemptListenerImpTezDag.java
        AppContext.java
        ClusterInfo.java
        HeartbeatHandlerBase.java
        package-info.java
        rm
        TaskSchedulerAppCallbackWrapper.java
        ContainerAllocator.java
        AMSchedulerEventTALaunchRequest.java
        NMCommunicatorStopRequestEvent.java
        AMSchedulerEventTAEnded.java
        TezAMRMClientAsync.java
        TaskSchedulerEventHandler.java
        NMCommunicatorLaunchRequestEvent.java
        node
        AMNodeImpl.java
        AMNodeEventNodeCountUpdated.java
        AMNodeEventTaskAttemptEnded.java
        AMNodeMap.java
        AMNodeEventTaskAttemptSucceeded.java
        AMNodeEventType.java
        AMNode.java
        AMNodeEventContainerAllocated.java
        AMNodeState.java
        AMNodeEventStateChanged.java
        AMNodeEvent.java
        AMSchedulerEventType.java
        TaskSchedulerService.java
        package-info.java
        AMSchedulerEventDeallocateContainer.java
        LocalTaskSchedulerService.java
        AMSchedulerEvent.java
        container
        AMContainerMap.java
        AMContainerEventAssignTA.java
        AMContainerImpl.java
        AMContainerEventCompleted.java
        AMContainerEventLaunched.java
        AMContainerEventLaunchRequest.java
        ContainerContextMatcher.java
        AMContainer.java
        AMContainerEvent.java
        AMContainerEventNodeFailed.java
        AMContainerEventStopFailed.java
        AMContainerTask.java
        AMContainerHelpers.java
        AMContainerState.java
        AMContainerEventType.java
        AMContainerEventTASucceeded.java
        ContainerSignatureMatcher.java
        AMContainerEventStopRequest.java
        AMContainerEventLaunchFailed.java
        AMSchedulerEventNodeBlacklistUpdate.java
        NMCommunicatorEvent.java
        NMCommunicatorEventType.java
        YarnTaskSchedulerService.java
        DAGAppMasterState.java
        TaskAttemptListener.java
        TaskHeartbeatHandler.java
        ControlledClock.java
        ContainerContext.java
        ContainerHeartbeatHandler.java
        launcher
        ContainerLauncherImpl.java
        package-info.java
        ContainerLauncher.java
        runtime
        task
        TezTaskRunner.java
        ContainerReporter.java
        ErrorReporter.java
        TezChild.java
        TaskReporter.java
    - test
      - resources
        log4j.properties
      - java
        org
        apache
        tez
        dag
        utils
        TestJavaProfilerOptions.java
        TestEnvironmentUpdateUtils.java
        api
        client
        TestDAGClientHandler.java
        TestVertexStatusBuilder.java
        history
        utils
        TestDAGUtils.java
        logging
        impl
        TestHistoryEventJsonConversion.java
        events
        TestHistoryEventsProtoConversion.java
        app
        dag
        impl
        TestDAGScheduler.java
        TestEdge.java
        TestRootInputVertexManager.java
        TestVertexImpl.java
        TestVertexStats.java
        TestTaskImpl.java
        TestDAGImpl.java
        TestTaskAttempt.java
        rm
        node
        TestAMNodeMap.java
        TestTaskSchedulerHelpers.java
        TestTezAMRMClient.java
        TestTaskSchedulerEventHandler.java
        container
        TestAMContainerMap.java
        TestAMContainer.java
        TestContainerReuse.java
        TestLocalTaskScheduler.java
        TestTaskScheduler.java
        test
        EdgeManagerForTest.java
        VertexManagerPluginForTest.java
        runtime
        task
        TestTaskExecution.java
  - pom.xml
  - findbugs-exclude.xml
- tez-plugins
  - pom.xml
  - tez-yarn-timeline-history
    - src
      - main
        java
        org
        apache
        tez
        dag
        history
        logging
        ats
        HistoryEventTimelineConversion.java
        ATSHistoryLoggingService.java
      - test
        java
        org
        apache
        tez
        dag
        history
        logging
        ats
        TestATSHistoryLoggingService.java
        TestHistoryEventTimelineConversion.java
    - pom.xml
- pom.xml
- tez-api
  - src
    - main
      - proto
        Events.proto
        DAGClientAMProtocol.proto
        DAGApiRecords.proto
      - resources
        META-INF
        services
        org.apache.hadoop.security.SecurityInfo
      - java
        org
        apache
        tez
        dag
        api
        EdgeManagerDescriptor.java
        TezConfiguration.java
        VertexManagerPlugin.java
        VertexManagerPluginContext.java
        EdgeManager.java
        InputDescriptor.java
        client
        DAGClient.java
        DAGStatus.java
        VertexStatus.java
        StatusGetOpts.java
        rpc
        DAGClientAMProtocolBlockingPB.java
        DAGClientRPCImpl.java
        Progress.java
        DagTypeConverters.java
        Edge.java
        DuplicateDAGName.java
        VertexGroup.java
        Vertex.java
        TezException.java
        GroupInputEdge.java
        EdgeProperty.java
        VertexLocationHint.java
        DAGSubmissionTimedOut.java
        TezConstants.java
        TezEntityDescriptor.java
        DAG.java
        ProcessorDescriptor.java
        OutputDescriptor.java
        EdgeManagerContext.java
        TezUncheckedException.java
        VertexManagerPluginDescriptor.java
        SessionNotRunning.java
        RootInputLeafOutput.java
        client
        AMConfiguration.java
        TezAppMasterStatus.java
        TezClient.java
        TezClientUtils.java
        PreWarmContext.java
        common
        security
        Master.java
        JobTokenIdentifier.java
        TokenCache.java
        DAGClientSecurityInfo.java
        JobTokenSecretManager.java
        TezYARNUtils.java
        impl
        LogUtils.java
        counters
        TezCounter.java
        GenericCounter.java
        CounterGroupFactory.java
        AbstractCounters.java
        CounterGroupBase.java
        CounterGroup.java
        FileSystemCounter.java
        JobCounter.java
        AbstractCounterGroup.java
        AbstractCounter.java
        ResourceBundles.java
        Limits.java
        FileSystemCounterGroup.java
        DAGCounter.java
        LimitExceededException.java
        FrameworkCounterGroup.java
        TezCounters.java
        TaskCounter.java
        TezCommonUtils.java
        TezJobConfig.java
        TezUserPayload.java
        runtime
        api
        Reader.java
        LogicalIOProcessor.java
        LogicalInput.java
        InputReadyCallback.java
        TezOutputContext.java
        Event.java
        Input.java
        Writer.java
        TezInputContext.java
        OutputCommitter.java
        AbstractLogicalIOProcessor.java
        TezTaskContext.java
        Processor.java
        Output.java
        AbstractLogicalInput.java
        RootInputSpecUpdate.java
        TezRootInputInitializer.java
        AbstractLogicalOutput.java
        TezRootInputInitializerContext.java
        MemoryUpdateCallback.java
        MergedLogicalInput.java
        LogicalOutput.java
        TezProcessorContext.java
        OutputCommitterContext.java
        events
        RootInputConfigureVertexTasksEvent.java
        RootInputDataInformationEvent.java
        VertexManagerEvent.java
        CompositeDataMovementEvent.java
        RootInputInitializerEvent.java
        InputFailedEvent.java
        RootInputUpdatePayloadEvent.java
        DataMovementEvent.java
        InputReadErrorEvent.java
        common
        resources
        InitialMemoryRequestContext.java
        InitialMemoryAllocator.java
        objectregistry
        ObjectRegistry.java
        ObjectLifeCycle.java
        ObjectRegistryFactory.java
    - test
      - java
        org
        apache
        tez
        dag
        api
        TestDagTypeConverters.java
        client
        rpc
        TestDAGClient.java
        TestDAGVerify.java
        TestDAGPlan.java
        client
        TestTezClientUtils.java
        TestTezClient.java
        common
        security
        TestTokenCache.java
        TestTezCommonUtils.java
        TestTezJobConfig.java
        runtime
        api
        event
        TestCompositeDataMovementEvent.java
  - pom.xml
  - findbugs-exclude.xml
- BUILDING.txt
- tez-common
  - src
    - main
      - java
        org
        apache
        tez
        dag
        records
        TezTaskID.java
        TezDAGID.java
        TezTaskAttemptID.java
        TezVertexID.java
        TezID.java
        common
        ReflectionUtils.java
        TezContainerLogAppender.java
        TezUtils.java
    - test
      - resources
        log4j.properties
      - java
        org
        apache
        tez
        dag
        records
        TestTezIds.java
        common
        TestTezUtils.java
  - pom.xml
  - findbugs-exclude.xml
- tez-mapreduce-examples
  - src
    - main
      - java
        org
        apache
        tez
        processor
        FilterByWordOutputProcessor.java
        FilterByWordInputProcessor.java
        mapreduce
        examples
        MRRSleepJob.java
        WordCount.java
        BroadcastAndOneToOneExample.java
        helpers
        SplitsInClientOptionParser.java
        FilterLinesByWordOneToOne.java
        SecondarySort.java
        Sort.java
        IntersectExample.java
        IntersectValidate.java
        UnionExample.java
        RandomTextWriter.java
        Join.java
        OrderedWordCount.java
        terasort
        Unsigned16.java
        TeraSort.java
        job_history_summary.py
        TeraChecksum.java
        TeraInputFormat.java
        2009-write-up
        Yahoo2009.tex
        tera.bib
        .gitignore
        GenSort.java
        Random16.java
        TeraGen.java
        TeraOutputFormat.java
        TeraValidate.java
        package.html
        TeraScheduler.java
        IntersectDataGen.java
        RandomWriter.java
        MapredWordCount.java
        FilterLinesByWord.java
        package.html
        GroupByOrderByMRRTest.java
        ExampleDriver.java
    - test
      - java
        org
        apache
        tez
        mapreduce
        examples
        terasort
        TestTeraSort.java
  - pom.xml
  - findbugs-exclude.xml
- INSTALL.txt
- README.md
- DISCLAIMER.txt
- KEYS
- tez-runtime-library
  - src
    - main
      - proto
        ShufflePayloads.proto
      - java
        org
        apache
        hadoop
        io
        BufferUtils.java
        HashComparator.java
        tez
        dag
        library
        vertexmanager
        ShuffleVertexManager.java
        InputReadyVertexManager.java
        common
        TezRuntimeFrameworkConfigs.java
        runtime
        library
        resources
        WeightedScalingMemoryDistributor.java
        processor
        SleepProcessor.java
        SimpleProcessor.java
        hadoop
        compat
        NullProgressable.java
        output
        LocalOnFileSorterOutput.java
        OnFileUnorderedPartitionedKVOutput.java
        OnFileSortedOutput.java
        OnFileUnorderedKVOutput.java
        api
        KeyValueWriter.java
        KeyValuesReader.java
        KeyValuesWriter.java
        KeyValueReader.java
        Partitioner.java
        common
        security
        SecureShuffleUtils.java
        task
        impl
        ValuesIterator.java
        local
        output
        TezLocalTaskOutputFiles.java
        TezTaskOutput.java
        TezTaskOutputFiles.java
        ConfigUtils.java
        ValuesIterator.java
        TezRuntimeUtils.java
        combine
        Combiner.java
        InputAttemptIdentifier.java
        sort
        impl
        TezSpillRecord.java
        TezRawKeyValueIterator.java
        dflt
        DefaultSorter.java
        PipelinedSorter.java
        IFileOutputStream.java
        ExternalSorter.java
        TezIndexRecord.java
        IFile.java
        TezMerger.java
        IFileInputStream.java
        InputIdentifier.java
        Constants.java
        localshuffle
        LocalShuffle.java
        writers
        BaseUnorderedPartitionedKVWriter.java
        UnorderedPartitionedKVWriter.java
        YARNMaster.java
        readers
        ShuffledUnorderedKVReader.java
        MemoryUpdateCallbackHandler.java
        shuffle
        impl
        ShuffleInputEventHandler.java
        Shuffle.java
        ShuffleClientMetrics.java
        InMemoryReader.java
        MapHost.java
        ShuffleScheduler.java
        Fetcher.java
        ShuffleHeader.java
        InMemoryWriter.java
        ExceptionReporter.java
        MergeManager.java
        MergeThread.java
        MapOutput.java
        broadcast
        output
        FileBasedKVWriter.java
        partitioner
        HashPartitioner.java
        exceptions
        InputAlreadyClosedException.java
        shuffle
        common
        impl
        SimpleFetchedInputAllocator.java
        ShuffleManager.java
        ShuffleInputEventHandlerImpl.java
        InputHost.java
        FetchedInputAllocator.java
        DiskFetchedInput.java
        FetcherCallback.java
        FetchResult.java
        ShuffleEventHandler.java
        ShuffleUtils.java
        Fetcher.java
        MemoryFetchedInput.java
        FetchedInput.java
        HttpConnection.java
        FetchedInputCallback.java
        input
        ShuffledMergedInput.java
        ConcatenatedMergedKeyValuesInput.java
        SortedGroupedMergedInput.java
        ShuffledUnorderedKVInput.java
        ShuffledMergedInputLegacy.java
        LocalMergedInput.java
        ConcatenatedMergedKeyValueInput.java
        conf
        ShuffledUnorderedKVInputConfiguration.java
        OnFileUnorderedPartitionedKVOutputConfiguration.java
        BaseConfigurer.java
        ShuffledMergedInputConfiguration.java
        HadoopKeyValuesBasedBaseConf.java
        OnFileSortedOutputConfiguration.java
        UnorderedUnpartitionedKVEdgeConfigurer.java
        OrderedPartitionedKVEdgeConfigurer.java
        UnorderedPartitionedKVEdgeConfigurer.java
        OnFileUnorderedKVOutputConfiguration.java
    - test
      - resources
        tez-site.xml
        log4j.properties
      - java
        org
        apache
        tez
        dag
        library
        vertexmanager
        TestInputReadyVertexManager.java
        TestShuffleVertexManager.java
        runtime
        library
        testutils
        KVDataGen.java
        output
        TestOnFileUnorderedKVOutput.java
        common
        sort
        impl
        TestIFile.java
        writers
        TestUnorderedPartitionedKVWriter.java
        TestInputIdentifiers.java
        shuffle
        common
        impl
        TestSimpleFetchedInputAllocator.java
        TestShuffleInputEventHandlerImpl.java
        input
        TestSortedGroupedMergedInput.java
        conf
        TestUnorderedPartitionedKVEdgeConfigurer.java
        TestOrderedPartitionedKVEdgeConfigurer.java
        TestShuffledUnorderedKVInputConfiguration.java
        TestShuffledMergedInputConfiguration.java
        TestOnFileSortedOutputConfiguration.java
        TestOnFileUnorderedPartitionedKVOutput.java
        TestOnFileUnorderedKVOutputConfiguration.java
        TestUnorderedUnpartitionedKVEdgeConfigurer.java
        common
        resources
        TestWeightedScalingMemoryDistributor.java
  - pom.xml
- .gitignore
- docs
  - src
    - site
      - resources
        images
      - site.xml
      - apt
        talks.apt
        privacy-policy.apt
        install.apt
        index.apt
  - pom.xml
- LICENSE.txt
- tez-dist
  - src
    - main
      - assembly
        tez-dist-full.xml
        tez-dist.xml
  - pom.xml

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred.split;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tez.dag.api.TezConfiguration;
import org.junit.Assert;
import org.junit.Test;

import com.google.common.collect.Sets;

import static org.mockito.Mockito.*;

/**
 * Tests for {@link TezGroupedSplitsInputFormat}. Two tests read real text
 * files from the local file system through a wrapped {@link TextInputFormat};
 * the remaining tests drive the grouping logic with mocked splits.
 */
public class TestGroupedSplits {
  private static final Log LOG =
    LogFactory.getLog(TestGroupedSplits.class);

  /** Charset name used for every file written by these tests. */
  private static final String FILE_ENCODING = "UTF-8";

  /** Length reported by every mocked split. */
  private static final long MOCK_SPLIT_LENGTH = 10L * 1000 * 1000;

  private static final JobConf defaultConf = new JobConf();
  private static final FileSystem localFs;

  static {
    try {
      defaultConf.set("fs.defaultFS", "file:///");
      localFs = FileSystem.getLocal(defaultConf);
    } catch (IOException e) {
      throw new RuntimeException("init failure", e);
    }
  }

  // NOTE(review): the directory is named after a different test class; confirm
  // it cannot collide with other tests sharing the same test.build.data root.
  @SuppressWarnings("deprecation")
  private static Path workDir =
    new Path(new Path(System.getProperty("test.build.data", "/tmp")),
             "TestCombineTextInputFormat").makeQualified(localFs);

  // A reporter that does nothing
  private static final Reporter voidReporter = Reporter.NULL;

  /**
   * Writes several small text files holding the numbers
   * {@code [0, length)}, asks for a single grouped split and checks that
   * reading it yields every number exactly once.
   */
  //@Test(timeout=10000)
  public void testFormat() throws Exception {
    JobConf job = new JobConf(defaultConf);

    // log the seed so a failing run can be reproduced
    Random random = new Random();
    long seed = random.nextLong();
    LOG.info("seed = "+seed);
    random.setSeed(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int length = 10000;
    final int numFiles = 10;

    createFiles(length, numFiles, random);

    // create a combined split for the files
    TextInputFormat wrappedFormat = new TextInputFormat();
    wrappedFormat.configure(job);
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        newGroupedFormat(job, wrappedFormat);
    format.setDesiredNumberOfSplits(1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 0; i < 3; i++) {
      int numSplits = random.nextInt(length/20)+1;
      LOG.info("splitting: requesting = " + numSplits);
      InputSplit[] splits = format.getSplits(job, numSplits);
      LOG.info("splitting: got =        " + splits.length);

      // we should have a single split as the length is comfortably smaller than
      // the block size
      Assert.assertEquals("We got more than one splits!", 1, splits.length);
      InputSplit split = splits[0];
      Assert.assertEquals("It should be TezGroupedSplit",
        TezGroupedSplit.class, split.getClass());

      // check the split: every value must be seen once and only once
      BitSet bits = new BitSet(length);
      LOG.debug("split= " + split);
      RecordReader<LongWritable, Text> reader =
        format.getRecordReader(split, job, voidReporter);
      try {
        int count = 0;
        while (reader.next(key, value)) {
          int v = Integer.parseInt(value.toString());
          LOG.debug("read " + v);
          if (bits.get(v)) {
            LOG.warn("conflict with " + v +
                     " at position "+reader.getPos());
          }
          Assert.assertFalse("Key in multiple partitions.", bits.get(v));
          bits.set(v);
          count++;
        }
        LOG.info("splits="+split+" count=" + count);
      } finally {
        reader.close();
      }
      Assert.assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
  }

  /** Half-open integer interval {@code [start, end)} written to one file. */
  private static class Range {
    private final int start;
    private final int end;

    Range(int start, int end) {
      this.start = start;
      this.end = end;
    }

    @Override
    public String toString() {
      return "(" + start + ", " + end + ")";
    }
  }

  /**
   * Builds {@code numFiles} consecutive ranges: the first starts at 0, each
   * later one starts where its predecessor ended, and the last one ends at
   * {@code length}. The intermediate end points are randomized.
   *
   * @param length   end of the last range
   * @param numFiles number of ranges to create
   * @param random   source of randomness for the intermediate end points
   * @return the ranges, in order
   */
  private static Range[] createRanges(int length, int numFiles, Random random) {
    // generate a number of files with various lengths
    Range[] ranges = new Range[numFiles];
    for (int i = 0; i < numFiles; i++) {
      int start = i == 0 ? 0 : ranges[i-1].end;
      int end = i == numFiles - 1 ?
        length :
        (length/numFiles)*(2*i + 1)/2 + random.nextInt(length/numFiles) + 1;
      ranges[i] = new Range(start, end);
    }
    return ranges;
  }

  /**
   * Creates {@code test_<i>.txt} files under the work directory, each holding
   * the integers of one range from {@link #createRanges}, one per line.
   *
   * @param length   total number of integers written across all files
   * @param numFiles number of files to create
   * @param random   source of randomness for the file sizes
   * @throws IOException if a file cannot be created or written
   */
  private static void createFiles(int length, int numFiles, Random random)
    throws IOException {
    Range[] ranges = createRanges(length, numFiles, random);

    for (int i = 0; i < numFiles; i++) {
      Path file = new Path(workDir, "test_" + i + ".txt");
      // explicit charset so the bytes do not depend on the platform default
      Writer writer =
        new OutputStreamWriter(localFs.create(file), FILE_ENCODING);
      Range range = ranges[i];
      try {
        for (int j = range.start; j < range.end; j++) {
          writer.write(Integer.toString(j));
          writer.write("\n");
        }
      } finally {
        writer.close();
      }
    }
  }

  /**
   * Writes {@code contents} to {@code name}, optionally through a
   * compression codec. The stream is closed even when writing fails.
   *
   * @param fs       file system to write to
   * @param name     path of the file to create
   * @param codec    codec to compress with, or {@code null} for plain output
   * @param contents text to write
   * @throws IOException if the file cannot be created or written
   */
  private static void writeFile(FileSystem fs, Path name,
                                CompressionCodec codec,
                                String contents) throws IOException {
    byte[] bytes = contents.getBytes(FILE_ENCODING);
    OutputStream stm = fs.create(name);
    try {
      if (codec != null) {
        // closing the codec stream also closes the underlying file stream
        stm = codec.createOutputStream(stm);
      }
      stm.write(bytes);
    } finally {
      stm.close();
    }
  }

  /**
   * Reads every record of {@code split} and returns the values in the order
   * they were read. The record reader is closed even when reading fails.
   *
   * @param format input format used to open the split
   * @param split  split to read
   * @param job    job configuration passed to the record reader
   * @return the values read from the split
   * @throws IOException if the split cannot be read
   */
  private static List<Text> readSplit(InputFormat<LongWritable,Text> format,
                                      InputSplit split,
                                      JobConf job) throws IOException {
    List<Text> result = new ArrayList<Text>();
    RecordReader<LongWritable, Text> reader =
      format.getRecordReader(split, job, voidReporter);
    try {
      LongWritable key = reader.createKey();
      Text value = reader.createValue();
      while (reader.next(key, value)) {
        result.add(value);
        // a fresh value object per record, so stored values are not overwritten
        value = reader.createValue();
      }
    } finally {
      reader.close();
    }
    return result;
  }

  /**
   * Test using the gzip codec for reading
   */
  //@Test(timeout=10000)
  public void testGzip() throws IOException {
    JobConf job = new JobConf(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, job);
    localFs.delete(workDir, true);
    writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
              "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
              "is\ngzip\n");
    writeFile(localFs, new Path(workDir, "part3.txt.gz"), gzip,
        "one\nmore\nsplit\n");
    FileInputFormat.setInputPaths(job, workDir);
    TextInputFormat wrappedFormat = new TextInputFormat();
    wrappedFormat.configure(job);
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        newGroupedFormat(job, wrappedFormat);

    final String[] firstList =
      {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
    final String[] secondList = {"is", "gzip"};
    final String[] thirdList = {"one", "more", "split"};

    // TextInputFormat will produce 3 splits
    for (int j=1; j<=3; ++j) {
      format.setDesiredNumberOfSplits(j);
      InputSplit[] splits = format.getSplits(job, 100);
      if (j==1 || j==3) {
        // j==1 covers single split corner case
        // j==3 cases exercises the code where desired == actual
        // and does not do grouping
        Assert.assertEquals("compressed splits == " + j, j, splits.length);
      }
      List<Text> results = new ArrayList<Text>();
      for (int i=0; i<splits.length; ++i) {
        results.addAll(readSplit(format, splits[i], job));
      }
      Assert.assertEquals("splits length", 11, results.size());

      // The files may come back in any order, so identify each file by the
      // first character of its first line and verify every record, not just
      // the records of whichever file happens to be read first.
      int start = 0;
      while (start < results.size()) {
        String first = results.get(start).toString();
        switch (first.charAt(0)) {
        case 't':
          start = testResults(results, firstList, start);
          break;
        case 'i':
          start = testResults(results, secondList, start);
          break;
        case 'o':
          start = testResults(results, thirdList, start);
          break;
        default:
          Assert.fail("unexpected first token - " + first);
        }
      }
    }
  }

  /**
   * Asserts that {@code results}, beginning at index {@code start}, contains
   * the strings of {@code first} in order.
   *
   * @param results records read from the splits
   * @param first   expected consecutive values
   * @param start   index in {@code results} of the first expected value
   * @return the index just past the last compared record
   */
  private static int testResults(List<Text> results, String[] first, int start) {
    Assert.assertTrue("not enough records for expected list at " + start,
        start + first.length <= results.size());
    for (int i = 0; i < first.length; i++) {
      Assert.assertEquals("splits["+i+"]", first[i], results.get(start+i).toString());
    }
    return first.length+start;
  }

  /**
   * Creates a grouped-splits input format bound to {@code job} that wraps
   * {@code wrappedFormat}.
   */
  private static TezGroupedSplitsInputFormat<LongWritable, Text> newGroupedFormat(
      JobConf job, InputFormat<LongWritable, Text> wrappedFormat) {
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        new TezGroupedSplitsInputFormat<LongWritable, Text>();
    format.setConf(job);
    format.setInputFormat(wrappedFormat);
    return format;
  }

  /**
   * Creates a mocked input format whose {@code getSplits} returns
   * {@code splits} for any arguments.
   */
  @SuppressWarnings("unchecked")
  private static InputFormat<LongWritable, Text> mockFormatReturning(
      InputSplit[] splits) throws IOException {
    InputFormat<LongWritable, Text> wrapped = mock(InputFormat.class);
    when(wrapped.getSplits((JobConf)anyObject(), anyInt())).thenReturn(splits);
    return wrapped;
  }

  /**
   * Creates a mocked split reporting {@link #MOCK_SPLIT_LENGTH} as its length
   * and {@code locations} (which may be {@code null}) as its locations.
   */
  private static InputSplit mockSplit(String[] locations) throws IOException {
    InputSplit split = mock(InputSplit.class);
    when(split.getLength()).thenReturn(MOCK_SPLIT_LENGTH);
    when(split.getLocations()).thenReturn(locations);
    return split;
  }

  /**
   * Checks the number of grouped splits produced when the desired split count
   * is unset, too small and too large relative to the configured min/max
   * grouped split sizes.
   */
  @Test(timeout=10000)
  public void testGroupedSplitSize() throws IOException {
    JobConf job = new JobConf(defaultConf);

    // a single mocked split instance is reused for every slot
    InputSplit sharedSplit = mockSplit(null);
    int numSplits = 100;
    InputSplit[] mockSplits = new InputSplit[numSplits];
    for (int i=0; i<numSplits; i++) {
      mockSplits[i] = sharedSplit;
    }
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        newGroupedFormat(job, mockFormatReturning(mockSplits));

    job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE, 500L * 1000 * 1000);
    job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MIN_SIZE, 50L * 1000 * 1000);

    // desired splits not set. We end up choosing min/max split size based on
    // total data and num original splits. In this case, min size will be hit
    InputSplit[] splits = format.getSplits(job, 0);
    Assert.assertEquals(25, splits.length);

    // split too big. override with max
    format.setDesiredNumberOfSplits(1);
    splits = format.getSplits(job, 0);
    Assert.assertEquals(4, splits.length);

    // splits too small. override with min
    format.setDesiredNumberOfSplits(1000);
    splits = format.getSplits(job, 0);
    Assert.assertEquals(25, splits.length);
  }

  /**
   * Checks that splits listing the same location several times are each
   * placed into the grouped split exactly once.
   */
  @Test(timeout=10000)
  public void testGroupedSplitWithDuplicates() throws IOException {
    JobConf job = new JobConf(defaultConf);

    // put multiple splits with multiple copies in the same location
    String[] locations = {"common", "common", "common"};
    int numSplits = 3;
    InputSplit[] mockSplits = new InputSplit[numSplits];
    for (int i=0; i<numSplits; i++) {
      // distinct mock instances, so the set-size check below is meaningful
      mockSplits[i] = mockSplit(locations);
    }
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        newGroupedFormat(job, mockFormatReturning(mockSplits));

    format.setDesiredNumberOfSplits(1);
    InputSplit[] splits = format.getSplits(job, 1);
    Assert.assertEquals(1, splits.length);
    TezGroupedSplit split = (TezGroupedSplit) splits[0];
    // all 3 splits are present
    Assert.assertEquals(numSplits, split.wrappedSplits.size());
    Set<InputSplit> splitSet = Sets.newHashSet(split.wrappedSplits);
    Assert.assertEquals(numSplits, splitSet.size());
  }

  /**
   * Checks that splits whose locations are {@code null}, or contain only
   * {@code null} entries, are still grouped and that the resulting grouped
   * split can be written out without throwing.
   */
  @Test(timeout=10000)
  public void testGroupedSplitWithBadLocations() throws IOException {
    JobConf job = new JobConf(defaultConf);

    // splits with a null location array and with arrays of null entries
    InputSplit[] mockSplits = {
      mockSplit(null),
      mockSplit(new String[] {null}),
      mockSplit(new String[] {null, null})
    };
    int numSplits = mockSplits.length;
    TezGroupedSplitsInputFormat<LongWritable, Text> format =
        newGroupedFormat(job, mockFormatReturning(mockSplits));

    format.setDesiredNumberOfSplits(1);
    InputSplit[] splits = format.getSplits(job, 1);
    Assert.assertEquals(1, splits.length);
    TezGroupedSplit split = (TezGroupedSplit) splits[0];
    // all 3 splits are present
    Assert.assertEquals(numSplits, split.wrappedSplits.size());
    // no assertion on the output: the write itself must not throw
    ByteArrayOutputStream bOut = new ByteArrayOutputStream();
    split.write(new DataOutputStream(bOut));
  }

}