java source code of DTFile

Project: attic-apex-malhar (GitHub Link)

attic-apex-malhar-master
- sql
  - src
    - main
      - java
        org
        apache
        apex
        malhar
        sql
        table
        KafkaEndpoint.java
        Endpoint.java
        StreamEndpoint.java
        MessageFormat.java
        FileEndpoint.java
        CSVMessageFormat.java
        SQLExecEnvironment.java
        planner
        RelNodeVisitor.java
        ApexRelNode.java
        RelInfo.java
        operators
        OperatorUtils.java
        LineReader.java
        FilterTransformOperator.java
        InnerJoinOperator.java
        codegen
        ExpressionCompiler.java
        BeanClassGenerator.java
        schema
        TupleSchemaRegistry.java
        ApexSQLTableFactory.java
        ApexSQLTable.java
    - test
      - resources
        log4j.properties
        model
        model_file_csv.json
        input.csv
      - java
        org
        apache
        apex
        malhar
        sql
        KafkaEndpointTest.java
        StreamEndpointTest.java
        SerDeTest.java
        FileEndpointTest.java
        codegen
        BeanClassGeneratorTest.java
        OutputPOJO.java
        InputPOJO.java
  - pom.xml
- kafka
  - kafka09
    - src
      - main
        java
        org
        apache
        apex
        malhar
        kafka
        KafkaSinglePortInputOperator.java
        KafkaSinglePortExactlyOnceOutputOperator.java
        KafkaConsumer09.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        malhar
        kafka
        KafkaTestPartitioner.java
        KafkaInputOperatorTest.java
        KafkaHelper.java
        KafkaOperatorTestBase.java
        KafkaOutputOperatorTest.java
        KafkaConsumerPropertiesTest.java
        EmbeddedKafka.java
        KafkaTestProducer.java
    - pom.xml
    - XmlJavadocCommentsExtractor.xsl
  - pom.xml
  - kafka010
    - src
      - main
        java
        org
        apache
        apex
        malhar
        kafka
        KafkaConsumer010.java
        KafkaSinglePortInputOperator.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        malhar
        kafka
        KafkaTestPartitioner.java
        KafkaInputOperatorTest.java
        KafkaOperatorTestBase.java
        KafkaConsumerPropertiesTest.java
        KafkaTestProducer.java
    - pom.xml
    - XmlJavadocCommentsExtractor.xsl
  - kafka-common
    - src
      - main
        java
        org
        apache
        apex
        malhar
        kafka
        KafkaConsumerWrapper.java
        OneToOnePartitioner.java
        AbstractKafkaInputOperator.java
        AbstractKafkaOutputOperator.java
        AbstractKafkaPartitioner.java
        KafkaPartition.java
        AbstractKafkaConsumer.java
        PartitionStrategy.java
        KafkaMetrics.java
        OneToManyPartitioner.java
        KafkaSinglePortOutputOperator.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        malhar
        kafka
        AbstractKafkaConsumerPropertiesTest.java
    - pom.xml
    - XmlJavadocCommentsExtractor.xsl
- flume
  - src
    - main
      - resources
        flume-conf
        flume-env.sample.sh
        flume-conf.sample.properties
      - java
        org
        apache
        apex
        malhar
        flume
        source
        TestSource.java
        HdfsTestSource.java
        storage
        HDFSStorage.java
        ErrorMaskingEventCodec.java
        Storage.java
        EventCodec.java
        DebugWrapper.java
        discovery
        Discovery.java
        ZKAssistedDiscovery.java
        operator
        AbstractFlumeInputOperator.java
        sink
        Server.java
        FlumeSink.java
        interceptor
        ColumnFilteringFormattingInterceptor.java
        ColumnFilteringInterceptor.java
    - test
      - resources
        flume
        conf
        flume-conf.properties
        flume_simple.conf
        flume-env.sh
        flume_zkdiscovery.conf
        log4j.properties
        test_data
        gentxns
      - java
        org
        apache
        apex
        malhar
        flume
        storage
        HDFSStoragePerformance.java
        HDFSStorageMatching.java
        HDFSStoragePerformanceTest.java
        HDFSStorageTest.java
        integration
        ApplicationTest.java
        ApplicationDiscoveryTest.java
        discovery
        ZKAssistedDiscoveryTest.java
        operator
        AbstractFlumeInputOperatorTest.java
        sink
        FlumeSinkTest.java
        ServerTest.java
        interceptor
        RawEvent.java
        ColumnFilteringInterceptorTest.java
        ColumnFilteringFormattingInterceptorTest.java
        InterceptorTestHelper.java
  - pom.xml
  - README.md
- library
  - src
    - main
      - java
        org
        apache
        hadoop
        io
        file
        tfile
        DTFile.java
        ReusableByteArrayInputStream.java
        DTBCFile.java
        package-info.java
        CacheManager.java
        apex
        malhar
        lib
        xml
        AbstractXmlDOMOperator.java
        multiwindow
        SimpleMovingAverageObject.java
        AbstractSlidingWindow.java
        MultiWindowSumKeyVal.java
        MultiWindowRangeKeyVal.java
        package-info.java
        SortedMovingWindow.java
        AbstractSlidingWindowKeyVal.java
        SimpleMovingAverage.java
        script
        JavaScriptOperator.java
        package-info.java
        ScriptOperator.java
        statistics
        StandardDeviation.java
        ModeOperator.java
        package-info.java
        WeightedMeanOperator.java
        MedianOperator.java
        utils
        ByteArrayClassLoader.java
        FileContextUtils.java
        TupleUtil.java
        package-info.java
        ClassLoaderUtils.java
        IOUtils.java
        serde
        DefaultBlockReleaseStrategy.java
        TimeWindowSerde.java
        GenericSerde.java
        AffixKeyValueSerdeManager.java
        WindowCompleteListener.java
        Serde.java
        BlockReleaseStrategy.java
        WindowedBlockStream.java
        ImmutablePairSerde.java
        ArraySerde.java
        CollectionSerde.java
        AffixSerde.java
        KeyValueByteStreamProvider.java
        PassThruSliceSerde.java
        Block.java
        KeyValueSerdeManager.java
        PairSerde.java
        LongSerde.java
        StringSerde.java
        SliceUtils.java
        package-info.java
        BlockStream.java
        BufferSlice.java
        SerializationBuffer.java
        IntSerde.java
        PubSubHelper.java
        KryoJavaSerializer.java
        codec
        JavaSerializationStreamCodec.java
        KryoSerializableStreamCodec.java
        KryoJdkContainer.java
        package-info.java
        fs
        s3
        S3Reconciler.java
        S3OutputModule.java
        S3RecordReaderModule.java
        S3InitiateFileUploadOperator.java
        S3RecordReader.java
        S3ReconcilerQueuePartitioner.java
        S3BlockUploadOperator.java
        S3FileMerger.java
        S3TupleOutputModule.java
        FSRecordReader.java
        FSRecordCompactionOperator.java
        GenericFileOutputOperator.java
        LineByLineFileInputOperator.java
        FSRecordReaderModule.java
        stream
        StreamDuplicater.java
        StreamMerger.java
        DevNullCounter.java
        DevNull.java
        RoundRobinHashMap.java
        JsonByteArrayOperator.java
        KeyValPairToHashMap.java
        ArrayListToItem.java
        package-info.java
        HashMapToKeyValPair.java
        AbstractAggregator.java
        ConsolidatorKeyVal.java
        ArrayListAggregator.java
        Counter.java
        db
        AbstractAggregateTransactionableStoreOutputOperator.java
        AbstractStoreOutputOperator.java
        jdbc
        JdbcFieldInfo.java
        AbstractJdbcTransactionableOutputOperator.java
        JdbcPOJOInsertOutputOperator.java
        JdbcStore.java
        AbstractJdbcInputOperator.java
        JdbcPOJONonInsertOutputOperator.java
        AbstractJdbcNonTransactionableBatchOutputOperator.java
        JdbcPOJOPollInputOperator.java
        package-info.java
        JdbcTransactionalStore.java
        JDBCLookupCacheBackedOperator.java
        AbstractJdbcNonTransactionableOutputOperator.java
        AbstractJdbcPOJOOutputOperator.java
        JdbcPOJOInputOperator.java
        JdbcPollInputOperator.java
        JDBCDimensionalOutputOperator.java
        JdbcNonTransactionalStore.java
        AbstractJdbcPollInputOperator.java
        AbstractPassThruTransactionableStoreOutputOperator.java
        TransactionableKeyValueStore.java
        Connectable.java
        AbstractStoreInputOperator.java
        KeyValueStore.java
        AbstractAggregateTransactionableKeyValueStoreOutputOperator.java
        package-info.java
        TransactionableStore.java
        AbstractKeyValueStoreInputOperator.java
        cache
        CacheStore.java
        package-info.java
        CacheManager.java
        AbstractDBLookupCacheBackedOperator.java
        Transactionable.java
        AbstractPassThruTransactionableKeyValueStoreOutputOperator.java
        AbstractTransactionableStoreOutputOperator.java
        AbstractBatchTransactionableStoreOutputOperator.java
        redshift
        RedshiftJdbcTransactionableOutputOperator.java
        RedshiftOutputModule.java
        bandwidth
        BandwidthManager.java
        BandwidthLimitingOperator.java
        BandwidthPartitioner.java
        state
        TimeSlicedBucketedState.java
        managed
        ManagedState.java
        StateTracker.java
        UnboundedTimeBucketAssigner.java
        TimeExtractor.java
        ManagedTimeStateMultiValue.java
        ManagedStateContext.java
        SliceBloomFilter.java
        IncrementalCheckpointManager.java
        BucketsFileSystem.java
        BucketProvider.java
        Bucket.java
        package-info.java
        MovingBoundaryTimeBucketAssigner.java
        ManagedStateComponent.java
        ManagedTimeUnifiedStateImpl.java
        ManagedTimeStateImpl.java
        TimeBucketAssigner.java
        AbstractManagedStateImpl.java
        ManagedStateImpl.java
        BucketedState.java
        package-info.java
        spillable
        SpillableArrayListMultimapImpl.java
        inmem
        InMemSpillableStateStore.java
        SpillableArrayListImpl.java
        SpillableIdentifierGenerator.java
        SequentialSpillableIdentifierGenerator.java
        managed
        ManagedTimeUnifiedStateSpillableStateStore.java
        ManagedStateSpillableStateStore.java
        TimeBasedPriorityQueue.java
        Spillable.java
        SpillableComplexComponentImpl.java
        package-info.java
        WindowBoundedMapCache.java
        SpillableMapImpl.java
        SpillableSetMultimapImpl.java
        SpillableSetImpl.java
        WindowListener.java
        SpillableComplexComponent.java
        SpillableStateStore.java
        function
        FunctionOperatorUtil.java
        FunctionOperator.java
        Function.java
        AnnonymousClassModifier.java
        formatter
        XmlFormatter.java
        Formatter.java
        JsonFormatter.java
        testbench
        RandomEventGenerator.java
        HttpStatusFilter.java
        EventClassifier.java
        RedisSumOper.java
        CountTestSink.java
        CollectorTestSink.java
        TopOccurrence.java
        EventClassifierNumberToHashDouble.java
        CountAndLastTupleTestSink.java
        SeedEventClassifier.java
        package-info.java
        ThroughputCounter.java
        FilterClassifier.java
        HashTestSink.java
        KeyValSum.java
        SeedEventGenerator.java
        CountOccurance.java
        SumTestSink.java
        ArrayListTestSink.java
        EventGenerator.java
        FilteredEventClassifier.java
        CompareFilterTuples.java
        EventIncrementer.java
        RandomWordGenerator.java
        fileaccess
        TFileReader.java
        FileAccessFSImpl.java
        TFileImpl.java
        TFileWriter.java
        package-info.java
        DTFileReader.java
        FileAccess.java
        counters
        package-info.java
        BasicCounters.java
        window
        impl
        WindowedOperatorImpl.java
        WindowedMergeOperatorFeatures.java
        SpillableWindowedKeyedStorage.java
        InMemoryWindowedStorage.java
        WatermarkImpl.java
        SpillableWindowedPlainStorage.java
        FixedDiffProcessingTimeWatermarkGen.java
        WindowKeyPairTimeExtractor.java
        KeyedWindowedOperatorImpl.java
        AbstractWindowedOperator.java
        package-info.java
        InMemorySessionWindowedStorage.java
        WindowTimeExtractor.java
        KeyedWindowedMergeOperatorImpl.java
        FixedDiffEventTimeWatermarkGen.java
        WindowedMergeOperatorImpl.java
        InMemoryWindowedKeyedStorage.java
        SpillableSessionWindowedStorage.java
        MergeAccumulation.java
        ControlTuple.java
        TriggerOption.java
        Tuple.java
        accumulation
        SumLong.java
        SumFloat.java
        Min.java
        SumInt.java
        Sort.java
        Max.java
        FoldFn.java
        Count.java
        AbstractPojoJoin.java
        SumDouble.java
        PojoLeftOuterJoin.java
        PojoRightOuterJoin.java
        RemoveDuplicates.java
        package-info.java
        ReduceFn.java
        CoGroup.java
        TopNByKey.java
        PojoFullOuterJoin.java
        PojoInnerJoin.java
        InnerJoin.java
        TopN.java
        CompositeAccumulation.java
        Group.java
        Average.java
        WindowedStorage.java
        WindowState.java
        WindowOption.java
        WindowedMergeOperator.java
        Accumulation.java
        package-info.java
        SessionWindowedStorage.java
        WindowedOperator.java
        Window.java
        ImplicitWatermarkGenerator.java
        dimensions
        DimensionsEvent.java
        DimensionsDescriptor.java
        CustomTimeBucketRegistry.java
        aggregator
        AggregateEvent.java
        CompositeAggregatorFactory.java
        AggregatorTop.java
        IncrementalAggregator.java
        AggregatorCumSum.java
        AggregatorUtils.java
        TopBottomAggregatorFactory.java
        AbstractCompositeAggregator.java
        AggregatorLast.java
        AggregatorBottom.java
        AggregatorIncrementalType.java
        AbstractIncrementalAggregator.java
        CompositeAggregator.java
        AggregatorAverage.java
        AggregatorTopBottomType.java
        AggregatorRegistry.java
        DefaultCompositeAggregatorFactory.java
        AggregatorMax.java
        AggregatorMin.java
        AbstractCompositeAggregatorFactory.java
        AggregatorOTFType.java
        AggregatorSum.java
        AggregatorCount.java
        AbstractTopBottomAggregator.java
        OTFAggregator.java
        AggregatorFirst.java
        DimensionsConversionContext.java
        package-info.java
        util
        BaseMatchOperator.java
        UnifierCountOccurKey.java
        ActiveMQMultiTypeMessageListener.java
        PojoUtils.java
        BaseKeyOperator.java
        AbstractBaseMatchOperator.java
        AbstractBaseFrequentKey.java
        BaseKeyValueOperator.java
        UnifierKeyValRange.java
        ArrayHashMapFrequent.java
        UnifierMap.java
        FieldValueGenerator.java
        StorageAgentKeyValueStore.java
        UnifierArrayList.java
        UnifierSumNumber.java
        AbstractKeyValueStorageAgent.java
        TableInfo.java
        UnifierBooleanOr.java
        time
        WindowUtils.java
        HighLow.java
        UnifierHashMap.java
        AbstractBaseSortOperator.java
        JavaScriptFilterOperator.java
        UnifierRange.java
        DimensionTimeBucketSumOperator.java
        AbstractBaseNOperatorMap.java
        UnifierHashMapFrequent.java
        UnifierArrayHashMapFrequent.java
        UnifierHashMapRange.java
        KeyValPair.java
        AbstractBaseFrequentKeyValueMap.java
        comparator
        SliceComparator.java
        AbstractBaseNUniqueOperatorMap.java
        KeyHashValPair.java
        FilterOperator.java
        UnifierHashMapSumKeys.java
        ReversibleComparator.java
        package-info.java
        BaseUniqueKeyValueCounter.java
        ActiveMQMessageListener.java
        TopNUniqueSort.java
        UnifierBooleanAnd.java
        AbstractBaseNNonUniqueOperatorMap.java
        BaseFilteredKeyValueOperator.java
        TopNSort.java
        BaseNumberValueOperator.java
        BaseNumberKeyValueOperator.java
        AbstractDimensionTimeBucketOperator.java
        FieldInfo.java
        BaseUniqueKeyCounter.java
        KryoCloneUtils.java
        ReusableStringReader.java
        BaseLineTokenizer.java
        TimeBucketKey.java
        UnifierHashMapInteger.java
        appdata
        gpo
        SerdeMapPrimitive.java
        GPOType.java
        GPOUtils.java
        SerdeObjectPayloadFix.java
        SerdeListGPOMutable.java
        SerdeListPrimitive.java
        GPOMutable.java
        SerdeListString.java
        Serde.java
        GPOGetters.java
        SerdeFieldsDescriptor.java
        GPOByteArrayList.java
        StoreUtils.java
        datastructs
        CacheLRUSynchronousFlush.java
        DimensionalTable.java
        NOPCacheFlushListener.java
        query
        AppDataWindowEndQueueManager.java
        SimpleQueueManager.java
        QueueList.java
        QueryManagerAsynchronous.java
        QueryBundle.java
        QueueManager.java
        QueryManagerSynchronous.java
        WindowBoundedService.java
        SimpleDoneQueueManager.java
        QueryExecutor.java
        AbstractWindowEndQueueManager.java
        serde
        SchemaQueryDeserializer.java
        MessageDeserializerInfo.java
        SimpleDataValidator.java
        MessageValidatorInfo.java
        DataResultSnapshotSerializer.java
        MessageSerializerInfo.java
        CustomMessageDeserializer.java
        MessageSerializerFactory.java
        SimpleDataDeserializer.java
        DataQuerySnapshotDeserializer.java
        CustomMessageValidator.java
        DataQuerySnapshotValidator.java
        MessageType.java
        MessageDeserializerFactory.java
        SimpleDataSerializer.java
        CustomMessageSerializer.java
        WindowEndQueueManager.java
        package-info.java
        snapshot
        AppDataSnapshotServerPOJO.java
        AppDataSnapshotServerMap.java
        AbstractAppDataSnapshotServer.java
        QueueUtils.java
        AbstractAppDataServer.java
        schemas
        TimeBucket.java
        SchemaResult.java
        SchemaRegistry.java
        Fields.java
        SchemaRegistryMultiple.java
        JSONType.java
        SchemaQuery.java
        Schema.java
        Query.java
        DimensionalSchema.java
        FieldsDescriptor.java
        DimensionalConfigurationSchema.java
        Result.java
        Message.java
        DataResultSnapshot.java
        SnapshotSchema.java
        QRBase.java
        DataQuerySnapshot.java
        SchemaUtils.java
        Type.java
        ResultFormatter.java
        SchemaResultSerializer.java
        CustomTimeBucket.java
        SchemaRegistrySingle.java
        algo
        MatchMap.java
        BottomNUniqueMap.java
        UniqueValueCount.java
        MatchAnyMap.java
        MergeSortNumber.java
        BottomNMap.java
        MergeSort.java
        BottomNUnifier.java
        Distinct.java
        package-info.java
        LeastFrequentValue.java
        MostFrequentValue.java
        UniqueCounter.java
        TopNUnique.java
        MatchAllMap.java
        CompareExceptCountMap.java
        LeastFrequentKeyUnifier.java
        FilterValues.java
        TopN.java
        LeastFrequentKeyArrayUnifier.java
        InsertSort.java
        streamquery
        index
        Index.java
        ColumnIndex.java
        condition
        JoinColumnEqualCondition.java
        Condition.java
        wal
        FileSystemWAL.java
        FSWindowReplayWAL.java
        WindowDataManager.java
        FSWindowDataManager.java
        package-info.java
        WAL.java
        dedup
        BoundedDedupOperator.java
        AbstractDeduper.java
        DeduperStreamCodec.java
        TimeBasedDedupOperator.java
        io
        AbstractHttpInputOperator.java
        WidgetOutputOperator.java
        WebSocketInputOperator.java
        HttpLinesInputOperator.java
        fs
        HDFSFileCopyModule.java
        FilterStreamContext.java
        AbstractReconciler.java
        HDFSFileMerger.java
        FileSplitterBase.java
        FileMerger.java
        AbstractThroughputFileInputOperator.java
        FSInputModule.java
        Synchronizer.java
        FilterStreamProvider.java
        FileStitcher.java
        package-info.java
        FileSplitterInput.java
        AbstractFileSplitter.java
        AbstractFileInputOperator.java
        AbstractFileOutputOperator.java
        S3InputModule.java
        AbstractSingleFileOutputOperator.java
        TailFsInputOperator.java
        S3BlockReader.java
        AbstractWindowFileOutputOperator.java
        FileSplitter.java
        FilterStreamCodec.java
        CollectionMultiConsoleOutputOperator.java
        SimpleSinglePortInputOperator.java
        WebSocketServerInputOperator.java
        HttpOutputOperator.java
        AbstractHttpGetOperator.java
        AbstractFTPInputOperator.java
        WebSocketOutputOperator.java
        PubSubWebSocketOutputOperator.java
        ConsoleOutputOperator.java
        ApacheGenRandomLogs.java
        PubSubWebSocketInputOperator.java
        AbstractKeyValueStoreOutputOperator.java
        PubSubWebSocketAppDataQuery.java
        HttpGetMapOperator.java
        package-info.java
        IdempotentStorageManager.java
        jms
        JMSMultiPortOutputOperator.java
        JMSBase.java
        JMSStringInputOperator.java
        AbstractJMSSinglePortOutputOperator.java
        JMSTransactionableStore.java
        JMSBaseTransactionableStore.java
        AbstractJMSInputOperator.java
        FSPsuedoTransactionableStore.java
        package-info.java
        JMSObjectInputOperator.java
        AbstractJMSOutputOperator.java
        SmtpOutputOperator.java
        AbstractHttpOperator.java
        block
        BlockMetadata.java
        ReaderContext.java
        PartFileWriter.java
        AbstractBlockReader.java
        package-info.java
        BlockWriter.java
        FSSliceReader.java
        AbstractFSBlockReader.java
        HttpJsonChunksInputOperator.java
        AbstractSocketInputOperator.java
        PubSubWebSocketAppDataResult.java
        MapMultiConsoleOutputOperator.java
        HttpPostOutputOperator.java
        AbstractHttpGetMultiValuedMapOperator.java
        converter
        StringValueToNumberConverterForMap.java
        Converter.java
        ByteArrayToStringConverterOperator.java
        package-info.java
        MapToKeyHashValuePairConverter.java
        MapToKeyValuePairConverter.java
        projection
        ProjectionOperator.java
        math
        SumKeyVal.java
        RunningAverage.java
        Min.java
        MinKeyVal.java
        MaxKeyVal.java
        AbstractXmlKeyValueCartesianProduct.java
        AbstractXmlCartesianProduct.java
        Sigma.java
        Max.java
        MultiplyByConstant.java
        LogicalCompare.java
        LogicalCompareToConstant.java
        Division.java
        SquareCalculus.java
        SingleVariableAbstractCalculus.java
        package-info.java
        AverageKeyVal.java
        RangeKeyVal.java
        XmlKeyValueStringCartesianProduct.java
        Range.java
        AbstractAggregateCalc.java
        AbstractOutput.java
        Sum.java
        MarginMap.java
        Margin.java
        Average.java
        MarginKeyVal.java
        partitioner
        StatsAwareStatelessPartitioner.java
        package-info.java
        StatelessThroughputBasedPartitioner.java
        metric
        AvgAggregator.java
        package-info.java
        min
        DoubleMinAggregator.java
        IntMinAggregator.java
        FloatMinAggregator.java
        LongMinAggregator.java
        max
        FloatMaxAggregator.java
        IntMaxAggregator.java
        DoubleMaxAggregator.java
        LongMaxAggregator.java
        logs
        FilteredLineTokenizerKeyVal.java
        InformationExtractor.java
        FilteredLineToTokenHashMap.java
        ApacheLogParseMapOutputOperator.java
        LineTokenizer.java
        FilteredLineToTokenArrayList.java
        DimensionObject.java
        DimensionAggregationUnifier.java
        ApacheLogParseOperator.java
        ApacheVirtualLogParseOperator.java
        LineTokenizerKeyVal.java
        package-info.java
        LineToTokenHashMap.java
        RegexMatchMapOperator.java
        LineToTokenArrayList.java
        MultiWindowDimensionAggregation.java
        join
        AntiJoinOperator.java
        TimeEventImpl.java
        POJOInnerJoinOperator.java
        AbstractJoinOperator.java
        POJOJoinOperator.java
        AbstractManagedStateInnerJoinOperator.java
        TimeEvent.java
        JoinStore.java
        Bucket.java
        TimeBasedStore.java
        JoinStreamCodec.java
        MapJoinOperator.java
        AbstractInnerJoinOperator.java
        SemiJoinOperator.java
        InMemoryStore.java
        filter
        FilterOperator.java
        transform
        TransformOperator.java
        expression
        JavaExpressionParser.java
        Expression.java
        parser
        XmlParser.java
        Parser.java
        package-info.java
      - appended-resources
        META-INF
        NOTICE.vm
    - test
      - resources
        adsGenericEventSchemaCustomTimeBuckets.json
        sqsdevCreds.properties
        adsGenericEventSchemaAggregations.json
        satisfactionRatingSnapshotSchema_test.json
        snapshotquery_deserialize4.json
        snapshotquery_deserialize1.json
        log4j.properties
        adsGenericEventSchemaNoEnums.json
        adsGenericEventSchemaOTF.json
        satisfactionRatingWithTagsSnapshotSchema_test.json
        snapshotquery_validation2.json
        snapshotquery_invalidcountdown.json
        snapshotServerPojoSchema.json
        employeeBean.xsd
        reader_test_data.csv
        snapshotquery_deserialize2.json
        SocketInputOperatorTest.txt
        adsGenericEventSchemaAdditionalOne.json
        snapshotschema.json
        snapshotquery_validation1.json
        adsGenericEventSchemaTags.json
        adsGenericEventSimpleAllCombinations.json
        JdbcProperties.xml
        adsGenericEventSchema.json
        snapshotquery_validation3.json
        adsGenericEventSchemaNoTime.json
        snapshotquery_deserialize6.json
        adsGenericEventSchemaDimensionTimeBuckets.json
        adsGenericEventSchemaAdditional.json
        snapshotServerPojoQuery.json
        snapshotquery_deserialize5.json
        snapshotquery_deserialize3.json
        sqstestCreds.properties
        deserializeToMapTest.json
        wordcount.txt
        snapshotquery_deserialize7.json
      - java
        org
        apache
        hadoop
        io
        file
        tfile
        TestTFileNoneCodecsByteArrays.java
        TestTFileStreams.java
        TestTFileSeek.java
        TestDTFile.java
        TestTFileComparators.java
        TestTFileSeqFileComparison.java
        TestTFileNoneCodecsStreams.java
        TestTFileNoneCodecsJClassComparatorByteArrays.java
        TestDTFileByteArrays.java
        DTFileTest.java
        TestTFileLzoCodecsByteArrays.java
        TestTFileUnsortedByteArrays.java
        TestTFileLzoCodecsStreams.java
        TestTFileSplit.java
        TestTFileJClassComparatorByteArrays.java
        TestTFileComparator2.java
        apex
        malhar
        lib
        multiwindow
        MultiWindowSumKeyValTest.java
        SlidingWindowTest.java
        SortedMovingWindowTest.java
        MultiWindowRangeKeyValTest.java
        SimpleMovingAverageTest.java
        script
        JavaScriptOperatorTest.java
        statistics
        ModeOperatorTest.java
        StandardDeviationOperatorTest.java
        WeightedMeanOperatorTest.java
        MeridianOperatorTest.java
        utils
        serde
        PairSerdeTest.java
        SerdeGeneralTest.java
        BlockStreamTest.java
        CollectionSerdeTest.java
        GenericSerdeTest.java
        AffixSerdeTest.java
        IOUtilsTest.java
        codec
        KryoStreamCodecTest.java
        JavaSerializationStreamCodecTest.java
        fs
        s3
        S3OutputModuleMockTest.java
        S3RecordReaderMockTest.java
        S3ReconcilerTest.java
        S3OutputTestModule.java
        S3FixedWidthRecordReaderTest.java
        S3InitiateFileUploadOperatorTest.java
        S3DelimitedRecordReaderTest.java
        S3RecordReaderModuleAppTest.java
        FSRecordCompactionOperatorTest.java
        FSRecordReaderTest.java
        GenericFileOutputOperatorTest.java
        stream
        StreamDuplicaterTest.java
        RoundRobinHashMapTest.java
        HashMapToKeyValPairTest.java
        JsonByteArrayOperatorTest.java
        KeyPairToHashMapTest.java
        ArrayListAggregatorTest.java
        DevNullCounterTest.java
        ConsolidatorKeyValTest.java
        ArrayListToItemTest.java
        DevNullTest.java
        CounterTest.java
        StreamMergerTest.java
        db
        TransactionableKeyValueStoreOperatorTest.java
        jdbc
        JdbcInputOperatorApplicationTest.java
        JdbcNonTransactionalStoreTest.java
        JdbcNonTransactionalOutputOperatorTest.java
        JdbcPojoOperatorApplicationTest.java
        JDBCLookupCacheBackedOperatorTest.java
        JdbcIOAppTest.java
        JdbcIOApp.java
        JdbcNonTransactionalBatchOutputOperatorTest.java
        JdbcStoreTest.java
        JdbcPojoPollableOpeartorTest.java
        JdbcPojoOperatorTest.java
        JdbcOperatorTest.java
        KeyValueStoreOperatorTest.java
        cache
        CacheManagerTest.java
        CacheStoreTest.java
        redshift
        RedshiftJdbcTransactionalOperatorTest.java
        bandwidth
        BandwidthPartitionerTest.java
        BandwidthManagerTest.java
        state
        managed
        IncrementalCheckpointManagerTest.java
        DefaultBucketTest.java
        BucketsFileSystemTest.java
        ManagedStateImplTest.java
        MockManagedStateContext.java
        StateTrackerTest.java
        ManagedTimeStateImplTest.java
        ManagedTimeUnifiedStateImplTest.java
        SliceBloomFilterTest.java
        ManagedStateTestUtils.java
        MovingBoundaryTimeBucketAssignerTest.java
        spillable
        SpillableComplexComponentImplTest.java
        inmem
        InMemorySpillableStateStoreTest.java
        SpillableArrayListImplTest.java
        WindowBoundedMapCacheTest.java
        SpillableMapImplTest.java
        SpillableArrayListMultimapImplTest.java
        TestStringTimeExtractor.java
        SpillableSetMultimapImplTest.java
        SpillableTestUtils.java
        SpillableSetImplTest.java
        SequentialSpillableIdentifierGeneratorTest.java
        TimeBasedPriorityQueueTest.java
        formatter
        XmlFormatterTest.java
        JsonFormatterTest.java
        testbench
        EventGeneratorTest.java
        EventClassifierTest.java
        ThroughputCounterTest.java
        SeedEventGeneratorTest.java
        FilteredEventClassifierTest.java
        ActiveMQMessageGenerator.java
        SeedEventClassifierTest.java
        EventIncrementerTest.java
        RandomWordGeneratorTest.java
        RandomEventGeneratorTest.java
        counters
        BasicCountersTest.java
        window
        impl
        WindowedMergeOperatorTest.java
        KeyedWindowedMergeOperatorTestApplication.java
        WindowedMergeOperatorTestApplication.java
        PojoInnerJoinTestApplication.java
        sample
        wordcount
        Application.java
        pi
        Application.java
        SpillableWindowedStorageTest.java
        accumulation
        CompositeAccumulationTest.java
        MinTest.java
        SortTest.java
        MaxTest.java
        CoGroupTest.java
        TopNByKeyTest.java
        PojoOuterJoinTest.java
        AverageTest.java
        PojoInnerJoinTest.java
        InnerJoinTest.java
        SumTest.java
        ReduceFnTest.java
        RemoveDuplicatesTest.java
        GroupTest.java
        FoldFnTest.java
        WindowedOperatorTest.java
        SumAccumulation.java
        dimensions
        CustomTimeBucketRegistryTest.java
        DimensionsDescriptorTest.java
        util
        PojoUtilsTest.java
        time
        WindowUtilsTest.java
        TestObjAllTypes.java
        TestUtils.java
        DimensionTimeBucketSumOperatorTest.java
        KryoCloneUtilsTest.java
        JavaScriptFilterOperatorTest.java
        appdata
        gpo
        GPOUtilsTest.java
        SerdeListPrimitiveTest.java
        SerdeListGPOMutableTest.java
        GPOMutableTest.java
        SerdeFieldsDescriptorTest.java
        SerdeListStringTest.java
        datastructs
        CacheLRUSynchronousFlushTest.java
        DimensionalTableTest.java
        dimensions
        CustomTimeBucketRegistryTest.java
        DimensionsEventTest.java
        query
        MockResultSerializer.java
        SimpleQueryQueueManagerTest.java
        WEQueryQueueManagerTest.java
        MockQuery.java
        QueryManagerSynchronousTest.java
        WindowBoundedServiceTest.java
        MockResult.java
        QueueUtilsTest.java
        serde
        MessageDeserializerFactoryTest.java
        SimpleDataValidatorTest.java
        SimpleDoneQueryQueueManagerTest.java
        QueryManagerAsynchronousTest.java
        snapshot
        AppDataSnapshotServerMapTest.java
        AppDataSnapshotServerPojoTest.java
        AppDataSnapshotServerTagsSupportTest.java
        ThreadUtils.java
        schemas
        ResultFormatterTest.java
        DimensionalConfigurationSchemaTest.java
        CustomTimeBucketTest.java
        SnapshotSchemaTest.java
        SchemaRegistryMultipleTest.java
        SchemaRegistrySingleTest.java
        SchemaUtilsTest.java
        DataQuerySnapshotDeserializerTest.java
        DataQuerySnapshotTest.java
        FieldsDescriptorTest.java
        TimeBucketTest.java
        SchemaTestUtils.java
        SchemaQueryTest.java
        DataResultSnapshotSerializerTest.java
        DimensionalSchemaTest.java
        algo
        UniqueCounterTest.java
        LeastFrequentKeyTest.java
        MostFrequentKeyTest.java
        MergeSortNumberTest.java
        MatchAnyMapTest.java
        TopNTest.java
        InsertSortTest.java
        MatchAllMapTest.java
        UniqueValueCountTest.java
        BottomNUniqueMapTest.java
        TopNUniqueTest.java
        FilterValuesTest.java
        BottomNMapTest.java
        DistinctTest.java
        BottomNUnifierTest.java
        CompareExceptCountMapTest.java
        helper
        OperatorContextTestHelper.java
        SamplePubSubWebSocketServlet.java
        TestPortContext.java
        wal
        FileSystemWALTest.java
        FSWindowDataManagerTest.java
        dedup
        DeduperOrderingTest.java
        DeduperTimeBasedPOJOImplTest.java
        DeduperBoundedPOJOImplTest.java
        DeduperPartitioningTest.java
        TestPojo.java
        io
        FTPStringInputOperatorTest.java
        ApacheRandomLogsTest.java
        fs
        AbstractFileInputOperatorTest.java
        FileSplitterInputTest.java
        AbstractWindowFileOutputOperatorTest.java
        AbstractReconcilerTest.java
        AbstractFileOutputOperatorTest.java
        FSInputModuleTest.java
        BlockWriterTest.java
        FSInputModuleAppTest.java
        FileSplitterTest.java
        FastMergerDecisionMakerTest.java
        FileSplitterBaseTest.java
        FileStitcherTest.java
        FileMergerTest.java
        S3InputModuleAppTest.java
        TailFsInputOperatorTest.java
        AbstractFileInputOperatorFailureHandlingTest.java
        SynchronizerTest.java
        AbstractSingleFileOutputOperatorTest.java
        AbstractThroughputFileInputOperatorTest.java
        HttpLinesInputOperatorTest.java
        HttpMultiValuedMapGetOperatorTest.java
        WebSocketServerInputOperatorTest.java
        PubSubWebSocketAppDataOperatorTest.java
        HttpPostOutputOperatorTest.java
        SmtpOutputOperatorTest.java
        PubSubWebSocketAppDataQueryTest.java
        HttpGetMapOperatorTest.java
        jms
        SQSStringInputOperatorTest.java
        SQSTestBase.java
        JMSObjectInputOperatorTest.java
        JMSOutputOperatorTest.java
        FSPsuedoTransactionableStoreTest.java
        JMSTransactionableStoreTestBase.java
        JMSTestBase.java
        JMSStringInputOperatorTest.java
        JMSTransactionableStoreTest.java
        JMSMultiPortOutputOperatorTest.java
        block
        ReadAheadLineReaderTest.java
        PartFileWriterTest.java
        AbstractBlockReaderTest.java
        FSLineReaderTest.java
        FSSliceReaderTest.java
        HttpJsonChunksInputOperatorTest.java
        IdempotentStorageManagerTest.java
        PubSubWebSocketAppDataResultTest.java
        PubSubWebSocketOperatorTest.java
        SocketInputOperatorTest.java
        converter
        ByteArrayToStringConverterTest.java
        StringValueToNumberConverterForMapTest.java
        MapToKeyValuePairConverterTest.java
        projection
        ProjectionTest.java
        ActivateTest.java
        math
        MaxKeyValTest.java
        RunningAverageTest.java
        RangeTest.java
        MinTest.java
        MultiplyByConstantTest.java
        MaxTest.java
        MarginMapTest.java
        CountOccuranceTest.java
        MarginKeyValTest.java
        AverageKeyValTest.java
        DivisionTest.java
        XmlKeyValueStringCartesianProductTest.java
        SumKeyValTest.java
        SquareCalculusTest.java
        MarginTest.java
        AverageTest.java
        LogicalCompareToConstantTest.java
        MinKeyValTest.java
        LogicalCompareTest.java
        RangeKeyValTest.java
        SumTest.java
        SigmaTest.java
        partitioner
        StatelessPartitionerTest.java
        StatelessThroughputBasedPartitionerTest.java
        logs
        FilteredLineToTokenArrayListTest.java
        LineToTokenHashMapTest.java
        FilteredLineTokenizerKeyValTest.java
        TopNUniqueSiteStatsTest.java
        ApacheLogParseMapOutputOperatorTest.java
        FilteredLineToTokenHashMapTest.java
        MultiWindowDimensionAggregationTest.java
        LineToTokenArrayListTest.java
        LineTokenizerKeyValTest.java
        ApacheLogParseOperatorTest.java
        LineTokenizerTest.java
        RegexMatchMapOperatorTest.java
        join
        AntiJoinOperatorTest.java
        SemiJoinOperatorTest.java
        POJOTimeBasedJoinOperatorTest.java
        POJOPartitionJoinOperatorTest.java
        MapTimeBasedJoinOperator.java
        POJOInnerJoinOperatorTest.java
        filter
        FilterAppTest.java
        FilterTest.java
        transform
        TransformOperatorTest.java
        TransformOperatorAppTest.java
        parser
        XmlParserApplicationTest.java
        XmlParserTest.java
  - pom.xml
  - XmlJavadocCommentsExtractor.xsl
  - library-checkstyle-suppressions.xml
- stream
  - src
    - main
      - java
        org
        apache
        apex
        malhar
        stream
        api
        impl
        ApexStreamImpl.java
        TupleWrapperOperator.java
        DagMeta.java
        StreamFactory.java
        IDGenerator.java
        ApexWindowedStreamImpl.java
        CompositeStreamTransform.java
        ApexStream.java
        WindowedStream.java
        Option.java
    - test
      - resources
        words
        word.txt
        data
        word.txt
      - java
        org
        apache
        apex
        malhar
        stream
        sample
        TupleCollector.java
        MyStream.java
        WordCountWithStreamAPI.java
        LocalTestWithoutStreamApplication.java
        ApplicationWithStreamAPI.java
        WCInput.java
        MyStreamTest.java
        ApplicationWithStreamAPITest.java
        api
        impl
        ApexStreamImplTest.java
        FunctionOperator
        FunctionOperatorTest.java
  - pom.xml
- header.txt
- examples
  - sql
    - src
      - main
        resources
        META-INF
        properties-SQLApplicationWithModelFile.xml
        properties-FusionStyleSQLApplication.xml
        properties.xml
        properties-SQLApplicationWithAPI.xml
        properties-PureStyleSQLApplication.xml
        model
        model_file_csv.json
        java
        org
        apache
        apex
        malhar
        sql
        sample
        SQLApplicationWithModelFile.java
        PureStyleSQLApplication.java
        SQLApplicationWithAPI.java
        FusionStyleSQLApplication.java
      - test
        resources
        log4j.properties
        input.csv
        java
        org
        apache
        apex
        malhar
        sql
        sample
        FusionStyleSQLApplicationTest.java
        PureStyleSQLApplicationTest.java
        SQLApplicationWithModelFileTest.java
        SQLApplicationWithAPITest.java
      - assemble
        appPackage.xml
    - pom.xml
  - kafka
    - src
      - main
        resources
        META-INF
        properties.xml
        properties-hdfs2kafka.xml
        properties-KafkaExactlyOnceOutput.xml
        java
        org
        apache
        apex
        examples
        kafka
        hdfs2kafka
        Application.java
        exactlyonceoutput
        BatchSequenceGenerator.java
        PassthroughFailOperator.java
        ValidationToFile.java
        Application.java
        kafka2hdfs
        LineOutputOperator.java
        KafkaApp.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        kafka
        hdfs2kafka
        ApplicationTest.java
        exactlyonceoutput
        ApplicationTest.java
        kafka2hdfs
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - exactly-once
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        exactlyonce
        ExactlyOnceJdbcOutputApp.java
        ExactlyOnceFileOutputApp.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        exactlyonce
        ExactlyOnceFileOutputAppTest.java
        ExactlyOnceJdbcOutputTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - wordcount
    - src
      - main
        resources
        samplefile.txt
        META-INF
        properties.xml
        org
        apache
        apex
        examples
        wordcount
        samplefile.txt
        WordDataSchema.json
        java
        org
        apache
        apex
        examples
        wordcount
        WordCountInputOperator.java
        WindowWordCount.java
        LineReader.java
        WordCountWriter.java
        doc-files
        UniqueWordCounter.jpg
        WCPair.java
        package-info.java
        ApplicationWithQuerySupport.java
        WordReader.java
        FileWordCount.java
        Application.java
      - test
        resources
        log4j.properties
        dt-site-wordcount.xml
        java
        org
        apache
        apex
        examples
        wordcount
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - READEME.md
  - s3
    - src
      - main
        resources
        META-INF
        properties-s3TupleOutput.xml
        properties-s3Output.xml
        properties-s3ToHdfsSync.xml
        java
        org
        apache
        apex
        examples
        s3ToHdfsSync
        S3ToHDFSSyncApplication.java
        s3Output
        Application.java
        s3TupleOutput
        Application.java
      - test
        resources
        log4j.properties
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - jdbc
    - src
      - main
        resources
        META-INF
        properties-JdbcToJdbcApp.xml
        properties-FileToJdbcApp.xml
        properties-SimpleJdbcToHDFSApp.xml
        properties-PollJdbcToHDFSApp.xml
        schema.json
        java
        org
        apache
        apex
        examples
        JdbcIngest
        JdbcHDFSApp.java
        JdbcPollerApplication.java
        FileLineOutputOperator.java
        PojoEvent.java
        FileToJdbcApp
        CustomParser.java
        FileToJdbcCustomParser.java
        PojoEvent.java
        FileReader.java
        FileToJdbcCsvParser.java
        JdbcToJdbc
        JdbcToJdbcApp.java
        PojoEvent.java
      - test
        resources
        log4j.properties
        test-FileToJdbcApp.xml
        example-JdbcIngest-sql.txt
        example-JdbcToJdbc-sql.txt
        test-input
        sample-FileToJdbc.txt
        example-FileToJdbcApp-sql.txt
        java
        org
        apache
        apex
        examples
        JdbcIngest
        ApplicationTest.java
        JdbcInputAppTest.java
        JdbcPollerApplicationTest.java
        FileToJdbcApp
        ApplicationTest.java
        JdbcToJdbc
        ApplicationTest.java
        JdbcOperatorTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - mobile
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        mobile
        doc-files
        PhoneMovementGenerator.java
        PhoneEntryOperator.java
        package-info.java
        Application.java
      - test
        resources
        dt-site-mobile.xml
        log4j.properties
        java
        org
        apache
        apex
        examples
        mobile
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - READEME.md
  - twitter
    - src
      - main
        resources
        top_urls.tplg.properties
        twitterURLDataSchema.json
        twitterWordDataSchema.json
        META-INF
        properties.xml
        properties-TwitterKinesisDemo.xml
        twitterHashTagDataSchema.json
        mysql.sql
        java
        org
        apache
        apex
        examples
        twitter
        TwitterStatusWordExtractor.java
        TwitterTopWordsApplication.java
        TwitterTrendingHashtagsApplication.java
        SlidingContainer.java
        WindowedTopCounter.java
        TwitterStatusURLExtractor.java
        doc-files
        Application.gif
        TwitterDumpApplication.java
        package-info.java
        TwitterStatusHashtagExtractor.java
        TwitterTopCounterApplication.java
        URLSerDe.java
        KinesisHashtagsApplication.java
        TwitterDumpHBaseApplication.java
      - test
        resources
        log4j.properties
        dt-site-rollingtopwords.xml
        java
        org
        apache
        apex
        examples
        twitter
        TwitterDumpApplicationTest.java
        TwitterTopWordsTest.java
        TwitterTopCounterTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - READEME.md
  - pom.xml
  - enricher
    - src
      - main
        resources
        META-INF
        properties.xml
        circleMapping.txt
        java
        org
        apache
        apex
        examples
        enricher
        LineOutputOperator.java
        EnricherAppWithJSONFile.java
        POJOEnriched.java
        POJO.java
        DataGenerator.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        enricher
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - partition
    - src
      - main
        resources
        my-log4j.properties
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        partition
        Codec3.java
        TestPartition.java
        RandomNumberGenerator.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        partition
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - iteration
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        iteration
        package-info.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        iteration
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - READEME.md
  - csvformatter
    - src
      - main
        resources
        META-INF
        properties.xml
        schema.json
        java
        org
        apache
        apex
        examples
        csvformatter
        HDFSOutputOperator.java
        JsonGenerator.java
        PojoEvent.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        csvformatter
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - ftp
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        ftp
        package-info.java
        Application.java
      - test
        resources
        log4j.properties
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - dynamic-partition
    - src
      - main
        resources
        unused-log4j.properties
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        dynamic
        Gen.java
        App.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        dynamic
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - nyctaxi
    - src
      - main
        resources
        nyc_zip_codes.csv
        java
        org
        apache
        apex
        examples
        nyctaxi
        NycTaxiZipFareExtractor.java
        NycTaxiDataReader.java
        NycTaxiDataServer.java
        NycLocationUtils.java
        NycTaxiCsvParser.java
        Application.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - yahoofinance
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        yahoofinance
        YahooFinanceCSVInputOperator.java
        YahooFinanceApplication.java
        doc-files
        Application.gif
        StockTickInput.java
        package-info.java
        ApplicationWithDerbySQL.java
      - test
        resources
        alert_create.json
        log4j.properties
        java
        org
        apache
        apex
        examples
        yahoofinance
        ApplicationTest.java
        ApplicationWithDerbySQLTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - frauddetect
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        frauddetect
        BankIdNumberKey.java
        BankIdNumberSamplerOperator.java
        SlidingWindowSumObject.java
        TransactionStatsData.java
        AverageAlertData.java
        operator
        HdfsStringOutputOperator.java
        MongoDBOutputOperator.java
        MerchantTransactionInputHandler.java
        util
        JsonUtils.java
        CreditCardData.java
        SlidingWindowSumKeyVal.java
        MerchantTransactionGenerator.java
        TransactionStatsAggregator.java
        MerchantQueryInputHandler.java
        MerchantTransactionBucketOperator.java
        BankIdNumberAlertData.java
        MerchantKey.java
        CreditCardAlertData.java
        MerchantTransaction.java
        AverageAlertingOperator.java
        Application.java
        CreditCardAmountSamplerOperator.java
      - test
        resources
        log4j.properties
        dt-site-frauddetect.xml
        java
        org
        apache
        apex
        examples
        frauddetect
        FrauddetectApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - mrmonitor
    - src
      - main
        resources
        mrdebugger.html
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        mrmonitor
        MRUtil.java
        MRStatusObject.java
        Constants.java
        MRMonitoringApplication.java
        MRJobStatusOperator.java
        Application.java
        MapToMRObjectOperator.java
      - test
        resources
        dt-site-monitoring.xml
        log4j.properties
        java
        org
        apache
        apex
        examples
        mrmonitor
        MrMonitoringApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - pi
    - src
      - main
        resources
        META-INF
        properties.xml
        app
        PiJsonExample.json
        PiExampleDataSchema.json
        java
        org
        apache
        apex
        examples
        pi
        PiCalculateOperator.java
        ApplicationAppData.java
        doc-files
        ApplicationScript.gif
        Application.gif
        NamedValueList.java
        package-info.java
        Calculator.java
        ApplicationWithScript.java
        Application.java
      - test
        resources
        log4j.properties
        dt-site-pi.xml
        dt-site-pilibrary.xml
        java
        org
        apache
        apex
        examples
        pi
        ApplicationTest.java
        CalculatorTest.java
        ApplicationWithScriptTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - READEME.md
  - mroperator
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        mroperator
        DateWritable.java
        MapReduceApplication.java
        ReduceOperator.java
        LineIndexer.java
        WordCount.java
        ReporterImpl.java
        MapOperator.java
        OutputCollectorImpl.java
        NewWordCountApplication.java
        InvertedIndexApplication.java
        LogCountsPerHour.java
        HdfsKeyValOutputOperator.java
        LogsCountApplication.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        mroperator
        WordCountMRApplicationTest.java
        ReduceOperatorTest.java
        MapOperatorTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - recordReader
    - src
      - main
        resources
        META-INF
        properties.xml
        CustomerTxnData.txt
        java
        org
        apache
        apex
        examples
        recordReader
        TransactionsSchema.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        recordReader
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - dedup
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        dedup
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        dedup
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - throttle
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        throttle
        SlowDevNullOperator.java
        PassThroughOperator.java
        ThrottlingStatsListener.java
        RandomNumberGenerator.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        throttle
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - machinedata
    - src
      - main
        resources
        META-INF
        properties.xml
        html
        malhar.css
        json.php
        index.php
        global.js
        java
        org
        apache
        apex
        examples
        machinedata
        operator
        MachineInfoAveragingUnifier.java
        MachineInfoAveragingPrerequisitesOperator.java
        MachineInfoAveragingOperator.java
        CalculatorOperator.java
        util
        Combinatorics.java
        DataTable.java
        data
        MachineInfo.java
        ResourceType.java
        AverageData.java
        MachineKey.java
        InputReceiver.java
        DimensionGenerator.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        machinedata
        CalculatorOperatorTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - READEME.md
  - r
    - src
      - main
        resources
        META-INF
        properties.xml
        org
        apache
        apex
        examples
        r
        oldfaithful
        eruptionModel.R
        java
        org
        apache
        apex
        examples
        r
        oldfaithful
        OldFaithfulApplication.java
        FaithfulRScript.java
        FaithfulKey.java
        InputGenerator.java
      - test
        resources
        log4j.properties
        dt-site-oldfaithful.xml
        java
        org
        apache
        apex
        examples
        r
        oldfaithful
        OldFaithfulApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - distributedistinct
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        distributeddistinct
        StatefulApplication.java
        RandomKeyValGenerator.java
        IntegerUniqueValueCountAppender.java
        UniqueValueCountAppender.java
        CountVerifier.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        distributeddistinct
        StatefulApplicationTest.java
        DistributedDistinctTest.java
        ApplicationTest.java
        StatefulUniqueCountTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - uniquecount
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        uniquecount
        RandomKeyValues.java
        RandomDataGenerator.java
        package-info.java
        CountVerifier.java
        Application.java
        RandomKeysGenerator.java
        UniqueKeyValCountExample.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        uniquecount
        ApplicationTest.java
        UniqueKeyValExampleTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - filter
    - src
      - main
        resources
        META-INF
        selected_output.txt
        properties.xml
        rejected_output.txt
        input.txt
        java
        org
        apache
        apex
        examples
        filter
        TransactionPOJO.java
        Application.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        filter
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - transform
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        transform
        CustomerInfo.java
        CustomerEvent.java
        SimpleTransformApplication.java
        DynamicTransformApplication.java
        POJOGenerator.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        transform
        SimpleTransformApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - parser
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        parser
        jsonparser
        Campaign.java
        JsonGenerator.java
        Application.java
        regexparser
        RegexParserApplication.java
        ServerLog.java
        FileOutputOperator.java
        ServerLogGenerator.java
      - test
        resources
        log4j.properties
        properties-regexParserApplication.xml
        java
        org
        apache
        apex
        examples
        parser
        jsonparser
        ApplicationTest.java
        regexparser
        RegexParserApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
    - README.md
  - highlevelapi
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        malhar
        stream
        sample
        cookbook
        OutputPojo.java
        CombinePerKeyExamples.java
        MaxPerKeyExamples.java
        TriggerExample.java
        DeDupExample.java
        InputPojo.java
        WindowedWordCount.java
        complete
        TrafficRoutes.java
        AutoComplete.java
        CompletionCandidate.java
        StreamingWordExtract.java
        TopNByKey.java
        PojoEvent.java
        TopWikipediaSessions.java
        TwitterAutoComplete.java
        MinimalWordCount.java
      - test
        resources
        sampletweets.txt
        wordcount
        word.txt
        log4j.properties
        data
        word.txt
        java
        org
        apache
        apex
        malhar
        stream
        sample
        cookbook
        CombinePerKeyExamplesTest.java
        MaxPerKeyExamplesTest.java
        DeDupExampleTest.java
        complete
        AutoCompleteTest.java
        TwitterAutoCompleteTest.java
        TopWikipediaSessionsTest.java
        TrafficRoutesTest.java
        StreamingWordExtractTest.java
        WindowedWordCountTest.java
        MinimalWordCountTest.java
      - assemble
        appPackage.xml
    - pom.xml
  - echoserver
    - src
      - main
        resources
        META-INF
        properties.xml
        java
        org
        apache
        apex
        examples
        echoserver
        MessageResponder.java
        Message.java
        NetworkManager.java
        Application.java
        MessageReceiver.java
      - test
        resources
        log4j.properties
        java
        org
        apache
        apex
        examples
        echoserver
        ApplicationTest.java
      - assemble
        appPackage.xml
    - pom.xml
- pom.xml
- mkdocs.yml
- LICENSE
- CHANGELOG.md
- kudu
  - src
    - main
      - antlr4
        org
        apache
        apex
        malhar
        kudu
        sqlparser
        KuduSQLExpression.g4
      - java
        org
        apache
        apex
        malhar
        kudu
        KuduMutationType.java
        InputOperatorControlTuple.java
        sqltranslator
        SQLToKuduPredicatesTranslator.java
        package-info.java
        KuduSQLParser.java
        KuduSQLParseTreeListener.java
        KuduSQLExpressionErrorListener.java
        scanner
        KuduPartitionConsistentOrderScanner.java
        KuduPartitionScanAssignmentMeta.java
        KuduScanOrderStrategy.java
        package-info.java
        AbstractKuduPartitionScanner.java
        KuduPartitionScannerCallable.java
        KuduRecordWithMeta.java
        KuduPartitionRandomOrderScanner.java
        BaseKuduOutputOperator.java
        AbstractKuduInputOperator.java
        KuduExecutionContext.java
        ApexKuduConnection.java
        AbstractKuduOutputOperator.java
        package-info.java
        IncrementalStepScanInputOperator.java
        partitioner
        KuduPartitionScanStrategy.java
        KuduOneToOnePartitioner.java
        KuduOneToManyPartitioner.java
        package-info.java
        AbstractKuduInputPartitioner.java
    - test
      - resources
        kuduincrementalstepscaninputoperator.properties
        log4j.properties
        kuduoutputoperator.properties
      - java
        org
        apache
        apex
        malhar
        kudu
        KuduCreateUpdateDeleteOutputOperatorTest.java
        sqltranslator
        SQLToKuduPredicatesTranslatorTest.java
        scanner
        KuduPartitionScannerCallableTest.java
        AbstractKuduPartitionScannerTest.java
        KuduClientTestCommons.java
        IncrementalStepScanInputOperatorTest.java
        test
        KuduClusterTestContext.java
        KuduClusterAvailabilityTestRule.java
        AbstractKuduInputOperatorTest.java
        SimpleKuduOutputOperator.java
        UnitTestStepwiseScanInputOperator.java
        KuduInputOperatorCommons.java
        partitioner
        AbstractKuduInputPartitionerTest.java
        KuduOneToManyPartitionerTest.java
        KuduOneToOnePartitionerTest.java
        UnitTestTablePojo.java
  - pom.xml
- apps
  - pom.xml
  - filecopy
    - src
      - main
        java
        com
        datatorrent
        apps
        copy
        HDFSFileCopyApp.java
      - site
        conf
        app-conf.xml
      - test
        resources
        log4j.properties
      - assemble
        appPackage.xml
    - pom.xml
  - logstream
    - src
      - main
        resources
        META-INF
        properties.xml
        com
        datatorrent
        apps
        logstream
        dt-site-sample.xml
        logstash
        logstash_sample.conf
        logstash_sample3.conf
        logstash_sample2.conf
        generate_sysstats.sh
        rsyslog
        apache.conf
        html
        DrawPageViewTimeChart.js
        TopIpData.php
        Server404.php
        malhar.css
        Url404.js
        TopUrlChart.js
        index.php
        TotalViews.js
        serverfail.js
        ServerLoad.php
        global.js
        TopIpClientChart.js
        TopServer.php
        ClientData.js
        TopIpClientData.php
        Url404.php
        ClientData.php
        PageViewTimeData.php
        server.js
        info.php
        TopUrlData.php
        TopIpData.js
        TotalViews.php
        TopServer.js
        java
        com
        datatorrent
        apps
        logstream
        LogstreamTopN.java
        LogstreamWidgetOutputOperator.java
        Application1.java
        DimensionOperatorUnifier.java
        PropertyRegistry
        LogstreamPropertyRegistry.java
        PropertyRegistry.java
        LogScoreOperator.java
        FilterOperator.java
        package-info.java
        RabbitMQLogsInputOperator.java
        SumItemFromMapOperator.java
        DimensionOperator.java
        AggregationsToRedisOperator.java
        Application.java
        LogstreamUtil.java
      - site
        conf
        my-app-conf1.xml
      - test
        resources
        log4j.properties
        com
        datatorrent
        apps
        sample_logs
        mysql.log.2
        syslog.messages.1
        mysql.log.1
        java
        com
        datatorrent
        apps
        logstream
        DimensionOperatorTest.java
        ApplicationTest.java
        FilterOperatorTest.java
        PropertyRegistry
        LogstreamPropertyRegistryTest.java
        DimensionOperatorUnifierTest.java
        LogstreamTopNTest.java
        Application1Test.java
      - assemble
        appPackage.xml
    - pom.xml
- python
  - src
    - main
      - java
        org
        apache
        apex
        malhar
        python
        base
        ApexPythonEngine.java
        BasePythonExecutionOperator.java
        util
        PythonRequestResponseUtil.java
        NDArrayKryoSerializer.java
        NDimensionalArray.java
        WorkerExecutionMode.java
        package-info.java
        PythonInterpreterConfig.java
        partitioner
        PythonExecutionPartitionerType.java
        package-info.java
        AbstractPythonExecutionPartitioner.java
        ThreadStarvationBasedPartitioner.java
        jep
        InterpreterThread.java
        SpinPolicy.java
        InterpreterWrapper.java
        package-info.java
        JepPythonEngine.java
        requestresponse
        ScriptExecutionRequestPayload.java
        PythonInterpreterRequest.java
        PythonInterpreterResponse.java
        PythonRequestResponse.java
        EvalCommandRequestPayload.java
        PythonCommandType.java
        package-info.java
        MethodCallRequestPayload.java
        GenericCommandsRequestPayload.java
        ApexPythonInterpreterException.java
    - test
      - resources
        log4j.properties
        factorial.py
        META-INF
        properties.xml
      - java
        org
        apache
        apex
        malhar
        python
        PythonProcessingPojo.java
        test
        BasePythonTest.java
        JepPythonTestContext.java
        PythonAvailabilityTestRule.java
        base
        jep
        JepPythonEngineTest.java
        BaseJEPTest.java
        InterpreterWrapperTest.java
        InterpreterThreadTest.java
        PythonPayloadPOJOGenerator.java
        SimplePythonOpOperator.java
        PythonExecutorApplicationTest.java
        PythonExecutorApplication.java
  - pom.xml
- .mvn
  - jvm.config
- .travis.yml
- README.md
- hive
  - src
    - main
      - java
        org
        apache
        apex
        malhar
        hive
        FSPojoToHiveOperator.java
        HiveStreamCodec.java
        HiveOperator.java
        package-info.java
        AbstractFSRollingOutputOperator.java
        HiveOutputModule.java
        HiveStore.java
    - test
      - java
        org
        apache
        apex
        malhar
        hive
        FSRollingTestImpl.java
        HiveStoreTest.java
        FSRollingMapTestImpl.java
        HiveMockTest.java
  - pom.xml
  - XmlJavadocCommentsExtractor.xsl
- NOTICE
- .gitignore
- docs
  - images
  - apis
    - calcite.md
    - images
      - image_2.jpg
      - image_1.jpg
  - index.md
  - operators
    - jdbcPollInputOperator.md
    - kafkaInputOperator.md
    - fsInputOperator.md
    - jsonParser.md
    - deduper.md
    - file_splitter.md
    - fixedWidthParserOperator.md
    - xmlParserOperator.md
    - ftpInputOperator.md
    - s3outputmodule.md
    - images
      - kafkainput
      - s3output
      - csvParser
      - jsonParser
      - blockreader
      - windowedOperator
      - fixedWidthParser
      - fileoutput
      - deduper
      - xmlParser
      - jdbcinput
      - jsonFormatter
      - ftpInputOperator
      - jdbcoutput
      - filesplitter
      - regexparser
      - fsInput
    - csvformatter.md
    - file_output.md
    - block_reader.md
    - regexparser.md
    - csvParserOperator.md
    - enricher.md
    - transform.md
    - filter.md
    - jmsInputOperator.md
    - windowedOperator.md
    - AbstractJdbcTransactionableOutputOperator.md
    - jsonFormatter.md
  - favicon.ico
  - README.md
  - CodingConventionsAndStyle.md
- contrib
  - src
    - main
      - resources
        r
        aDoubleVector.R
        aReal.R
        aBoolean.R
        model.R
        anInt.R
        anInteger.R
        aString.R
        org
        apache
        apex
        malhar
        contrib
        apachelog
        ipaddress.txt
        agents.txt
        urls.txt
        referers.txt
      - c
        zmq_push
        Makefile
        zmq_push.c
      - java
        org
        apache
        apex
        malhar
        contrib
        kafka
        KafkaConsumer.java
        AbstractKafkaInputOperator.java
        SimpleKafkaConsumer.java
        AbstractExactlyOnceKafkaOutputOperator.java
        AbstractKafkaSinglePortInputOperator.java
        KafkaSinglePortByteArrayInputOperator.java
        AbstractKafkaOutputOperator.java
        KafkaPartition.java
        package-info.java
        POJOKafkaOutputOperator.java
        KafkaJsonEncoder.java
        HighlevelKafkaConsumer.java
        OffsetManager.java
        KafkaSinglePortStringInputOperator.java
        KafkaSinglePortOutputOperator.java
        KafkaMetadataUtil.java
        avro
        PojoToAvro.java
        AvroToPojo.java
        AvroFileToPojoModule.java
        AvroRecordHelper.java
        AvroFileInputOperator.java
        kinesis
        ShardManager.java
        KinesisByteArrayOutputOperator.java
        AbstractKinesisInputOperator.java
        KinesisConsumer.java
        package-info.java
        KinesisUtil.java
        KinesisStringInputOperator.java
        KinesisStringOutputOperator.java
        KinesisByteArrayInputOperator.java
        AbstractKinesisOutputOperator.java
        geode
        RegionCreateFunction.java
        GeodeKeyValueStorageAgent.java
        GeodeCheckpointStore.java
        AbstractGeodeInputOperator.java
        GeodePOJOOutputOperator.java
        AbstractGeodeOutputOperator.java
        GeodeStore.java
        romesyndication
        RomeFeedEntry.java
        package-info.java
        RomeStreamProvider.java
        RomeSyndicationOperator.java
        mqtt
        AbstractMqttInputOperator.java
        AbstractMqttOutputOperator.java
        AbstractSinglePortMqttOutputOperator.java
        MqttClientConfig.java
        package-info.java
        AbstractSinglePortMqttInputOperator.java
        hbase
        HBasePOJOPutOperator.java
        HBaseFieldInfo.java
        HBasePOJOInputOperator.java
        HBaseCsvMappingPutOperator.java
        HBaseOperatorBase.java
        AbstractHBaseWindowPutOutputOperator.java
        OutputAdapter.java
        HBaseFieldValueGenerator.java
        HBaseGetOperator.java
        HBaseInputOperator.java
        HBaseScanOperator.java
        package-info.java
        AbstractHBaseWindowOutputOperator.java
        AbstractHBaseWindowAppendOutputOperator.java
        HBaseNameValueCsvPutOperator.java
        AbstractHBasePutOutputOperator.java
        AbstractHBaseAppendOutputOperator.java
        AbstractHBaseOutputOperator.java
        HBaseWindowStore.java
        HBaseStore.java
        redis
        LettuceStore.java
        RedisPOJOInputOperator.java
        RedisPOJOOutputOperator.java
        RedisNumberSummationKeyValPairOutputOperator.java
        RedisNumberSummationMapOutputOperator.java
        RedisKeyValueInputOperator.java
        AbstractRedisInputOperator.java
        RedisStore.java
        package-info.java
        RedisMapOutputOperator.java
        AbstractRedisAggregateOutputOperator.java
        RedisKeyValPairOutputOperator.java
        RedisMapAsValueInputOperator.java
        NumberSummation.java
        rabbitmq
        AbstractRabbitMQOutputOperator.java
        AbstractSinglePortRabbitMQInputOperator.java
        AbstractSinglePortRabbitMQOutputOperator.java
        package-info.java
        RabbitMQOutputOperator.java
        AbstractRabbitMQInputOperator.java
        RabbitMQInputOperator.java
        couchbase
        package-info.java
        AbstractCouchBaseSetOperator.java
        CouchBasePOJOInputOperator.java
        CouchBaseWindowStore.java
        AbstractCouchBaseInputOperator.java
        CouchbasePOJOSetOperator.java
        CouchBaseStore.java
        CouchBaseSerializer.java
        CouchBaseJSONSerializer.java
        AbstractCouchBaseOutputOperator.java
        memsql
        MemsqlPOJOInputOperator.java
        MemsqlStore.java
        AbstractMemsqlOutputOperator.java
        package-info.java
        MemsqlPOJOOutputOperator.java
        AbstractMemsqlInputOperator.java
        twitter
        TwitterSampleInput.java
        formatter
        CsvFormatter.java
        ruby
        RubyOperator.java
        cassandra
        AbstractUpsertOutputOperator.java
        CassandraPreparedStatementGenerator.java
        CassandraPOJOOutputOperator.java
        package-info.java
        CassandraStore.java
        ConnectionStateManager.java
        CassandraPojoUtils.java
        AbstractCassandraInputOperator.java
        CassandraPOJOInputOperator.java
        CassandraTransactionalStore.java
        UpsertExecutionContext.java
        AbstractCassandraTransactionableOutputOperator.java
        couchdb
        CouchDBPOJOOutputOperator.java
        AbstractMapBasedInputOperator.java
        CouchDBPOJOInputOperator.java
        AbstractCouchDBInputOperator.java
        package-info.java
        AbstractCouchDBOutputOperator.java
        MapBasedCouchDbOutputOperator.java
        CouchDbStore.java
        elasticsearch
        AbstractElasticSearchInputOperator.java
        AbstractElasticSearchOutputOperator.java
        ElasticSearchPercolatorOperator.java
        ElasticSearchConnectable.java
        package-info.java
        ElasticSearchMapInputOperator.java
        ElasticSearchMapOutputOperator.java
        ElasticSearchPercolatorStore.java
        memcache
        MemcachePOJOOutputOperator.java
        AbstractMemcacheOutputOperator.java
        package-info.java
        MemcacheStore.java
        AbstractMemcacheInputOperator.java
        sqlite
        SqliteStreamOperator.java
        package-info.java
        splunk
        SplunkInputOperator.java
        SplunkTcpOutputOperator.java
        package-info.java
        SplunkStore.java
        AbstractSplunkInputOperator.java
        SplunkInputFromForwarder.java
        parquet
        ParquetFilePOJOReader.java
        AbstractParquetFileReader.java
        misc
        algo
        MostFrequentKeyValueMap.java
        Sampler.java
        InvertIndexArray.java
        FilterKeysHashMap.java
        MostFrequentKeyMap.java
        InvertIndex.java
        FilterKeyVals.java
        DistinctMap.java
        LeastFrequentKeyValueMap.java
        AllAfterMatchMap.java
        package-info.java
        LeastFrequentKeyMap.java
        FirstTillMatch.java
        FilterKeysMap.java
        InsertSortDesc.java
        FirstN.java
        AbstractStreamPatternMatcher.java
        LastMatchMap.java
        FirstMatchMap.java
        streamquery
        OuterJoinOperator.java
        index
        NegateExpression.java
        SumExpression.java
        StringLenIndex.java
        RoundDoubleIndex.java
        MidIndex.java
        UnaryExpression.java
        BinaryExpression.java
        StringCaseIndex.java
        OrderByRule.java
        DerbySqlStreamOperator.java
        function
        SumFunction.java
        MaxMinFunction.java
        FirstLastFunction.java
        CountFunction.java
        AverageFunction.java
        FunctionIndex.java
        SelectTopOperator.java
        GroupByHavingOperator.java
        package-info.java
        SelectFunctionOperator.java
        InnerJoinOperator.java
        DeleteOperator.java
        OrderByOperator.java
        SelectOperator.java
        AbstractSqlStreamOperator.java
        UpdateOperator.java
        condition
        EqualValueCondition.java
        HavingCompareValue.java
        CompoundCondition.java
        InCondition.java
        LikeCondition.java
        BetweenCondition.java
        HavingCondition.java
        math
        ExceptMap.java
        CompareExceptMap.java
        QuotientMap.java
        ChangeKeyVal.java
        Quotient.java
        ChangeAlertMap.java
        package-info.java
        ChangeAlert.java
        SumCountMap.java
        Change.java
        ChangeAlertKeyVal.java
        CountKeyVal.java
        CompareMap.java
        apachelog
        ApacheLogInputGenerator.java
        ApplicationLocalLog.java
        UserAgentExtractor.java
        TimestampExtractor.java
        package-info.java
        GeoIPExtractor.java
        ApplicationLogGenerator.java
        zmq
        Application.java
        enrich
        FSLoader.java
        DelimitedFSLoader.java
        POJOEnricher.java
        AbstractEnricher.java
        MapEnricher.java
        JDBCLoader.java
        BackendLoader.java
        NullValuesCacheManager.java
        FixedWidthFSLoader.java
        ReadOnlyBackup.java
        JsonFSLoader.java
        memcache_whalin
        AbstractMemcacheOutputOperator.java
        package-info.java
        MemcacheStore.java
        AbstractMemcacheInputOperator.java
        solr
        SolrServerConnector.java
        AbstractSolrOutputOperator.java
        AbstractSolrInputOperator.java
        package-info.java
        CloudSolrServerConnector.java
        ConcurrentUpdateSolrServerConnector.java
        README.md
        LBHttpSolrServerConnector.java
        SolrInputOperator.java
        HttpSolrServerConnector.java
        SolrOutputOperator.java
        r
        REngineConnectable.java
        RMin.java
        RScript.java
        RMax.java
        RStandardDeviation.java
        package-info.java
        aerospike
        AerospikeStore.java
        AbstractAerospikeTransactionalPutOperator.java
        AbstractAerospikeGetOperator.java
        AerospikePOJOTransactionalPutOperator.java
        AerospikeTransactionalStore.java
        AerospikePOJONonTransactionalPutOperator.java
        package-info.java
        AbstractAerospikeNonTransactionalPutOperator.java
        accumulo
        AbstractAccumuloOutputOperator.java
        AccumuloStore.java
        AbstractAccumuloInputOperator.java
        AccumuloWindowStore.java
        package-info.java
        mongodb
        MongoDBArrayListOutputOperator.java
        MongoDBPOJOOutputOperator.java
        MongoDBConnectable.java
        MongoDBInputOperator.java
        package-info.java
        MongoDBOutputOperator.java
        MongoDBHashMapOutputOperator.java
        jython
        package-info.java
        PythonOperator.java
        parser
        RegexParser.java
        LogParser.java
        FixedWidthSchema.java
        CommonLogParser.java
        AbstractCsvParser.java
        Schema.java
        DelimitedSchema.java
        log
        CommonLog.java
        JsonParser.java
        JsonKeyFinder.java
        package-info.java
        StreamingJsonParser.java
        LogSchemaDetails.java
        FixedWidthParser.java
        CsvParser.java
        CellProcessorBuilder.java
        zmq
        AbstractBaseZeroMQInputOperator.java
        SimpleSinglePortZeroMQPullInputOperator.java
        ZeroMQOutputOperator.java
        AbstractSinglePortZeroMQOutputOperator.java
        package-info.java
        ZeroMQInputOperator.java
        AbstractSinglePortZeroMQInputOperator.java
        SimpleSinglePortZeroMQPullStringInputOperator.java
        AbstractBaseZeroMQOutputOperator.java
        nifi
        NiFiSinglePortInputOperator.java
        AbstractNiFiSinglePortInputOperator.java
        NiFiDataPacket.java
        AbstractNiFiOutputOperator.java
        NiFiDataPacketBuilder.java
        NiFiSinglePortOutputOperator.java
        StandardNiFiDataPacket.java
        AbstractNiFiInputOperator.java
      - appended-resources
        META-INF
        NOTICE.vm
    - test
      - resources
        RegexSplitterschema.json
        log4j.properties
        fixed-width-sample.txt
        productmapping-delim.txt
        invalidLogSchema.json
        json-parser-schema.json
        logSchema.json
        FixedWidthSchema.json
        productmapping.txt
        schema.json
      - java
        org
        apache
        apex
        malhar
        contrib
        kafka
        SimpleKakfaConsumerTest.java
        KafkaExactlyOnceOutputOperatorTest.java
        KafkaSimpleConsumer.java
        KafkaTestPartitioner.java
        KafkaInputOperatorTest.java
        KafkaPartitionableInputOperatorTest.java
        KafkaOperatorTestBase.java
        OffsetManagerTest.java
        KafkaOutputOperatorTest.java
        KafkaTestConsumer.java
        KafkaTestProducer.java
        avro
        PojoToAvroTest.java
        AvroFileInputOperatorTest.java
        AvroToPojoTest.java
        AvroFileToPojoModuleTest.java
        kinesis
        ShardManagerTest.java
        KinesisByteArrayOutputOperatorTest.java
        KinesisOperatorTestBase.java
        KinesisPartitionableInputOperatorTest.java
        KinesisTestProducer.java
        KinesisOutputOperatorTest.java
        KinesisInputOperatorTest.java
        KinesisTestConsumer.java
        KinesisStringOutputOperatorTest.java
        geode
        GeodeCheckpointStoreTest.java
        GeodeKeyValueStorageAgentTest.java
        GeodePOJOOperatorTest.java
        GeodeOperatorTest.java
        GeodeStoreTest.java
        romesyndication
        RomeSyndicationOperatorTest.java
        mqtt
        MqttInputOperatorTest.java
        MqttOutputOperatorTest.java
        hbase
        HBaseScanOperatorTest.java
        HBaseCsvMappingPutOperatorTest.java
        HBaseNameValueCsvPutOperatorTest.java
        HBaseTupleCollector.java
        HBasePOJOPutOperatorTest.java
        HBaseTransactionalAppendOperatorTest.java
        HBaseTuple.java
        HBaseRowStringGenerator.java
        HBaseColTupleGenerator.java
        HBasePOJOInputOperatorTest.java
        HBaseRowTupleGenerator.java
        HBaseTestHelper.java
        HBaseUtil.java
        HBaseGetOperatorTest.java
        HBaseTransactionalPutOperatorTest.java
        redis
        RedisOperatorTest.java
        RedisInputOperatorTest.java
        RedisPOJOOperatorTest.java
        rabbitmq
        RabbitMQOutputOperatorBenchmark.java
        RabbitMQInputOperatorBenchmark.java
        RabbitMQInputOperatorTest.java
        RabbitMQOutputOperatorTest.java
        couchbase
        TestComplexPojoInput.java
        CouchBaseSetTest.java
        CouchBasePOJOTest.java
        CouchBaseOutputOperatorTest.java
        CouchBaseGetTest.java
        TestPojo.java
        CouchBaseInputOperatorTest.java
        memsql
        MemsqlInputOperator.java
        TestInputPojo.java
        AbstractMemsqlOutputOperatorTest.java
        AbstractMemsqlInputOperatorTest.java
        MemsqlStoreTest.java
        formatter
        CsvFormatterTest.java
        ruby
        RubyOperatorTest.java
        cassandra
        AbstractUpsertOutputOperatorCountersTest.java
        CounterColumnUpdatesOperator.java
        CompositePrimaryKeyRow.java
        FullNameCodec.java
        UserUpsertOperator.java
        CounterColumnTableEntry.java
        Address.java
        CassandraOperatorTest.java
        FullName.java
        AbstractUpsertOutputOperatorCompositePKTest.java
        CassandraLookupCacheBackedOperatorTest.java
        AbstractUpsertOutputOperatorCodecsTest.java
        TestInputPojo.java
        AddressCodec.java
        User.java
        CompositePrimaryKeyUpdateOperator.java
        couchdb
        CouchDBInputOperatorTest.java
        CouchDBOutputOperatorTest.java
        TestInputPOJO.java
        CouchDBPOJOInputOperatorTest.java
        CouchDBTestHelper.java
        PagedCouchDBInputOperatorTest.java
        elasticsearch
        ElasticSearchPercolateTest.java
        ElasticSearchOperatorTest.java
        memcache
        MemcachePOJOOperatorTest.java
        MemcacheOperatorTest.java
        splunk
        SplunkInputFromForwarderTest.java
        SplunkInputOperatorTest.java
        SplunkTcpOutputOperatorTest.java
        util
        TestPOJO.java
        POJOTupleGenerateOperator.java
        TupleCacheOutputOperator.java
        FieldValueSerializableGenerator.java
        TupleGenerator.java
        TupleGenerateCacheOperator.java
        parquet
        ParquetFilePOJOReaderTest.java
        misc
        algo
        LeastFrequentKeyValueMapTest.java
        FirstNTest.java
        LastMatchMapTest.java
        FirstMatchMapTest.java
        MostFrequentKeyValueMapTest.java
        FilterKeysMapTest.java
        InvertIndexArrayTest.java
        DistinctMapTest.java
        MatchMapTest.java
        InvertIndexTest.java
        InsertSortDescTest.java
        FirstTillMatchTest.java
        FilterKeyValsTest.java
        LeastFrequentKeyMapTest.java
        MostFrequentKeyMapTest.java
        SamplerTest.java
        AbstractStreamPatternMatcherTest.java
        FilterKeysHashMapTest.java
        AllAfterMatchMapTest.java
        streamquery
        FullOuterJoinOperatorTest.java
        LeftOuterJoinOperatorTest.java
        OrderByOperatorTest.java
        HavingOperatorTest.java
        GroupByOperatorTest.java
        UpdateOperatorTest.java
        DeleteOperatorTest.java
        SelectTopOperatorTest.java
        RightOuterJoinOperatorTest.java
        advanced
        SelectAverageTest.java
        SelectFirstLastTest.java
        SumIndexTest.java
        CompoundConditionTest.java
        BetweenConditionTest.java
        SelectMaxMinTest.java
        SelectCountTest.java
        NegateIndexTest.java
        InConditionTest.java
        LikeConditionTest.java
        SelectOperatorTest.java
        InnerJoinOperatorTest.java
        math
        ChangeAlertMapTest.java
        SumCountMapTest.java
        CountKeyValTest.java
        ExceptMapTest.java
        CompareMapTest.java
        ChangeTest.java
        QuotientMapTest.java
        ChangeKeyValTest.java
        ChangeAlertKeyValTest.java
        ChangeAlertTest.java
        CompareExceptMapTest.java
        QuotientTest.java
        helper
        MessageQueueTestHelper.java
        CollectorModule.java
        SourceModule.java
        apachelog
        ApplicationLogGeneratorTest.java
        ApplicationLocalLogTest.java
        ApacheLogInputGeneratorTest.java
        zmq
        ApplicationTest.java
        enrich
        JDBCLoaderTest.java
        Order.java
        MapEnricherTest.java
        EmployeeOrder.java
        POJOEnricherTest.java
        FileEnrichmentTest.java
        memcache_whalin
        MemcacheWhalinOperatorTest.java
        r
        RScriptOperatorStrTest.java
        RScriptOperatorBooleanTest.java
        RMinOperatorTest.java
        RScriptOperatorIntTest.java
        RStandardDeviationOperatorTest.java
        RScriptOperatorDoubleArrayTest.java
        RScriptLinearRegressionPredictorTest.java
        RMaxOperatorTest.java
        RScriptOperatorRealTest.java
        aerospike
        AerospikeTestUtils.java
        AerospikePOJONonTransactionalPutOperatorTest.java
        AerospikePOJOTransactionalPutOperatorTest.java
        AerospikeOperatorTest.java
        accumulo
        AccumuloTestHelper.java
        AccumuloOutputOperatorTest.java
        AccumuloTupleCollector.java
        AccumuloRowTupleGenerator.java
        AccumuloInputOperatorTest.java
        AccumuloTuple.java
        mongodb
        TestPOJO.java
        MongoDBInputOperatorTest.java
        MongoDBOutputOperatorTest.java
        jython
        PythonOperatorTest.java
        parser
        RegexParserTest.java
        JsonParserApplicationTest.java
        StreamingJsonParserTest.java
        CommonLogParserTest.java
        LogParserTest.java
        FixedWidthTest.java
        CSVParserTest.java
        CsvPOJOParserTest.java
        JsonParserTest.java
        zmq
        ZeroMQOutputOperatorTest.java
        ZeroMQMessageGenerator.java
        ZeroMQMessageReceiver.java
        ZeroMQOutputOperatorBenchmark.java
        ZeroMQInputOperatorTest.java
        ZeroMQInputOperatorBenchmark.java
        nifi
        demo
        TestNiFiOutputApplication.java
        TestNiFiInputApplication.java
        NiFiSinglePortOutputOperatorTest.java
        NiFiSinglePortInputOperatorTest.java
        mock
        MockSiteToSiteClient.java
        MockDataPacket.java
        MockTransaction.java
  - pom.xml
  - XmlJavadocCommentsExtractor.xsl
- benchmark
  - src
    - main
      - resources
        META-INF
        properties.xml
      - java
        org
        apache
        apex
        benchmark
        RandomWordInputModule.java
        kafka
        KafkaOutputBenchmark.java
        KafkaTestPartitioner.java
        BenchmarkKafkaInputOperator.java
        BenchmarkPartitionableKafkaOutputOperator.java
        KafkaInputBenchmark.java
        script
        RubyOperatorBenchmarkApplication.java
        CouchBaseAppInput.java
        fs
        FSOutputOperatorBenchmark.java
        FSByteOutputOperator.java
        stream
        IntegerOperator.java
        StreamDuplicaterApp.java
        StreamMergeApp.java
        DevNullCounterBenchmark.java
        state
        StoreOperator.java
        ManagedStateBenchmarkApp.java
        FixedTuplesInputOperator.java
        testbench
        HashMapOperator.java
        FilterClassifierApp.java
        EventGeneratorApp.java
        EventIncrementerApp.java
        RandomEventGeneratorApp.java
        EventClassifierNumberToHashDoubleApp.java
        ThroughputCounterApp.java
        FilteredEventClassifierApp.java
        EventClassifierApp.java
        SeedEventGeneratorApp.java
        cassandra
        CassandraOutputOperator.java
        CassandraOutputBenchmarkApplication.java
        WordCountOperator.java
        window
        WindowedOperatorBenchmarkApp.java
        KeyedWindowedOperatorBenchmarkApp.java
        README.md
        AbstractWindowedOperatorBenchmarkApp.java
        AbstractGenerator.java
        algo
        UniqueValueCountBenchmarkApplication.java
        Benchmark.java
        CouchBaseOutputOperator.java
        package-info.java
        RandomMapOutput.java
        hive
        HiveMapInsertBenchmarkingApp.java
        HiveInsertBenchmarkingApp.java
        spillable
        SpillableTestInputOperator.java
        SpillableTestOperator.java
        SpillableBenchmarkApp.java
        ApplicationFixed.java
        aerospike
        AerospikeOutputBenchmarkApplication.java
        AerospikeOutputOperator.java
        CouchBaseAppOutput.java
        CouchBaseInputOperator.java
      - appended-resources
        META-INF
        NOTICE.vm
    - site
      - conf
        dt-site-hive.xml
        dt-site-kafka.xml
        dt-site-couchbase.xml
        dt-site-testbench.xml
        dt-site-memsql.xml
    - test
      - resources
        log4j.properties
      - java
        org
        apache
        apex
        benchmark
        kafka
        KafkaOutputBenchmarkTest.java
        KafkaInputBenchmarkTest.java
        BenchmarkTest.java
        script
        RubyOperatorBenchmarkAppTest.java
        hbase
        HBaseCsvMappingApplication.java
        HBaseApplicationTest.java
        memsql
        MemsqlInputBenchmarkTest.java
        MemsqlOutputBenchmark.java
        MemsqlOutputBenchmarkTest.java
        MemsqlInputBenchmark.java
        state
        ManagedStateBenchmarkAppTest.java
        testbench
        FilteredEventClassifierAppTest.java
        FilterClassifierAppTest.java
        EventGeneratorAppTest.java
        EventClassifierAppTest.java
        EventClassifierNumberToHashDoubleAppTest.java
        EventIncrementerAppTest.java
        ThroughputCounterAppTest.java
        cassandra
        CassandraApplicatonTest.java
        window
        KeyedWindowedOperatorBenchmarkAppTest.java
        WindowedOperatorBenchmarkAppTest.java
        util
        serde
        GenericSerdePerformanceTest.java
        algo
        UniqueValueCountBenchmarkTest.java
        ApplicationFixedTest.java
        hive
        HiveMapBenchmarkTest.java
        HiveInsertBenchmarkTest.java
        spillable
        SpillableDSBenchmarkTest.java
        SpillableBenchmarkAppTester.java
        aerospike
        AerospikeBenchmarkAppTest.java
        CouchBaseBenchmarkTest.java
        accumulo
        AccumuloAppTest.java
        AccumuloApp.java
    - assemble
      - appPackage.xml
  - pom.xml

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.io.file.tfile;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Comparator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.BoundedByteArrayOutputStream;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.file.tfile.DTBCFile.Reader.BlockReader;
import org.apache.hadoop.io.file.tfile.DTBCFile.Writer.BlockAppender;
import org.apache.hadoop.io.file.tfile.Chunk.ChunkDecoder;
import org.apache.hadoop.io.file.tfile.Chunk.ChunkEncoder;
import org.apache.hadoop.io.file.tfile.CompareUtils.BytesComparator;
import org.apache.hadoop.io.file.tfile.CompareUtils.MemcmpRawComparator;
import org.apache.hadoop.io.file.tfile.Utils.Version;
import org.apache.hadoop.io.serializer.JavaSerializationComparator;

/**
 * <ul>
 * <li>The file format of DTFile is same as {@link TFile} with different reader implementation.
 * It reads data block by block and cache the binary block data into memory to speed up the random read.
 *
 * <li>The public api of {@link Reader} is as same as it is in {@link TFile} {@link org.apache.hadoop.io.file.tfile.TFile.Reader} implementation.
 * Besides, it provides getBlockBuffer(), getKeyOffset(), getKeyLength(), getValueOffset(), getValueLength() method
 *  to expose raw block, key, value data to user to avoid unnecessary internal/external data copy
 *
 * <li>In the performance test, It shows no difference in sequential reads and 20x faster in random reads(If most of them hit memory)
 * </ul>
 *
 * A TFile is a container of key-value pairs. Both keys and values are type-less
 * bytes. Keys are restricted to 64KB, value length is not restricted
 * (practically limited to the available disk storage). TFile further provides
 * the following features:
 * <ul>
 * <li>Block Compression.
 * <li>Named meta data blocks.
 * <li>Sorted or unsorted keys.
 * <li>Seek by key or by file offset.
 * </ul>
 * The memory footprint of a TFile includes the following:
 * <ul>
 * <li>Some constant overhead of reading or writing a compressed block.
 * <ul>
 * <li>Each compressed block requires one compression/decompression codec for
 * I/O.
 * <li>Temporary space to buffer the key.
 * <li>Temporary space to buffer the value (for TFile.Writer only). Values are
 * chunk encoded, so that we buffer at most one chunk of user data. By default,
 * the chunk buffer is 1MB. Reading chunked value does not require additional
 * memory.
 * </ul>
 * <li>TFile index, which is proportional to the total number of Data Blocks.
 * The total amount of memory needed to hold the index can be estimated as
 * (56+AvgKeySize)*NumBlocks.
 * <li>MetaBlock index, which is proportional to the total number of Meta
 * Blocks.The total amount of memory needed to hold the index for Meta Blocks
 * can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
 * </ul>
 * <p>
 * The behavior of TFile can be customized by the following variables through
 * Configuration:
 * <ul>
 * <li><b>tfile.io.chunk.size</b>: Value chunk size. Integer (in bytes). Default
 * to 1MB. Values of the length less than the chunk size is guaranteed to have
 * known value length in read time (See
 * {@link DTFile.Reader.Scanner.Entry#isValueLengthKnown()}).
 * <li><b>tfile.fs.output.buffer.size</b>: Buffer size used for
 * FSDataOutputStream. Integer (in bytes). Default to 256KB.
 * <li><b>tfile.fs.input.buffer.size</b>: Buffer size used for
 * FSDataInputStream. Integer (in bytes). Default to 256KB.
 * </ul>
 * <p>
 * Suggestions on performance optimization.
 * <ul>
 * <li>Minimum block size. We recommend a setting of minimum block size between
 * 256KB to 1MB for general usage. Larger block size is preferred if files are
 * primarily for sequential access. However, it would lead to inefficient random
 * access (because there are more data to decompress). Smaller blocks are good
 * for random access, but require more memory to hold the block index, and may
 * be slower to create (because we must flush the compressor stream at the
 * conclusion of each data block, which leads to an FS I/O flush). Further, due
 * to the internal caching in Compression codec, the smallest possible block
 * size would be around 20KB-30KB.
 * <li>The current implementation does not offer true multi-threading for
 * reading. The implementation uses FSDataInputStream seek()+read(), which is
 * shown to be much faster than positioned-read call in single thread mode.
 * However, it also means that if multiple threads attempt to access the same
 * TFile (using multiple scanners) simultaneously, the actual I/O is carried out
 * sequentially even if they access different DFS blocks.
 * <li>Compression codec. Use "none" if the data is not very compressable (by
 * compressable, I mean a compression ratio at least 2:1). Generally, use "lzo"
 * as the starting point for experimenting. "gz" overs slightly better
 * compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
 * decompress, comparing to "lzo".
 * <li>File system buffering, if the underlying FSDataInputStream and
 * FSDataOutputStream is already adequately buffered; or if applications
 * reads/writes keys and values in large buffers, we can reduce the sizes of
 * input/output buffering in TFile layer by setting the configuration parameters
 * "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
 * </ul>
 *
 * Some design rationale behind TFile can be found at <a
 * href=https://issues.apache.org/jira/browse/HADOOP-3315>Hadoop-3315</a>.
 *
 * @since 2.0.0
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class DTFile {
  static final Log LOG = LogFactory.getLog(DTFile.class);

  private static final String CHUNK_BUF_SIZE_ATTR = "tfile.io.chunk.size";
  private static final String FS_INPUT_BUF_SIZE_ATTR =
      "tfile.fs.input.buffer.size";
  private static final String FS_OUTPUT_BUF_SIZE_ATTR =
      "tfile.fs.output.buffer.size";

  public static final int DEFAULT_INPUT_FS_BUF_SIZE = 256 * 1024;

  static int getChunkBufferSize(Configuration conf) {
    int ret = conf.getInt(CHUNK_BUF_SIZE_ATTR, 1024 * 1024);
    return (ret > 0) ? ret : 1024 * 1024;
  }

  static int getFSInputBufferSize(Configuration conf) {
    int buffserSize = conf.getInt(FS_INPUT_BUF_SIZE_ATTR, DEFAULT_INPUT_FS_BUF_SIZE);
    if (buffserSize <= 0)
      buffserSize = DEFAULT_INPUT_FS_BUF_SIZE;
    return buffserSize;
  }

  static int getFSOutputBufferSize(Configuration conf) {
    return conf.getInt(FS_OUTPUT_BUF_SIZE_ATTR, 256 * 1024);
  }

  private static final int MAX_KEY_SIZE = 64 * 1024; // 64KB
  static final Version API_VERSION = new Version((short) 1, (short) 0);

  /** compression: gzip */
  public static final String COMPRESSION_GZ = "gz";
  /** compression: lzo */
  public static final String COMPRESSION_LZO = "lzo";
  /** compression: none */
  public static final String COMPRESSION_NONE = "none";
  /** comparator: memcmp */
  public static final String COMPARATOR_MEMCMP = "memcmp";
  /** comparator prefix: java class */
  public static final String COMPARATOR_JCLASS = "jclass:";

  /**
   * Make a raw comparator from a string name.
   *
   * @param name
   *          Comparator name
   * @return A RawComparable comparator.
   */
  static public Comparator<RawComparable> makeComparator(String name) {
    return TFileMeta.makeComparator(name);
  }

  // Prevent the instantiation of TFiles
  private DTFile() {
    // nothing
  }

  /**
   * Get names of supported compression algorithms. The names are acceptable by
   * TFile.Writer.
   *
   * @return Array of strings, each represents a supported compression
   *         algorithm. Currently, the following compression algorithms are
   *         supported.
   *         <ul>
   *         <li>"none" - No compression.
   *         <li>"lzo" - LZO compression.
   *         <li>"gz" - GZIP compression.
   *         </ul>
   */
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }

  /**
   * TFile Writer.
   */
  @InterfaceStability.Evolving
  public static class Writer implements Closeable {
    // minimum compressed size for a block.
    private final int sizeMinBlock;

    // Meta blocks.
    final TFileIndex tfileIndex;
    final TFileMeta tfileMeta;

    // reference to the underlying BCFile.
    private DTBCFile.Writer writerBCF;

    // current data block appender.
    BlockAppender blkAppender;
    long blkRecordCount;

    // buffers for caching the key.
    BoundedByteArrayOutputStream currentKeyBufferOS;
    BoundedByteArrayOutputStream lastKeyBufferOS;

    // buffer used by chunk codec
    private byte[] valueBuffer;

    /**
     * Writer states. The state always transits in circles: READY -> IN_KEY ->
     * END_KEY -> IN_VALUE -> READY.
     */
    private enum State {
      READY, // Ready to start a new key-value pair insertion.
      IN_KEY, // In the middle of key insertion.
      END_KEY, // Key insertion complete, ready to insert value.
      IN_VALUE, // In value insertion.
      // ERROR, // Error encountered, cannot continue.
      CLOSED, // TFile already closed.
    };

    // current state of Writer.
    State state = State.READY;
    Configuration conf;
    long errorCount = 0;

    /**
     * Constructor
     *
     * @param fsdos
     *          output stream for writing. Must be at position 0.
     * @param minBlockSize
     *          Minimum compressed block size in bytes. A compression block will
     *          not be closed until it reaches this size except for the last
     *          block.
     * @param compressName
     *          Name of the compression algorithm. Must be one of the strings
     *          returned by {@link DTFile#getSupportedCompressionAlgorithms()}.
     * @param comparator
     *          Leave comparator as null or empty string if TFile is not sorted.
     *          Otherwise, provide the string name for the comparison algorithm
     *          for keys. Two kinds of comparators are supported.
     *          <ul>
     *          <li>Algorithmic comparator: binary comparators that is language
     *          independent. Currently, only "memcmp" is supported.
     *          <li>Language-specific comparator: binary comparators that can
     *          only be constructed in specific language. For Java, the syntax
     *          is "jclass:", followed by the class name of the RawComparator.
     *          Currently, we only support RawComparators that can be
     *          constructed through the default constructor (with no
     *          parameters). Parameterized RawComparators such as
     *          {@link WritableComparator} or
     *          {@link JavaSerializationComparator} may not be directly used.
     *          One should write a wrapper class that inherits from such classes
     *          and use its default constructor to perform proper
     *          initialization.
     *          </ul>
     * @param conf
     *          The configuration object.
     * @throws IOException
     */
    public Writer(FSDataOutputStream fsdos, int minBlockSize,
        String compressName, String comparator, Configuration conf)
        throws IOException {
      sizeMinBlock = minBlockSize;
      tfileMeta = new TFileMeta(comparator);
      tfileIndex = new TFileIndex(tfileMeta.getComparator());

      writerBCF = new DTBCFile.Writer(fsdos, compressName, conf);
      currentKeyBufferOS = new BoundedByteArrayOutputStream(MAX_KEY_SIZE);
      lastKeyBufferOS = new BoundedByteArrayOutputStream(MAX_KEY_SIZE);
      this.conf = conf;
    }

    /**
     * Close the Writer. Resources will be released regardless of the exceptions
     * being thrown. Future close calls will have no effect.
     *
     * The underlying FSDataOutputStream is not closed.
     */
    @Override
    public void close() throws IOException {
      if ((state == State.CLOSED)) {
        return;
      }
      try {
        // First try the normal finish.
        // Terminate upon the first Exception.
        if (errorCount == 0) {
          if (state != State.READY) {
            throw new IllegalStateException(
                "Cannot close TFile in the middle of key-value insertion.");
          }

          finishDataBlock(true);

          // first, write out data:TFile.meta
          BlockAppender outMeta =
              writerBCF
                  .prepareMetaBlock(TFileMeta.BLOCK_NAME, COMPRESSION_NONE);
          try {
            tfileMeta.write(outMeta);
          } finally {
            outMeta.close();
          }

          // second, write out data:TFile.index
          BlockAppender outIndex =
              writerBCF.prepareMetaBlock(TFileIndex.BLOCK_NAME);
          try {
            tfileIndex.write(outIndex);
          } finally {
            outIndex.close();
          }

          writerBCF.close();
        }
      } finally {
        IOUtils.cleanup(LOG, blkAppender, writerBCF);
        blkAppender = null;
        writerBCF = null;
        state = State.CLOSED;
      }
    }

    /**
     * Adding a new key-value pair to the TFile. This is synonymous to
     * append(key, 0, key.length, value, 0, value.length)
     *
     * @param key
     *          Buffer for key.
     * @param value
     *          Buffer for value.
     * @throws IOException
     */
    public void append(byte[] key, byte[] value) throws IOException {
      append(key, 0, key.length, value, 0, value.length);
    }

    /**
     * Adding a new key-value pair to TFile.
     *
     * @param key
     *          buffer for key.
     * @param koff
     *          offset in key buffer.
     * @param klen
     *          length of key.
     * @param value
     *          buffer for value.
     * @param voff
     *          offset in value buffer.
     * @param vlen
     *          length of value.
     * @throws IOException
     *           Upon IO errors.
     *           <p>
     *           If an exception is thrown, the TFile will be in an inconsistent
     *           state. The only legitimate call after that would be close
     */
    public void append(byte[] key, int koff, int klen, byte[] value, int voff,
        int vlen) throws IOException {
      if ((koff | klen | (koff + klen) | (key.length - (koff + klen))) < 0) {
        throw new IndexOutOfBoundsException(
            "Bad key buffer offset-length combination.");
      }

      if ((voff | vlen | (voff + vlen) | (value.length - (voff + vlen))) < 0) {
        throw new IndexOutOfBoundsException(
            "Bad value buffer offset-length combination.");
      }

      try {
        DataOutputStream dosKey = prepareAppendKey(klen);
        try {
          ++errorCount;
          dosKey.write(key, koff, klen);
          --errorCount;
        } finally {
          dosKey.close();
        }

        DataOutputStream dosValue = prepareAppendValue(vlen);
        try {
          ++errorCount;
          dosValue.write(value, voff, vlen);
          --errorCount;
        } finally {
          dosValue.close();
        }
      } finally {
        state = State.READY;
      }
    }

    /**
     * Helper class to register key after close call on key append stream.
     */
    private class KeyRegister extends DataOutputStream {
      private final int expectedLength;
      private boolean closed = false;

      public KeyRegister(int len) {
        super(currentKeyBufferOS);
        if (len >= 0) {
          currentKeyBufferOS.reset(len);
        } else {
          currentKeyBufferOS.reset();
        }
        expectedLength = len;
      }

      @Override
      public void close() throws IOException {
        if (closed == true) {
          return;
        }

        try {
          ++errorCount;
          byte[] key = currentKeyBufferOS.getBuffer();
          int len = currentKeyBufferOS.size();
          /**
           * verify length.
           */
          if (expectedLength >= 0 && expectedLength != len) {
            throw new IOException("Incorrect key length: expected="
                + expectedLength + " actual=" + len);
          }

          Utils.writeVInt(blkAppender, len);
          blkAppender.write(key, 0, len);
          if (tfileIndex.getFirstKey() == null) {
            tfileIndex.setFirstKey(key, 0, len);
          }

          if (tfileMeta.isSorted() && tfileMeta.getRecordCount()>0) {
            byte[] lastKey = lastKeyBufferOS.getBuffer();
            int lastLen = lastKeyBufferOS.size();
            if (tfileMeta.getComparator().compare(key, 0, len, lastKey, 0,
                lastLen) < 0) {
              throw new IOException("Keys are not added in sorted order");
            }
          }

          BoundedByteArrayOutputStream tmp = currentKeyBufferOS;
          currentKeyBufferOS = lastKeyBufferOS;
          lastKeyBufferOS = tmp;
          --errorCount;
        } finally {
          closed = true;
          state = State.END_KEY;
        }
      }
    }

    /**
     * Helper class to register value after close call on value append stream.
     */
    private class ValueRegister extends DataOutputStream {
      private boolean closed = false;

      public ValueRegister(OutputStream os) {
        super(os);
      }

      // Avoiding flushing call to down stream.
      @Override
      public void flush() {
        // do nothing
      }

      @Override
      public void close() throws IOException {
        if (closed == true) {
          return;
        }

        try {
          ++errorCount;
          super.close();
          blkRecordCount++;
          // bump up the total record count in the whole file
          tfileMeta.incRecordCount();
          finishDataBlock(false);
          --errorCount;
        } finally {
          closed = true;
          state = State.READY;
        }
      }
    }

    /**
     * Obtain an output stream for writing a key into TFile. This may only be
     * called when there is no active Key appending stream or value appending
     * stream.
     *
     * @param length
     *          The expected length of the key. If length of the key is not
     *          known, set length = -1. Otherwise, the application must write
     *          exactly as many bytes as specified here before calling close on
     *          the returned output stream.
     * @return The key appending output stream.
     * @throws IOException
     *
     */
    public DataOutputStream prepareAppendKey(int length) throws IOException {
      if (state != State.READY) {
        throw new IllegalStateException("Incorrect state to start a new key: "
            + state.name());
      }

      initDataBlock();
      DataOutputStream ret = new KeyRegister(length);
      state = State.IN_KEY;
      return ret;
    }

    /**
     * Obtain an output stream for writing a value into TFile. This may only be
     * called right after a key appending operation (the key append stream must
     * be closed).
     *
     * @param length
     *          The expected length of the value. If length of the value is not
     *          known, set length = -1. Otherwise, the application must write
     *          exactly as many bytes as specified here before calling close on
     *          the returned output stream. Advertising the value size up-front
     *          guarantees that the value is encoded in one chunk, and avoids
     *          intermediate chunk buffering.
     * @throws IOException
     *
     */
    public DataOutputStream prepareAppendValue(int length) throws IOException {
      if (state != State.END_KEY) {
        throw new IllegalStateException(
            "Incorrect state to start a new value: " + state.name());
      }

      DataOutputStream ret;

      // unknown length
      if (length < 0) {
        if (valueBuffer == null) {
          valueBuffer = new byte[getChunkBufferSize(conf)];
        }
        ret = new ValueRegister(new ChunkEncoder(blkAppender, valueBuffer));
      } else {
        ret =
            new ValueRegister(new Chunk.SingleChunkEncoder(blkAppender, length));
      }

      state = State.IN_VALUE;
      return ret;
    }

    /**
     * Obtain an output stream for creating a meta block. This function may not
     * be called when there is a key append stream or value append stream
     * active. No more key-value insertion is allowed after a meta data block
     * has been added to TFile.
     *
     * @param name
     *          Name of the meta block.
     * @param compressName
     *          Name of the compression algorithm to be used. Must be one of the
     *          strings returned by
     *          {@link DTFile#getSupportedCompressionAlgorithms()}.
     * @return A DataOutputStream that can be used to write Meta Block data.
     *         Closing the stream would signal the ending of the block.
     * @throws IOException
     * @throws MetaBlockAlreadyExists
     *           the Meta Block with the same name already exists.
     */
    public DataOutputStream prepareMetaBlock(String name, String compressName)
        throws IOException, MetaBlockAlreadyExists {
      if (state != State.READY) {
        throw new IllegalStateException(
            "Incorrect state to start a Meta Block: " + state.name());
      }

      finishDataBlock(true);
      DataOutputStream outputStream =
          writerBCF.prepareMetaBlock(name, compressName);
      return outputStream;
    }

    /**
     * Obtain an output stream for creating a meta block. This function may not
     * be called when there is a key append stream or value append stream
     * active. No more key-value insertion is allowed after a meta data block
     * has been added to TFile. Data will be compressed using the default
     * compressor as defined in Writer's constructor.
     *
     * @param name
     *          Name of the meta block.
     * @return A DataOutputStream that can be used to write Meta Block data.
     *         Closing the stream would signal the ending of the block.
     * @throws IOException
     * @throws MetaBlockAlreadyExists
     *           the Meta Block with the same name already exists.
     */
    public DataOutputStream prepareMetaBlock(String name) throws IOException,
        MetaBlockAlreadyExists {
      if (state != State.READY) {
        throw new IllegalStateException(
            "Incorrect state to start a Meta Block: " + state.name());
      }

      finishDataBlock(true);
      return writerBCF.prepareMetaBlock(name);
    }

    /**
     * Check if we need to start a new data block.
     *
     * @throws IOException
     */
    private void initDataBlock() throws IOException {
      // for each new block, get a new appender
      if (blkAppender == null) {
        blkAppender = writerBCF.prepareDataBlock();
      }
    }

    /**
     * Close the current data block if necessary.
     *
     * @param bForceFinish
     *          Force the closure regardless of the block size.
     * @throws IOException
     */
    void finishDataBlock(boolean bForceFinish) throws IOException {
      if (blkAppender == null) {
        return;
      }

      // exceeded the size limit, do the compression and finish the block
      if (bForceFinish || blkAppender.getCompressedSize() >= sizeMinBlock) {
        // keep tracks of the last key of each data block, no padding
        // for now
        TFileIndexEntry keyLast =
            new TFileIndexEntry(lastKeyBufferOS.getBuffer(), 0, lastKeyBufferOS
                .size(), blkRecordCount);
        tfileIndex.addEntry(keyLast);
        // close the appender
        blkAppender.close();
        blkAppender = null;
        blkRecordCount = 0;
      }
    }
  }

  /**
   * TFile Reader. Users may only read TFiles by creating TFile.Reader.Scanner.
   * objects. A scanner may scan the whole TFile ({@link Reader#createScanner()}
   * ) , a portion of TFile based on byte offsets (
   * {@link Reader#createScannerByByteRange(long, long)}), or a portion of TFile with keys
   * fall in a certain key range (for sorted TFile only,
   * {@link Reader#createScannerByKey(byte[], byte[])} or
   * {@link Reader#createScannerByKey(RawComparable, RawComparable)}).
   */
  @InterfaceStability.Evolving
  public static class Reader implements Closeable {
    // The underlying BCFile reader.
    final DTBCFile.Reader readerBCF;

    // TFile index, it is loaded lazily.
    TFileIndex tfileIndex = null;
    final TFileMeta tfileMeta;
    final BytesComparator comparator;

    // global begin and end locations.
    private final Location begin;
    private final Location end;

    /**
     * Location representing a virtual position in the TFile.
     */
    static final class Location implements Comparable<Location>, Cloneable {
      private int blockIndex;
      // distance/offset from the beginning of the block
      private long recordIndex;

      Location(int blockIndex, long recordIndex) {
        set(blockIndex, recordIndex);
      }

      void incRecordIndex() {
        ++recordIndex;
      }

      Location(Location other) {
        set(other);
      }

      int getBlockIndex() {
        return blockIndex;
      }

      long getRecordIndex() {
        return recordIndex;
      }

      void set(int blockIndex, long recordIndex) {
        if ((blockIndex | recordIndex) < 0) {
          throw new IllegalArgumentException(
              "Illegal parameter for BlockLocation.");
        }
        this.blockIndex = blockIndex;
        this.recordIndex = recordIndex;
      }

      void set(Location other) {
        set(other.blockIndex, other.recordIndex);
      }

      /**
       * @see java.lang.Comparable#compareTo(java.lang.Object)
       */
      @Override
      public int compareTo(Location other) {
        return compareTo(other.blockIndex, other.recordIndex);
      }

      int compareTo(int bid, long rid) {
        if (this.blockIndex == bid) {
          long ret = this.recordIndex - rid;
          if (ret > 0) return 1;
          if (ret < 0) return -1;
          return 0;
        }
        return this.blockIndex - bid;
      }

      /**
       * @see java.lang.Object#clone()
       */
      @Override
      protected Location clone() throws CloneNotSupportedException {
        return (Location)super.clone();
      }

      /**
       * @see java.lang.Object#hashCode()
       */
      @Override
      public int hashCode() {
        final int prime = 31;
        int result = prime + blockIndex;
        result = (int) (prime * result + recordIndex);
        return result;
      }

      /**
       * @see java.lang.Object#equals(java.lang.Object)
       */
      @Override
      public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null) return false;
        if (getClass() != obj.getClass()) return false;
        Location other = (Location) obj;
        if (blockIndex != other.blockIndex) return false;
        if (recordIndex != other.recordIndex) return false;
        return true;
      }
    }

    /**
     * Constructor
     *
     * @param fsdis
     *          FS input stream of the TFile.
     * @param fileLength
     *          The length of TFile. This is required because we have no easy
     *          way of knowing the actual size of the input file through the
     *          File input stream.
     * @param conf
     * @throws IOException
     */
    public Reader(FSDataInputStream fsdis, long fileLength, Configuration conf)
        throws IOException {
      readerBCF = new DTBCFile.Reader(fsdis, fileLength, conf);

      // first, read TFile meta
      BlockReader brMeta = readerBCF.getMetaBlock(TFileMeta.BLOCK_NAME);
      try {
        tfileMeta = new TFileMeta(brMeta);
      } finally {
        brMeta.close();
      }

      comparator = tfileMeta.getComparator();
      // Set begin and end locations.
      begin = new Location(0, 0);
      end = new Location(readerBCF.getBlockCount(), 0);
    }

    /**
     * Close the reader. The state of the Reader object is undefined after
     * close. Calling close() for multiple times has no effect.
     */
    @Override
    public void close() throws IOException {
      readerBCF.close();
    }

    /**
     * Get the begin location of the TFile.
     *
     * @return If TFile is not empty, the location of the first key-value pair.
     *         Otherwise, it returns end().
     */
    Location begin() {
      return begin;
    }

    /**
     * Get the end location of the TFile.
     *
     * @return The location right after the last key-value pair in TFile.
     */
    Location end() {
      return end;
    }

    /**
     * Get the string representation of the comparator.
     *
     * @return If the TFile is not sorted by keys, an empty string will be
     *         returned. Otherwise, the actual comparator string that is
     *         provided during the TFile creation time will be returned.
     */
    public String getComparatorName() {
      return tfileMeta.getComparatorString();
    }

    /**
     * Is the TFile sorted?
     *
     * @return true if TFile is sorted.
     */
    public boolean isSorted() {
      return tfileMeta.isSorted();
    }

    /**
     * Get the number of key-value pair entries in TFile.
     *
     * @return the number of key-value pairs in TFile
     */
    public long getEntryCount() {
      return tfileMeta.getRecordCount();
    }

    /**
     * Lazily loading the TFile index.
     *
     * @throws IOException
     */
    synchronized void checkTFileDataIndex() throws IOException {
      if (tfileIndex == null) {
        BlockReader brIndex = readerBCF.getMetaBlock(TFileIndex.BLOCK_NAME);
        try {
          tfileIndex =
              new TFileIndex(readerBCF.getBlockCount(), brIndex, tfileMeta
                  .getComparator());
        } finally {
          brIndex.close();
        }
      }
    }

    /**
     * Get the first key in the TFile.
     *
     * @return The first key in the TFile.
     * @throws IOException
     */
    public RawComparable getFirstKey() throws IOException {
      checkTFileDataIndex();
      return tfileIndex.getFirstKey();
    }

    /**
     * Get the last key in the TFile.
     *
     * @return The last key in the TFile.
     * @throws IOException
     */
    public RawComparable getLastKey() throws IOException {
      checkTFileDataIndex();
      return tfileIndex.getLastKey();
    }

    /**
     * Get a Comparator object to compare Entries. It is useful when you want
     * stores the entries in a collection (such as PriorityQueue) and perform
     * sorting or comparison among entries based on the keys without copying out
     * the key.
     *
     * @return An Entry Comparator..
     */
    public Comparator<Scanner.Entry> getEntryComparator() {
      if (!isSorted()) {
        throw new RuntimeException(
            "Entries are not comparable for unsorted TFiles");
      }

      return new Comparator<Scanner.Entry>() {
        /**
         * Provide a customized comparator for Entries. This is useful if we
         * have a collection of Entry objects. However, if the Entry objects
         * come from different TFiles, users must ensure that those TFiles share
         * the same RawComparator.
         */
        @Override
        public int compare(Scanner.Entry o1, Scanner.Entry o2) {
          return comparator.compare(o1.getBlockBuffer(), o1.getKeyOffset(), o1.getKeyLength(), o2
              .getBlockBuffer(), o2.getKeyOffset(), o2.getKeyLength());
        }
      };
    }

    /**
     * Get an instance of the RawComparator that is constructed based on the
     * string comparator representation.
     *
     * @return a Comparator that can compare RawComparable's.
     */
    public Comparator<RawComparable> getComparator() {
      return comparator;
    }

    /**
     * Stream access to a meta block.``
     *
     * @param name
     *          The name of the meta block.
     * @return The input stream.
     * @throws IOException
     *           on I/O error.
     * @throws MetaBlockDoesNotExist
     *           If the meta block with the name does not exist.
     */
    public DataInputStream getMetaBlock(String name) throws IOException,
        MetaBlockDoesNotExist {
      return readerBCF.getMetaBlock(name);
    }

    /**
     * if greater is true then returns the beginning location of the block
     * containing the key strictly greater than input key. if greater is false
     * then returns the beginning location of the block greater than equal to
     * the input key
     *
     * @param key
     *          the input key
     * @param greater
     *          boolean flag
     * @return
     * @throws IOException
     */
    Location getBlockContainsKey(RawComparable key, boolean greater)
        throws IOException {
      if (!isSorted()) {
        throw new RuntimeException("Seeking in unsorted TFile");
      }
      checkTFileDataIndex();
      int blkIndex =
          (greater) ? tfileIndex.upperBound(key) : tfileIndex.lowerBound(key);
      if (blkIndex < 0) return end;
      return new Location(blkIndex, 0);
    }

    Location getLocationByRecordNum(long recNum) throws IOException {
      checkTFileDataIndex();
      return tfileIndex.getLocationByRecordNum(recNum);
    }

    long getRecordNumByLocation(Location location) throws IOException {
      checkTFileDataIndex();
      return tfileIndex.getRecordNumByLocation(location);
    }

    int compareKeys(byte[] a, int o1, int l1, byte[] b, int o2, int l2) {
      if (!isSorted()) {
        throw new RuntimeException("Cannot compare keys for unsorted TFiles.");
      }
      return comparator.compare(a, o1, l1, b, o2, l2);
    }

    int compareKeys(RawComparable a, RawComparable b) {
      if (!isSorted()) {
        throw new RuntimeException("Cannot compare keys for unsorted TFiles.");
      }
      return comparator.compare(a, b);
    }

    /**
     * Get the location pointing to the beginning of the first key-value pair in
     * a compressed block whose byte offset in the TFile is greater than or
     * equal to the specified offset.
     *
     * @param offset
     *          the user supplied offset.
     * @return the location to the corresponding entry; or end() if no such
     *         entry exists.
     */
    Location getLocationNear(long offset) {
      int blockIndex = readerBCF.getBlockIndexNear(offset);
      if (blockIndex == -1) return end;
      return new Location(blockIndex, 0);
    }

    /**
     * Get the RecordNum for the first key-value pair in a compressed block
     * whose byte offset in the TFile is greater than or equal to the specified
     * offset.
     *
     * @param offset
     *          the user supplied offset.
     * @return the RecordNum to the corresponding entry. If no such entry
     *         exists, it returns the total entry count.
     * @throws IOException
     */
    public long getRecordNumNear(long offset) throws IOException {
      return getRecordNumByLocation(getLocationNear(offset));
    }

    /**
     * Get a sample key that is within a block whose starting offset is greater
     * than or equal to the specified offset.
     *
     * @param offset
     *          The file offset.
     * @return the key that fits the requirement; or null if no such key exists
     *         (which could happen if the offset is close to the end of the
     *         TFile).
     * @throws IOException
     */
    public RawComparable getKeyNear(long offset) throws IOException {
      int blockIndex = readerBCF.getBlockIndexNear(offset);
      if (blockIndex == -1) return null;
      checkTFileDataIndex();
      return new ByteArray(tfileIndex.getEntry(blockIndex).key);
    }

    /**
     * Get a scanner than can scan the whole TFile.
     *
     * @return The scanner object. A valid Scanner is always returned even if
     *         the TFile is empty.
     * @throws IOException
     */
    public Scanner createScanner() throws IOException {
      return new Scanner(this, begin, end);
    }

    /**
     * Get a scanner that covers a portion of TFile based on byte offsets.
     *
     * @param offset
     *          The beginning byte offset in the TFile.
     * @param length
     *          The length of the region.
     * @return The actual coverage of the returned scanner tries to match the
     *         specified byte-region but always round up to the compression
     *         block boundaries. It is possible that the returned scanner
     *         contains zero key-value pairs even if length is positive.
     * @throws IOException
     */
    public Scanner createScannerByByteRange(long offset, long length) throws IOException {
      return new Scanner(this, offset, offset + length);
    }

    /**
     * Get a scanner that covers a portion of TFile based on keys.
     *
     * @param beginKey
     *          Begin key of the scan (inclusive). If null, scan from the first
     *          key-value entry of the TFile.
     * @param endKey
     *          End key of the scan (exclusive). If null, scan up to the last
     *          key-value entry of the TFile.
     * @return The actual coverage of the returned scanner will cover all keys
     *         greater than or equal to the beginKey and less than the endKey.
     * @throws IOException
     *
     * @deprecated Use {@link #createScannerByKey(byte[], byte[])} instead.
     */
    @Deprecated
    public Scanner createScanner(byte[] beginKey, byte[] endKey)
      throws IOException {
      return createScannerByKey(beginKey, endKey);
    }

    /**
     * Get a scanner that covers a portion of TFile based on keys.
     *
     * @param beginKey
     *          Begin key of the scan (inclusive). If null, scan from the first
     *          key-value entry of the TFile.
     * @param endKey
     *          End key of the scan (exclusive). If null, scan up to the last
     *          key-value entry of the TFile.
     * @return The actual coverage of the returned scanner will cover all keys
     *         greater than or equal to the beginKey and less than the endKey.
     * @throws IOException
     */
    public Scanner createScannerByKey(byte[] beginKey, byte[] endKey)
        throws IOException {
      return createScannerByKey((beginKey == null) ? null : new ByteArray(beginKey,
          0, beginKey.length), (endKey == null) ? null : new ByteArray(endKey,
          0, endKey.length));
    }

    /**
     * Get a scanner that covers a specific key range.
     *
     * @param beginKey
     *          Begin key of the scan (inclusive). If null, scan from the first
     *          key-value entry of the TFile.
     * @param endKey
     *          End key of the scan (exclusive). If null, scan up to the last
     *          key-value entry of the TFile.
     * @return The actual coverage of the returned scanner will cover all keys
     *         greater than or equal to the beginKey and less than the endKey.
     * @throws IOException
     *
     * @deprecated Use {@link #createScannerByKey(RawComparable, RawComparable)}
     *             instead.
     */
    @Deprecated
    public Scanner createScanner(RawComparable beginKey, RawComparable endKey)
        throws IOException {
      return createScannerByKey(beginKey, endKey);
    }

    /**
     * Get a scanner that covers a specific key range.
     *
     * @param beginKey
     *          Begin key of the scan (inclusive). If null, scan from the first
     *          key-value entry of the TFile.
     * @param endKey
     *          End key of the scan (exclusive). If null, scan up to the last
     *          key-value entry of the TFile.
     * @return The actual coverage of the returned scanner will cover all keys
     *         greater than or equal to the beginKey and less than the endKey.
     * @throws IOException
     */
    public Scanner createScannerByKey(RawComparable beginKey, RawComparable endKey)
        throws IOException {
      if ((beginKey != null) && (endKey != null)
          && (compareKeys(beginKey, endKey) >= 0)) {
        return new Scanner(this, beginKey, beginKey);
      }
      return new Scanner(this, beginKey, endKey);
    }

    /**
     * Create a scanner that covers a range of records.
     *
     * @param beginRecNum
     *          The RecordNum for the first record (inclusive).
     * @param endRecNum
     *          The RecordNum for the last record (exclusive). To scan the whole
     *          file, either specify endRecNum==-1 or endRecNum==getEntryCount().
     * @return The TFile scanner that covers the specified range of records.
     * @throws IOException
     */
    public Scanner createScannerByRecordNum(long beginRecNum, long endRecNum)
        throws IOException {
      if (beginRecNum < 0) beginRecNum = 0;
      if (endRecNum < 0 || endRecNum > getEntryCount()) {
        endRecNum = getEntryCount();
      }
      return new Scanner(this, getLocationByRecordNum(beginRecNum),
          getLocationByRecordNum(endRecNum));
    }

    /**
     * The TFile Scanner. The Scanner has an implicit cursor, which, upon
     * creation, points to the first key-value pair in the scan range. If the
     * scan range is empty, the cursor will point to the end of the scan range.
     * <p>
     * Use {@link Scanner#atEnd()} to test whether the cursor is at the end
     * location of the scanner.
     * <p>
     * Use {@link Scanner#advance()} to move the cursor to the next key-value
     * pair (or end if none exists). Use seekTo methods (
     * {@link Scanner#seekTo(byte[])} or
     * {@link Scanner#seekTo(byte[], int, int)}) to seek to any arbitrary
     * location in the covered range (including backward seeking). Use
     * {@link Scanner#rewind()} to seek back to the beginning of the scanner.
     * Use {@link Scanner#seekToEnd()} to seek to the end of the scanner.
     * <p>
     * Actual keys and values may be obtained through {@link Scanner.Entry}
     * object, which is obtained through {@link Scanner#entry()}.
     */
    public static class Scanner implements Closeable {
      // The underlying TFile reader.
      final Reader reader;
      // current block (null if reaching end)
      private BlockReader blkReader;
      private byte[] blockBuffer;

      Location beginLocation;
      Location endLocation;
      Location currentLocation;

      // flag to ensure value is only examined once.
      boolean valueChecked = false;
      // reusable buffer for keys.
//      final byte[] keyBuffer;
      // length of key, -1 means key is invalid.
      int klen = -1;
      int keyOffset = 0;

      static final int MAX_VAL_TRANSFER_BUF_SIZE = 128 * 1024;
      BytesWritable valTransferBuffer;

//      DataInputBuffer keyDataInputStream;
      ChunkDecoder valueBufferInputStream;
      DataInputStream valueDataInputStream;
      // vlen == -1 if unknown.
      int vlen;
      int valueOffset = 0;

      /**
       * Constructor
       *
       * @param reader
       *          The TFile reader object.
       * @param offBegin
       *          Begin byte-offset of the scan.
       * @param offEnd
       *          End byte-offset of the scan.
       * @throws IOException
       *
       *           The offsets will be rounded to the beginning of a compressed
       *           block whose offset is greater than or equal to the specified
       *           offset.
       */
      protected Scanner(Reader reader, long offBegin, long offEnd)
          throws IOException {
        this(reader, reader.getLocationNear(offBegin), reader
            .getLocationNear(offEnd));
      }

      /**
       * Constructor
       *
       * @param reader
       *          The TFile reader object.
       * @param begin
       *          Begin location of the scan.
       * @param end
       *          End location of the scan.
       * @throws IOException
       */
      Scanner(Reader reader, Location begin, Location end) throws IOException {
        this.reader = reader;
        // ensure the TFile index is loaded throughout the life of scanner.
        reader.checkTFileDataIndex();
        beginLocation = begin;
        endLocation = end;

        valTransferBuffer = new BytesWritable();
        // TODO: remember the longest key in a TFile, and use it to replace
        // MAX_KEY_SIZE.
//        keyBuffer = new byte[MAX_KEY_SIZE];
//        keyDataInputStream = new DataInputBuffer();
        valueBufferInputStream = new ChunkDecoder();
        valueDataInputStream = new DataInputStream(valueBufferInputStream);

        if (beginLocation.compareTo(endLocation) >= 0) {
          currentLocation = new Location(endLocation);
        } else {
          currentLocation = new Location(0, 0);
          initBlock(beginLocation.getBlockIndex());
          inBlockAdvance(beginLocation.getRecordIndex());
        }
      }

      /**
       * Constructor
       *
       * @param reader
       *          The TFile reader object.
       * @param beginKey
       *          Begin key of the scan. If null, scan from the first <K,V>
       *          entry of the TFile.
       * @param endKey
       *          End key of the scan. If null, scan up to the last <K, V> entry
       *          of the TFile.
       * @throws IOException
       */
      protected Scanner(Reader reader, RawComparable beginKey,
          RawComparable endKey) throws IOException {
        this(reader, (beginKey == null) ? reader.begin() : reader
            .getBlockContainsKey(beginKey, false), reader.end());
        if (beginKey != null) {
          inBlockAdvance(beginKey, false);
          beginLocation.set(currentLocation);
        }
        if (endKey != null) {
          seekTo(endKey, false);
          endLocation.set(currentLocation);
          seekTo(beginLocation);
        }
      }

      /**
       * Move the cursor to the first entry whose key is greater than or equal
       * to the input key. Synonymous to seekTo(key, 0, key.length). The entry
       * returned by the previous entry() call will be invalid.
       *
       * @param key
       *          The input key
       * @return true if we find an equal key.
       * @throws IOException
       */
      public boolean seekTo(byte[] key) throws IOException {
        return seekTo(key, 0, key.length);
      }

      /**
       * Move the cursor to the first entry whose key is greater than or equal
       * to the input key. The entry returned by the previous entry() call will
       * be invalid.
       *
       * @param key
       *          The input key
       * @param keyOffset
       *          offset in the key buffer.
       * @param keyLen
       *          key buffer length.
       * @return true if we find an equal key; false otherwise.
       * @throws IOException
       */
      public boolean seekTo(byte[] key, int keyOffset, int keyLen)
          throws IOException {
        return seekTo(new ByteArray(key, keyOffset, keyLen), false);
      }

      private boolean seekTo(RawComparable key, boolean beyond)
          throws IOException {
        Location l = reader.getBlockContainsKey(key, beyond);
        if (l.compareTo(beginLocation) < 0) {
          l = beginLocation;
        } else if (l.compareTo(endLocation) >= 0) {
          seekTo(endLocation);
          return false;
        }

        // check if what we are seeking is in the later part of the current
        // block.
        if (atEnd() || (l.getBlockIndex() != currentLocation.getBlockIndex())
            || (compareCursorKeyTo(key) >= 0)) {
          // sorry, we must seek to a different location first.
          seekTo(l);
        }

        return inBlockAdvance(key, beyond);
      }

      /**
       * Move the cursor to the new location. The entry returned by the previous
       * entry() call will be invalid.
       *
       * @param l
       *          new cursor location. It must fall between the begin and end
       *          location of the scanner.
       * @throws IOException
       */
      private void seekTo(Location l) throws IOException {
        if (l.compareTo(beginLocation) < 0) {
          throw new IllegalArgumentException(
              "Attempt to seek before the begin location.");
        }

        if (l.compareTo(endLocation) > 0) {
          throw new IllegalArgumentException(
              "Attempt to seek after the end location.");
        }

        if (l.compareTo(endLocation) == 0) {
          parkCursorAtEnd();
          return;
        }

        if (l.getBlockIndex() != currentLocation.getBlockIndex()) {
          // going to a totally different block
          initBlock(l.getBlockIndex());
        } else {
          if (valueChecked) {
            // may temporarily go beyond the last record in the block (in which
            // case the next if loop will always be true).
            inBlockAdvance(1);
          }
          if (l.getRecordIndex() < currentLocation.getRecordIndex()) {
            initBlock(l.getBlockIndex());
          }
        }

        inBlockAdvance(l.getRecordIndex() - currentLocation.getRecordIndex());

        return;
      }

      /**
       * Rewind to the first entry in the scanner. The entry returned by the
       * previous entry() call will be invalid.
       *
       * @throws IOException
       */
      public void rewind() throws IOException {
        seekTo(beginLocation);
      }

      /**
       * Seek to the end of the scanner. The entry returned by the previous
       * entry() call will be invalid.
       *
       * @throws IOException
       */
      public void seekToEnd() throws IOException {
        parkCursorAtEnd();
      }

      /**
       * Move the cursor to the first entry whose key is greater than or equal
       * to the input key. Synonymous to lowerBound(key, 0, key.length). The
       * entry returned by the previous entry() call will be invalid.
       *
       * @param key
       *          The input key
       * @throws IOException
       */
      public void lowerBound(byte[] key) throws IOException {
        lowerBound(key, 0, key.length);
      }

      /**
       * Move the cursor to the first entry whose key is greater than or equal
       * to the input key. The entry returned by the previous entry() call will
       * be invalid.
       *
       * @param key
       *          The input key
       * @param keyOffset
       *          offset in the key buffer.
       * @param keyLen
       *          key buffer length.
       * @throws IOException
       */
      public void lowerBound(byte[] key, int keyOffset, int keyLen)
          throws IOException {
        seekTo(new ByteArray(key, keyOffset, keyLen), false);
      }

      /**
       * Move the cursor to the first entry whose key is strictly greater than
       * the input key. Synonymous to upperBound(key, 0, key.length). The entry
       * returned by the previous entry() call will be invalid.
       *
       * @param key
       *          The input key
       * @throws IOException
       */
      public void upperBound(byte[] key) throws IOException {
        upperBound(key, 0, key.length);
      }

      /**
       * Move the cursor to the first entry whose key is strictly greater than
       * the input key. The entry returned by the previous entry() call will be
       * invalid.
       *
       * @param key
       *          The input key
       * @param keyOffset
       *          offset in the key buffer.
       * @param keyLen
       *          key buffer length.
       * @throws IOException
       */
      public void upperBound(byte[] key, int keyOffset, int keyLen)
          throws IOException {
        seekTo(new ByteArray(key, keyOffset, keyLen), true);
      }

      /**
       * Move the cursor to the next key-value pair. The entry returned by the
       * previous entry() call will be invalid.
       *
       * @return true if the cursor successfully moves. False when cursor is
       *         already at the end location and cannot be advanced.
       * @throws IOException
       */
      public boolean advance() throws IOException {
        if (atEnd()) {
          return false;
        }

        int curBid = currentLocation.getBlockIndex();
        long curRid = currentLocation.getRecordIndex();
        long entriesInBlock = reader.getBlockEntryCount(curBid);
        if (curRid + 1 >= entriesInBlock) {
          if (endLocation.compareTo(curBid + 1, 0) <= 0) {
            // last entry in TFile.
            parkCursorAtEnd();
          } else {
            // last entry in Block.
            initBlock(curBid + 1);
          }
        } else {
          inBlockAdvance(1);
        }
        return true;
      }

      /**
       * Load a compressed block for reading. Expecting blockIndex is valid.
       *
       * @throws IOException
       */
      private void initBlock(int blockIndex) throws IOException {
        klen = -1;
        if (blkReader != null) {
          try {
            blkReader.close();
          } finally {
            blkReader = null;
          }
        }
        blkReader = reader.getBlockReader(blockIndex);
        blockBuffer = blkReader.getBlockDataInputStream().getBuf();
        currentLocation.set(blockIndex, 0);
      }

      private void parkCursorAtEnd() throws IOException {
        klen = -1;
        currentLocation.set(endLocation);
        if (blkReader != null) {
          try {
            blkReader.close();
          } finally {
            blkReader = null;
          }
        }
      }

      /**
       * Close the scanner. Release all resources. The behavior of using the
       * scanner after calling close is not defined. The entry returned by the
       * previous entry() call will be invalid.
       */
      @Override
      public void close() throws IOException {
        parkCursorAtEnd();
      }

      /**
       * Is cursor at the end location?
       *
       * @return true if the cursor is at the end location.
       */
      public boolean atEnd() {
        return (currentLocation.compareTo(endLocation) >= 0);
      }

      /**
       * check whether we have already successfully obtained the key. It also
       * initializes the valueInputStream.
       */
      void checkKey() throws IOException {
        if (klen >= 0) return;
        if (atEnd()) {
          throw new EOFException("No key-value to read");
        }
        klen = -1;
        vlen = -1;
        valueChecked = false;

        klen = Utils.readVInt(blkReader);
        keyOffset = blkReader.getBlockDataInputStream().getPos();
        blkReader.getBlockDataInputStream().skip(klen);
        valueBufferInputStream.reset(blkReader);
        if (valueBufferInputStream.isLastChunk()) {
          vlen = valueBufferInputStream.getRemain();
          valueOffset = blkReader.getBlockDataInputStream().getPos();
        }
      }

      /**
       * Get an entry to access the key and value.
       *
       * @return The Entry object to access the key and value.
       * @throws IOException
       */
      public Entry entry() throws IOException {
        checkKey();
        return new Entry();
      }

      /**
       * Get the RecordNum corresponding to the entry pointed by the cursor.
       * @return The RecordNum corresponding to the entry pointed by the cursor.
       * @throws IOException
       */
      public long getRecordNum() throws IOException {
        return reader.getRecordNumByLocation(currentLocation);
      }

      /**
       * Internal API. Comparing the key at cursor to user-specified key.
       *
       * @param other
       *          user-specified key.
       * @return negative if key at cursor is smaller than user key; 0 if equal;
       *         and positive if key at cursor greater than user key.
       * @throws IOException
       */
      int compareCursorKeyTo(RawComparable other) throws IOException {
        checkKey();
        return reader.compareKeys(blockBuffer, keyOffset, klen, other.buffer(), other
            .offset(), other.size());
      }

      /**
       * Entry to a &lt;Key, Value&gt; pair.
       */
      public class Entry implements Comparable<RawComparable> {
        /**
         * Get the length of the key.
         *
         * @return the length of the key.
         */
        public int getKeyLength() {
          return klen;
        }

        public int getKeyOffset() {
          return keyOffset;
        }

        public int getValueOffset() {
          return valueOffset;
        }

        public byte[] getBlockBuffer() {
          return blockBuffer;
        }

        /**
         * Copy the key and value in one shot into BytesWritables. This is
         * equivalent to getKey(key); getValue(value);
         *
         * @param key
         *          BytesWritable to hold key.
         * @param value
         *          BytesWritable to hold value
         * @throws IOException
         */
        public void get(BytesWritable key, BytesWritable value)
            throws IOException {
          getKey(key);
          getValue(value);
        }

        /**
         * Copy the key into BytesWritable. The input BytesWritable will be
         * automatically resized to the actual key size.
         *
         * @param key
         *          BytesWritable to hold the key.
         * @throws IOException
         */
        public int getKey(BytesWritable key) throws IOException {
          key.setSize(getKeyLength());
          getKey(key.getBytes());
          return key.getLength();
        }

        /**
         * Copy the value into BytesWritable. The input BytesWritable will be
         * automatically resized to the actual value size. The implementation
         * directly uses the buffer inside BytesWritable for storing the value.
         * The call does not require the value length to be known.
         *
         * @param value
         * @throws IOException
         */
        public long getValue(BytesWritable value) throws IOException {
          DataInputStream dis = getValueStream();
          int size = 0;
          try {
            int remain;
            while ((remain = valueBufferInputStream.getRemain()) > 0) {
              value.setSize(size + remain);
              dis.readFully(value.getBytes(), size, remain);
              size += remain;
            }
            return value.getLength();
          } finally {
            dis.close();
          }
        }

        /**
         * Writing the key to the output stream. This method avoids copying key
         * buffer from Scanner into user buffer, then writing to the output
         * stream.
         *
         * @param out
         *          The output stream
         * @return the length of the key.
         * @throws IOException
         */
        public int writeKey(OutputStream out) throws IOException {
          out.write(blockBuffer, keyOffset, klen);
          return klen;
        }

        /**
         * Writing the value to the output stream. This method avoids copying
         * value data from Scanner into user buffer, then writing to the output
         * stream. It does not require the value length to be known.
         *
         * @param out
         *          The output stream
         * @return the length of the value
         * @throws IOException
         */
        public long writeValue(OutputStream out) throws IOException {
          DataInputStream dis = getValueStream();
          long size = 0;
          try {
            int chunkSize;
            while ((chunkSize = valueBufferInputStream.getRemain()) > 0) {
              chunkSize = Math.min(chunkSize, MAX_VAL_TRANSFER_BUF_SIZE);
              valTransferBuffer.setSize(chunkSize);
              dis.readFully(valTransferBuffer.getBytes(), 0, chunkSize);
              out.write(valTransferBuffer.getBytes(), 0, chunkSize);
              size += chunkSize;
            }
            return size;
          } finally {
            dis.close();
          }
        }

        /**
         * Copy the key into user supplied buffer.
         *
         * @param buf
         *          The buffer supplied by user. The length of the buffer must
         *          not be shorter than the key length.
         * @return The length of the key.
         *
         * @throws IOException
         */
        public int getKey(byte[] buf) throws IOException {
          return getKey(buf, 0);
        }

        /**
         * Copy the key into user supplied buffer.
         *
         * @param buf
         *          The buffer supplied by user.
         * @param offset
         *          The starting offset of the user buffer where we should copy
         *          the key into. Requiring the key-length + offset no greater
         *          than the buffer length.
         * @return The length of the key.
         * @throws IOException
         */
        public int getKey(byte[] buf, int offset) throws IOException {
          if ((offset | (buf.length - offset - klen)) < 0) {
            throw new IndexOutOfBoundsException(
                "Bufer not enough to store the key");
          }
          System.arraycopy(blockBuffer, keyOffset, buf, offset, klen);
          return klen;
        }

        /**
         * Streaming access to the key. Useful for desrializing the key into
         * user objects.
         *
         * @return The input stream.
         */
//        public DataInputStream getKeyStream() {
//          keyDataInputStream.reset(keyBuffer, klen);
//          return keyDataInputStream;
//        }

        /**
         * Get the length of the value. isValueLengthKnown() must be tested
         * true.
         *
         * @return the length of the value.
         */
        public int getValueLength() {
          if (vlen >= 0) {
            return vlen;
          }

          throw new RuntimeException("Value length unknown.");
        }

        /**
         * Copy value into user-supplied buffer. User supplied buffer must be
         * large enough to hold the whole value. The value part of the key-value
         * pair pointed by the current cursor is not cached and can only be
         * examined once. Calling any of the following functions more than once
         * without moving the cursor will result in exception:
         * {@link #getValue(byte[])}, {@link #getValue(byte[], int)},
         * {@link #getValueStream}.
         *
         * @return the length of the value. Does not require
         *         isValueLengthKnown() to be true.
         * @throws IOException
         *
         */
        public int getValue(byte[] buf) throws IOException {
          return getValue(buf, 0);
        }

        /**
         * Copy value into user-supplied buffer. User supplied buffer must be
         * large enough to hold the whole value (starting from the offset). The
         * value part of the key-value pair pointed by the current cursor is not
         * cached and can only be examined once. Calling any of the following
         * functions more than once without moving the cursor will result in
         * exception: {@link #getValue(byte[])}, {@link #getValue(byte[], int)},
         * {@link #getValueStream}.
         *
         * @return the length of the value. Does not require
         *         isValueLengthKnown() to be true.
         * @throws IOException
         */
        public int getValue(byte[] buf, int offset) throws IOException {
          DataInputStream dis = getValueStream();
          try {
            if (isValueLengthKnown()) {
              if ((offset | (buf.length - offset - vlen)) < 0) {
                throw new IndexOutOfBoundsException(
                    "Buffer too small to hold value");
              }
              dis.readFully(buf, offset, vlen);
              return vlen;
            }

            int nextOffset = offset;
            while (nextOffset < buf.length) {
              int n = dis.read(buf, nextOffset, buf.length - nextOffset);
              if (n < 0) {
                break;
              }
              nextOffset += n;
            }
            if (dis.read() >= 0) {
              // attempt to read one more byte to determine whether we reached
              // the
              // end or not.
              throw new IndexOutOfBoundsException(
                  "Buffer too small to hold value");
            }
            return nextOffset - offset;
          } finally {
            dis.close();
          }
        }

        /**
         * Stream access to value. The value part of the key-value pair pointed
         * by the current cursor is not cached and can only be examined once.
         * Calling any of the following functions more than once without moving
         * the cursor will result in exception: {@link #getValue(byte[])},
         * {@link #getValue(byte[], int)}, {@link #getValueStream}.
         *
         * @return The input stream for reading the value.
         * @throws IOException
         */
        public DataInputStream getValueStream() throws IOException {
          if (valueChecked == true) {
            throw new IllegalStateException(
                "Attempt to examine value multiple times.");
          }
          valueChecked = true;
          return valueDataInputStream;
        }

        /**
         * Check whether it is safe to call getValueLength().
         *
         * @return true if value length is known before hand. Values less than
         *         the chunk size will always have their lengths known before
         *         hand. Values that are written out as a whole (with advertised
         *         length up-front) will always have their lengths known in
         *         read.
         */
        public boolean isValueLengthKnown() {
          return (vlen >= 0);
        }

        /**
         * Compare the entry key to another key. Synonymous to compareTo(key, 0,
         * key.length).
         *
         * @param buf
         *          The key buffer.
         * @return comparison result between the entry key with the input key.
         */
        public int compareTo(byte[] buf) {
          return compareTo(buf, 0, buf.length);
        }

        /**
         * Compare the entry key to another key. Synonymous to compareTo(new
         * ByteArray(buf, offset, length)
         *
         * @param buf
         *          The key buffer
         * @param offset
         *          offset into the key buffer.
         * @param length
         *          the length of the key.
         * @return comparison result between the entry key with the input key.
         */
        public int compareTo(byte[] buf, int offset, int length) {
          return compareTo(new ByteArray(buf, offset, length));
        }

        /**
         * Compare an entry with a RawComparable object. This is useful when
         * Entries are stored in a collection, and we want to compare a user
         * supplied key.
         */
        @Override
        public int compareTo(RawComparable key) {
          return reader.compareKeys(blockBuffer, getKeyOffset(), getKeyLength(), key.buffer(),
              key.offset(), key.size());
        }

        /**
         * Compare whether this and other points to the same key value.
         */
        @Override
        public boolean equals(Object other) {
          if (this == other) return true;
          if (!(other instanceof Entry)) return false;
          return ((Entry) other).compareTo(blockBuffer, getKeyOffset(), getKeyLength()) == 0;
        }

        @Override
        public int hashCode() {
          return WritableComparator.hashBytes(blockBuffer, getKeyOffset(), getKeyLength());
        }
      }

      /**
       * Advance cursor by n positions within the block.
       *
       * @param n
       *          Number of key-value pairs to skip in block.
       * @throws IOException
       */
      private void inBlockAdvance(long n) throws IOException {
        for (long i = 0; i < n; ++i) {
          checkKey();
          if (!valueBufferInputStream.isClosed()) {
            valueBufferInputStream.close();
          }
          klen = -1;
          currentLocation.incRecordIndex();
        }
      }

      /**
       * Advance cursor in block until we find a key that is greater than or
       * equal to the input key.
       *
       * @param key
       *          Key to compare.
       * @param greater
       *          advance until we find a key greater than the input key.
       * @return true if we find a equal key.
       * @throws IOException
       */
      private boolean inBlockAdvance(RawComparable key, boolean greater)
          throws IOException {
        int curBid = currentLocation.getBlockIndex();
        long entryInBlock = reader.getBlockEntryCount(curBid);
        if (curBid == endLocation.getBlockIndex()) {
          entryInBlock = endLocation.getRecordIndex();
        }

        while (currentLocation.getRecordIndex() < entryInBlock) {
          int cmp = compareCursorKeyTo(key);
          if (cmp > 0) return false;
          if (cmp == 0 && !greater) return true;
          if (!valueBufferInputStream.isClosed()) {
            valueBufferInputStream.close();
          }
          klen = -1;
          currentLocation.incRecordIndex();
        }

        throw new RuntimeException("Cannot find matching key in block.");
      }
    }

    long getBlockEntryCount(int curBid) {
      return tfileIndex.getEntry(curBid).entries();
    }

    BlockReader getBlockReader(int blockIndex) throws IOException {
      return readerBCF.getDataBlock(blockIndex);
    }
  }

  /**
   * Data structure representing "TFile.meta" meta block.
   */
  static final class TFileMeta {
    final static String BLOCK_NAME = "TFile.meta";
    final Version version;
    private long recordCount;
    private final String strComparator;
    private final BytesComparator comparator;

    // ctor for writes
    public TFileMeta(String comparator) {
      // set fileVersion to API version when we create it.
      version = DTFile.API_VERSION;
      recordCount = 0;
      strComparator = (comparator == null) ? "" : comparator;
      this.comparator = makeComparator(strComparator);
    }

    // ctor for reads
    public TFileMeta(DataInput in) throws IOException {
      version = new Version(in);
      if (!version.compatibleWith(DTFile.API_VERSION)) {
        throw new RuntimeException("Incompatible TFile fileVersion.");
      }
      recordCount = Utils.readVLong(in);
      strComparator = Utils.readString(in);
      comparator = makeComparator(strComparator);
    }

    @SuppressWarnings("unchecked")
    static BytesComparator makeComparator(String comparator) {
      if (comparator.length() == 0) {
        // unsorted keys
        return null;
      }
      if (comparator.equals(COMPARATOR_MEMCMP)) {
        // default comparator
        return new BytesComparator(new MemcmpRawComparator());
      } else if (comparator.startsWith(COMPARATOR_JCLASS)) {
        String compClassName =
            comparator.substring(COMPARATOR_JCLASS.length()).trim();
        try {
          Class compClass = Class.forName(compClassName);
          // use its default ctor to create an instance
          return new BytesComparator((RawComparator<Object>) compClass
              .newInstance());
        } catch (Exception e) {
          throw new IllegalArgumentException(
              "Failed to instantiate comparator: " + comparator + "("
                  + e.toString() + ")");
        }
      } else {
        throw new IllegalArgumentException("Unsupported comparator: "
            + comparator);
      }
    }

    public void write(DataOutput out) throws IOException {
      DTFile.API_VERSION.write(out);
      Utils.writeVLong(out, recordCount);
      Utils.writeString(out, strComparator);
    }

    public long getRecordCount() {
      return recordCount;
    }

    public void incRecordCount() {
      ++recordCount;
    }

    public boolean isSorted() {
      return !strComparator.isEmpty();
    }

    public String getComparatorString() {
      return strComparator;
    }

    public BytesComparator getComparator() {
      return comparator;
    }

    public Version getVersion() {
      return version;
    }
  } // END: class MetaTFileMeta

  /**
   * Data structure representing "TFile.index" meta block.
   */
  static class TFileIndex {
    final static String BLOCK_NAME = "TFile.index";
    private ByteArray firstKey;
    private final ArrayList<TFileIndexEntry> index;
    private final ArrayList<Long> recordNumIndex;
    private final BytesComparator comparator;
    private long sum = 0;

    /**
     * For reading from file.
     *
     * @throws IOException
     */
    public TFileIndex(int entryCount, DataInput in, BytesComparator comparator)
        throws IOException {
      index = new ArrayList<TFileIndexEntry>(entryCount);
      recordNumIndex = new ArrayList<Long>(entryCount);
      int size = Utils.readVInt(in); // size for the first key entry.
      if (size > 0) {
        byte[] buffer = new byte[size];
        in.readFully(buffer);
        DataInputStream firstKeyInputStream =
            new DataInputStream(new ByteArrayInputStream(buffer, 0, size));

        int firstKeyLength = Utils.readVInt(firstKeyInputStream);
        firstKey = new ByteArray(new byte[firstKeyLength]);
        firstKeyInputStream.readFully(firstKey.buffer());

        for (int i = 0; i < entryCount; i++) {
          size = Utils.readVInt(in);
          if (buffer.length < size) {
            buffer = new byte[size];
          }
          in.readFully(buffer, 0, size);
          TFileIndexEntry idx =
              new TFileIndexEntry(new DataInputStream(new ByteArrayInputStream(
                  buffer, 0, size)));
          index.add(idx);
          sum += idx.entries();
          recordNumIndex.add(sum);
        }
      } else {
        if (entryCount != 0) {
          throw new RuntimeException("Internal error");
        }
      }
      this.comparator = comparator;
    }

    /**
     * @param key
     *          input key.
     * @return the ID of the first block that contains key >= input key. Or -1
     *         if no such block exists.
     */
    public int lowerBound(RawComparable key) {
      if (comparator == null) {
        throw new RuntimeException("Cannot search in unsorted TFile");
      }

      if (firstKey == null) {
        return -1; // not found
      }

      int ret = Utils.lowerBound(index, key, comparator);
      if (ret == index.size()) {
        return -1;
      }
      return ret;
    }

    /**
     * @param key
     *          input key.
     * @return the ID of the first block that contains key > input key. Or -1
     *         if no such block exists.
     */
    public int upperBound(RawComparable key) {
      if (comparator == null) {
        throw new RuntimeException("Cannot search in unsorted TFile");
      }

      if (firstKey == null) {
        return -1; // not found
      }

      int ret = Utils.upperBound(index, key, comparator);
      if (ret == index.size()) {
        return -1;
      }
      return ret;
    }

    /**
     * For writing to file.
     */
    public TFileIndex(BytesComparator comparator) {
      index = new ArrayList<TFileIndexEntry>();
      recordNumIndex = new ArrayList<Long>();
      this.comparator = comparator;
    }

    public RawComparable getFirstKey() {
      return firstKey;
    }

    public Reader.Location getLocationByRecordNum(long recNum) {
      int idx = Utils.upperBound(recordNumIndex, recNum);
      long lastRecNum = (idx == 0)? 0: recordNumIndex.get(idx-1);
      return new Reader.Location(idx, recNum-lastRecNum);
    }

    public long getRecordNumByLocation(Reader.Location location) {
      int blkIndex = location.getBlockIndex();
      long lastRecNum = (blkIndex == 0) ? 0: recordNumIndex.get(blkIndex-1);
      return lastRecNum + location.getRecordIndex();
    }

    public void setFirstKey(byte[] key, int offset, int length) {
      firstKey = new ByteArray(new byte[length]);
      System.arraycopy(key, offset, firstKey.buffer(), 0, length);
    }

    public RawComparable getLastKey() {
      if (index.size() == 0) {
        return null;
      }
      return new ByteArray(index.get(index.size() - 1).buffer());
    }

    public void addEntry(TFileIndexEntry keyEntry) {
      index.add(keyEntry);
      sum += keyEntry.entries();
      recordNumIndex.add(sum);
    }

    public TFileIndexEntry getEntry(int bid) {
      return index.get(bid);
    }

    public void write(DataOutput out) throws IOException {
      if (firstKey == null) {
        Utils.writeVInt(out, 0);
        return;
      }

      DataOutputBuffer dob = new DataOutputBuffer();
      Utils.writeVInt(dob, firstKey.size());
      dob.write(firstKey.buffer());
      Utils.writeVInt(out, dob.size());
      out.write(dob.getData(), 0, dob.getLength());

      for (TFileIndexEntry entry : index) {
        dob.reset();
        entry.write(dob);
        Utils.writeVInt(out, dob.getLength());
        out.write(dob.getData(), 0, dob.getLength());
      }
    }
  }

  /**
   * TFile Data Index entry. We should try to make the memory footprint of each
   * index entry as small as possible.
   */
  static final class TFileIndexEntry implements RawComparable {
    final byte[] key;
    // count of <key, value> entries in the block.
    final long kvEntries;

    public TFileIndexEntry(DataInput in) throws IOException {
      int len = Utils.readVInt(in);
      key = new byte[len];
      in.readFully(key, 0, len);
      kvEntries = Utils.readVLong(in);
    }

    // default entry, without any padding
    public TFileIndexEntry(byte[] newkey, int offset, int len, long entries) {
      key = new byte[len];
      System.arraycopy(newkey, offset, key, 0, len);
      this.kvEntries = entries;
    }

    @Override
    public byte[] buffer() {
      return key;
    }

    @Override
    public int offset() {
      return 0;
    }

    @Override
    public int size() {
      return key.length;
    }

    long entries() {
      return kvEntries;
    }

    public void write(DataOutput out) throws IOException {
      Utils.writeVInt(out, key.length);
      out.write(key, 0, key.length);
      Utils.writeVLong(out, kvEntries);
    }
  }

  /**
   * Dumping the TFile information.
   *
   * @param args
   *          A list of TFile paths.
   */
  public static void main(String[] args) {
    System.out.printf("TFile Dumper (TFile %s, BCFile %s)\n", DTFile.API_VERSION
        .toString(), DTBCFile.API_VERSION.toString());
    if (args.length == 0) {
      System.out
          .println("Usage: java ... com.datatorrent.contrib.hdht.tfile.withcache.TFile tfile-path [tfile-path ...]");
      System.exit(0);
    }
    Configuration conf = new Configuration();

    for (String file : args) {
      System.out.println("===" + file + "===");
      try {
        TFileDumper.dumpInfo(file, System.out, conf);
      } catch (IOException e) {
        e.printStackTrace(System.err);
      }
    }
  }
}