org.apache.kylin.cube.cuboid.Cuboid Java Examples

The following examples show how to use org.apache.kylin.cube.cuboid.Cuboid. Each example notes the source file and the open source project it was taken from.
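Before the examples, here is a minimal sketch of the core Cuboid calls they rely on (getBaseCuboidId, findById, getColumns, getDisplayName, all of which appear in the examples below). The import paths are assumed from the Kylin source tree, and the method itself is illustrative rather than taken from any one project:

import java.util.List;

import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.metadata.model.TblColRef;

public class CuboidBasics {
    // print the base cuboid, whose id has every rowkey dimension bit set
    public static void describeBaseCuboid(CubeDesc cubeDesc) {
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
        List<TblColRef> columns = baseCuboid.getColumns();
        System.out.println(Cuboid.getDisplayName(baseCuboid.getId(), columns.size()));
    }
}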
Example #1
Source File: CubeStatsReader.java    From kylin with Apache License 2.0
private static void printOneCuboidInfo(long parent, long cuboidID, Map<Long, Long> cuboidRows,
        Map<Long, Double> cuboidSizes, int dimensionCount, int depth, PrintWriter out) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < depth; i++) {
        sb.append("    ");
    }
    String cuboidName = Cuboid.getDisplayName(cuboidID, dimensionCount);
    sb.append("|---- Cuboid ").append(cuboidName);

    long rowCount = cuboidRows.get(cuboidID);
    double size = cuboidSizes.get(cuboidID);
    sb.append(", est row: ").append(rowCount).append(", est MB: ").append(formatDouble(size));

    if (parent != -1) {
        sb.append(", shrink: ").append(formatDouble(100.0 * cuboidRows.get(cuboidID) / cuboidRows.get(parent)))
                .append("%");
    }

    out.println(sb.toString());
}
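For context, each invocation prints one line of the cuboid tree. A hypothetical output line (the values and the exact display-name format are illustrative) would look like:

    |---- Cuboid 111111111, est row: 10000, est MB: 12.34, shrink: 25.00%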
 
Example #2
Source File: SequentialCubeTupleIterator.java    From kylin-on-parquet-v2 with Apache License 2.0
public SequentialCubeTupleIterator(List<CubeSegmentScanner> scanners, Cuboid cuboid,
        Set<TblColRef> selectedDimensions, List<TblColRef> rtGroups, Set<TblColRef> groups, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context, SQLDigest sqlDigest) {
    this.context = context;
    this.scanners = scanners;

    Set<TblColRef> selectedDims = Sets.newHashSet(selectedDimensions);
    selectedDims.addAll(rtGroups);

    segmentCubeTupleIterators = Lists.newArrayList();
    for (CubeSegmentScanner scanner : scanners) {
        segmentCubeTupleIterators.add(new SegmentCubeTupleIterator(scanner, cuboid, selectedDims, selectedMetrics, returnTupleInfo, context));
    }

    if (context.mergeSortPartitionResults() && !sqlDigest.isRawQuery) {
        //query with limit
        logger.info("Using SortedIteratorMergerWithLimit to merge segment results");
        Iterator<Iterator<ITuple>> transformed = (Iterator<Iterator<ITuple>>) (Iterator<?>) segmentCubeTupleIterators.iterator();
        tupleIterator = new SortedIteratorMergerWithLimit<ITuple>(transformed, context.getFinalPushDownLimit(), getTupleDimensionComparator(cuboid, groups, returnTupleInfo)).getIterator();
    } else {
        //normal case
        logger.info("Using Iterators.concat to merge segment results");
        tupleIterator = Iterators.concat(segmentCubeTupleIterators.iterator());
    }
}
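When mergeSortPartitionResults is on and the query is not raw, the per-segment iterators are merged with the limit-aware sorted merger; otherwise they are simply chained end to end. The fallback branch leans on Guava's Iterators.concat; a minimal self-contained sketch of that path, with plain Integer iterators standing in for ITuple:

import java.util.Iterator;
import java.util.List;

import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;

public class ConcatDemo {
    public static void main(String[] args) {
        List<Iterator<Integer>> parts = Lists.newArrayList(
                Lists.newArrayList(1, 2).iterator(),
                Lists.newArrayList(3, 4).iterator());
        // chains the per-part iterators one after another, like the "normal case" above
        Iterator<Integer> merged = Iterators.concat(parts.iterator());
        merged.forEachRemaining(System.out::println); // prints 1 2 3 4
    }
}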
 
Example #3
Source File: SegmentCubeTupleIterator.java    From kylin-on-parquet-v2 with Apache License 2.0
public SegmentCubeTupleIterator(CubeSegmentScanner scanner, Cuboid cuboid, Set<TblColRef> selectedDimensions, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context) {
    this.scanner = scanner;
    this.cuboid = cuboid;
    this.selectedDimensions = selectedDimensions;
    this.selectedMetrics = selectedMetrics;
    this.tupleInfo = returnTupleInfo;
    this.tuple = new Tuple(returnTupleInfo);
    this.context = context;

    CuboidToGridTableMapping mapping = context.getMapping();
    int[] gtDimsIdx = mapping.getDimIndexes(selectedDimensions);
    int[] gtMetricsIdx = mapping.getMetricsIndexes(selectedMetrics);
    // gtColIdx = gtDimsIdx + gtMetricsIdx
    int[] gtColIdx = new int[gtDimsIdx.length + gtMetricsIdx.length];
    System.arraycopy(gtDimsIdx, 0, gtColIdx, 0, gtDimsIdx.length);
    System.arraycopy(gtMetricsIdx, 0, gtColIdx, gtDimsIdx.length, gtMetricsIdx.length);

    this.gtValues = getGTValuesIterator(scanner.iterator(), scanner.getScanRequest(), gtDimsIdx, gtMetricsIdx);
    this.cubeTupleConverter = ((GTCubeStorageQueryBase) context.getStorageQuery()).newCubeTupleConverter(
            scanner.cubeSeg, cuboid, selectedDimensions, selectedMetrics, gtColIdx, tupleInfo);
}
 
Example #4
Source File: ObserverEnabler.java    From Kylin with Apache License 2.0
public static ResultScanner scanWithCoprocessorIfBeneficial(CubeSegment segment, Cuboid cuboid, TupleFilter tupleFiler, //
        Collection<TblColRef> groupBy, Collection<RowValueDecoder> rowValueDecoders, StorageContext context, HTableInterface table, Scan scan) throws IOException {

    if (!context.isCoprocessorEnabled()) {
        return table.getScanner(scan);
    }

    CoprocessorRowType type = CoprocessorRowType.fromCuboid(segment, cuboid);
    CoprocessorFilter filter = CoprocessorFilter.fromFilter(segment, tupleFiler);
    CoprocessorProjector projector = CoprocessorProjector.makeForObserver(segment, cuboid, groupBy);
    ObserverAggregators aggrs = ObserverAggregators.fromValueDecoders(rowValueDecoders);

    if (DEBUG_LOCAL_COPROCESSOR) {
        RegionScanner innerScanner = new RegionScannerAdapter(table.getScanner(scan));
        AggregationScanner aggrScanner = new AggregationScanner(type, filter, projector, aggrs, innerScanner);
        return new ResultScannerAdapter(aggrScanner);
    } else {
        scan.setAttribute(AggregateRegionObserver.COPROCESSOR_ENABLE, new byte[] { 0x01 });
        scan.setAttribute(AggregateRegionObserver.TYPE, CoprocessorRowType.serialize(type));
        scan.setAttribute(AggregateRegionObserver.PROJECTOR, CoprocessorProjector.serialize(projector));
        scan.setAttribute(AggregateRegionObserver.AGGREGATORS, ObserverAggregators.serialize(aggrs));
        scan.setAttribute(AggregateRegionObserver.FILTER, CoprocessorFilter.serialize(filter));
        return table.getScanner(scan);
    }
}
 
Example #5
Source File: CubeStatsReader.java    From kylin with Apache License 2.0
private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
                                                              long sourceRowCount, boolean origin) {
    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    final List<Integer> rowkeyColumnSize = Lists.newArrayList();
    final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    final List<TblColRef> columnList = baseCuboid.getColumns();
    final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
    final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());

    for (int i = 0; i < columnList.size(); i++) {
        rowkeyColumnSize.add(dimEncMap.get(columnList.get(i)).getLengthOfEncoding());
    }

    Map<Long, Double> sizeMap = Maps.newHashMap();
    for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
        sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
                baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
    }

    if (!origin && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
        optimizeSizeMap(sizeMap, cubeSegment);
    }

    return sizeMap;
}
 
Example #6
Source File: StreamingSearchContext.java    From kylin-on-parquet-v2 with Apache License 2.0
private void calculateHitCuboid() {
    long basicCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    this.setBasicCuboid(basicCuboid);
    if (!cubeDesc.getConfig().isStreamingBuildAdditionalCuboids()) {
        this.setHitCuboid(basicCuboid);
        return;
    }
    long targetCuboidID = identifyCuboid(dimensions);
    Set<Long> mandatoryCuboids = getMandatoryCuboids();
    for (long cuboidID : mandatoryCuboids) {
        if ((targetCuboidID & ~cuboidID) == 0) {
            this.setHitCuboid(cuboidID);
            return;
        }
    }
    this.setHitCuboid(basicCuboid);
}
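The test (targetCuboidID & ~cuboidID) == 0 asks whether every dimension bit required by the query is also present in the candidate cuboid. A minimal illustration with hypothetical bit values:

public class CuboidCoverage {
    public static void main(String[] args) {
        long target = 0b0101L;   // query needs the dimensions at bits 0 and 2
        long covering = 0b0111L; // candidate cuboid has bits 0, 1 and 2
        long missing = 0b0011L;  // candidate lacks bit 2
        System.out.println((target & ~covering) == 0); // true: candidate can serve the query
        System.out.println((target & ~missing) == 0);  // false: bit 2 is not covered
    }
}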
 
Example #7
Source File: FlinkCubingByLayer.java    From kylin with Apache License 2.0
@Override
public void mapPartition(Iterable<Tuple2<ByteArray, Object[]>> iterable, Collector<Tuple2<ByteArray, Object[]>> collector) throws Exception {
    for (Tuple2<ByteArray, Object[]> item : iterable) {
        byte[] key = item.f0.array();
        long cuboidId = rowKeySplitter.parseCuboid(key);
        final List<Long> myChildren = cubeSegment.getCuboidScheduler().getSpanningCuboid(cuboidId);

        // nothing to spawn if the children list is null or empty
        if (myChildren == null || myChildren.isEmpty()) {
            continue;
        }
        rowKeySplitter.split(key);
        final Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, cuboidId);

        for (Long child : myChildren) {
            Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, child);
            ByteArray result = ndCuboidBuilder.buildKey2(parentCuboid, childCuboid,
                    rowKeySplitter.getSplitBuffers());

            collector.collect(new Tuple2<>(result, item.f1));
        }
    }
}
 
Example #8
Source File: GTCubeStorageQueryBase.java    From kylin-on-parquet-v2 with Apache License 2.0
private void enableStreamAggregateIfBeneficial(Cuboid cuboid, Set<TblColRef> groupsD, StorageContext context) {
    CubeDesc cubeDesc = cuboid.getCubeDesc();
    boolean enabled = cubeDesc.getConfig().isStreamAggregateEnabled();

    Set<TblColRef> shardByInGroups = Sets.newHashSet();
    for (TblColRef col : cubeDesc.getShardByColumns()) {
        if (groupsD.contains(col)) {
            shardByInGroups.add(col);
        }
    }
    if (!shardByInGroups.isEmpty()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because shard by columns in groupD: {}",
                shardByInGroups);
    }

    if (!context.isNeedStorageAggregation()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because no storage aggregation");
    }

    if (enabled) {
        context.enableStreamAggregate();
    }
}
 
Example #9
Source File: RowKeyEncoder.java    From kylin with Apache License 2.0
public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) {
    super(cubeSeg, cuboid);
    enableSharding = cubeSeg.isEnableSharding();
    headerLength = cubeSeg.getRowKeyPreambleSize();
    Set<TblColRef> shardByColumns = cubeSeg.getCubeDesc().getShardByColumns();
    if (shardByColumns.size() > 1) {
        throw new IllegalStateException("Does not support multiple UHC now");
    }
    colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap());
    for (TblColRef column : cuboid.getColumns()) {
        if (shardByColumns.contains(column)) {
            uhcOffset = bodyLength;
            uhcLength = colIO.getColumnLength(column);
        }
        bodyLength += colIO.getColumnLength(column);
    }
}
 
Example #10
Source File: GTCubeStorageQueryRequest.java    From kylin-on-parquet-v2 with Apache License 2.0
public GTCubeStorageQueryRequest(Cuboid cuboid, Set<TblColRef> dimensions, //
        Set<TblColRef> groups, List<TblColRef> dynGroups, List<TupleExpression> dynGroupExprs, //
        Set<TblColRef> filterCols, Set<FunctionDesc> metrics, List<DynamicFunctionDesc> dynFuncs, //
        TupleFilter filter, TupleFilter havingFilter, StorageContext context) {
    this.cuboid = cuboid;
    this.dimensions = dimensions;
    this.groups = groups;
    this.dynGroups = dynGroups;
    this.dynGroupExprs = dynGroupExprs;
    this.filterCols = filterCols;
    this.metrics = metrics;
    this.dynFuncs = dynFuncs;
    this.filter = filter;
    this.havingFilter = havingFilter;
    this.context = context;
}
 
Example #11
Source File: CubeSizeEstimationCLI.java    From Kylin with Apache License 2.0
public static long estimatedCubeSize(String cubeName, long[] cardinality) {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    CubeDesc cubeDesc = cubeInstance.getDescriptor();

    CuboidScheduler scheduler = new CuboidScheduler(cubeDesc);
    long baseCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    LinkedList<Long> cuboidQueue = new LinkedList<Long>();
    cuboidQueue.push(baseCuboid);

    long totalSpace = 0;

    while (!cuboidQueue.isEmpty()) {
        long cuboidID = cuboidQueue.pop();
        Collection<Long> spanningCuboid = scheduler.getSpanningCuboid(cuboidID);
        for (Long sc : spanningCuboid) {
            cuboidQueue.push(sc);
        }

        totalSpace += estimateCuboidSpace(cuboidID, cardinality, cubeDesc);
    }
    return totalSpace;
}
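Note that cuboidQueue is only ever used through push and pop, so despite its name it acts as a stack and the spanning tree is walked depth-first; a Deque states that intent more directly. A minimal equivalent sketch of the loop, reusing the names from the example above:

    Deque<Long> cuboidStack = new ArrayDeque<>(); // java.util.ArrayDeque
    cuboidStack.push(baseCuboid);
    while (!cuboidStack.isEmpty()) {
        long cuboidID = cuboidStack.pop();
        for (Long sc : scheduler.getSpanningCuboid(cuboidID)) {
            cuboidStack.push(sc); // descend into children before siblings
        }
        totalSpace += estimateCuboidSpace(cuboidID, cardinality, cubeDesc);
    }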
 
Example #12
Source File: CubeManager.java    From kylin-on-parquet-v2 with Apache License 2.0
public CubeInstance dropCube(String cubeName, boolean deleteDesc) throws IOException {
    try (AutoLock lock = cubeMapLock.lockForWrite()) {
        logger.info("Dropping cube '{}'", cubeName);
        // load projects before remove cube from project

        // delete cube instance and cube desc
        CubeInstance cube = getCube(cubeName);

        // remove cube and update cache
        crud.delete(cube);
        Cuboid.clearCache(cube);

        if (deleteDesc && cube.getDescriptor() != null) {
            CubeDescManager.getInstance(config).removeCubeDesc(cube.getDescriptor());
        }

        // delete cube from project
        ProjectManager.getInstance(config).removeRealizationsFromProjects(RealizationType.CUBE, cubeName);

        return cube;
    }
}
 
Example #13
Source File: SegmentCubeTupleIterator.java    From kylin with Apache License 2.0
public SegmentCubeTupleIterator(CubeSegmentScanner scanner, Cuboid cuboid, Set<TblColRef> selectedDimensions, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context) {
    this.scanner = scanner;
    this.cuboid = cuboid;
    this.selectedDimensions = selectedDimensions;
    this.selectedMetrics = selectedMetrics;
    this.tupleInfo = returnTupleInfo;
    this.tuple = new Tuple(returnTupleInfo);
    this.context = context;

    CuboidToGridTableMapping mapping = context.getMapping();
    int[] gtDimsIdx = mapping.getDimIndexes(selectedDimensions);
    int[] gtMetricsIdx = mapping.getMetricsIndexes(selectedMetrics);
    // gtColIdx = gtDimsIdx + gtMetricsIdx
    int[] gtColIdx = new int[gtDimsIdx.length + gtMetricsIdx.length];
    System.arraycopy(gtDimsIdx, 0, gtColIdx, 0, gtDimsIdx.length);
    System.arraycopy(gtMetricsIdx, 0, gtColIdx, gtDimsIdx.length, gtMetricsIdx.length);

    this.gtValues = getGTValuesIterator(scanner.iterator(), scanner.getScanRequest(), gtDimsIdx, gtMetricsIdx);
    this.cubeTupleConverter = ((GTCubeStorageQueryBase) context.getStorageQuery()).newCubeTupleConverter(
            scanner.cubeSeg, cuboid, selectedDimensions, selectedMetrics, gtColIdx, tupleInfo);
}
 
Example #14
Source File: GTCubeStorageQueryBase.java    From kylin with Apache License 2.0
private void enableStreamAggregateIfBeneficial(Cuboid cuboid, Set<TblColRef> groupsD, StorageContext context) {
    CubeDesc cubeDesc = cuboid.getCubeDesc();
    boolean enabled = cubeDesc.getConfig().isStreamAggregateEnabled();

    Set<TblColRef> shardByInGroups = Sets.newHashSet();
    for (TblColRef col : cubeDesc.getShardByColumns()) {
        if (groupsD.contains(col)) {
            shardByInGroups.add(col);
        }
    }
    if (!shardByInGroups.isEmpty()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because shard by columns in groupD: {}",
                shardByInGroups);
    }

    if (!context.isNeedStorageAggregation()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because no storage aggregation");
    }

    if (enabled) {
        context.enableStreamAggregate();
    }
}
 
Example #15
Source File: NDCuboidBuilder.java    From kylin with Apache License 2.0
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers, ByteArray newKeyBodyBuf) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);

    // rowkey columns
    long mask = Long.highestOneBit(parentCuboid.getId());
    long parentCuboidId = parentCuboid.getId();
    long childCuboidId = childCuboid.getId();
    long parentCuboidIdActualLength = (long)Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId());
    int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId
    for (int i = 0; i < parentCuboidIdActualLength; i++) {
        if ((mask & parentCuboidId) > 0) { // if this bit is set in the parent cuboid
            if ((mask & childCuboidId) > 0) { // if the child cuboid also has this column
                System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(), offset, splitBuffers[index].length());
                offset += splitBuffers[index].length();
            }
            index++;
        }
        mask = mask >> 1;
    }

    rowkeyEncoder.fillHeader(newKeyBodyBuf.array());
}
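To make the bit walk concrete, a hypothetical trace: with parentCuboidId = 0b110 (dimensions A and B) and childCuboidId = 0b100 (dimension A only), the mask starts at 0b100. That bit is set in both parent and child, so splitBuffers[index] (dimension A) is copied and index advances; at 0b010 the bit is set only in the parent, so dimension B's buffer is skipped but index still advances; at 0b001 neither is set and nothing happens. The child key body therefore ends up holding only dimension A's bytes.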
 
Example #16
Source File: NDCuboidBuilder.java    From kylin-on-parquet-v2 with Apache License 2.0
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers, ByteArray newKeyBodyBuf) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);

    // rowkey columns
    long mask = Long.highestOneBit(parentCuboid.getId());
    long parentCuboidId = parentCuboid.getId();
    long childCuboidId = childCuboid.getId();
    long parentCuboidIdActualLength = (long)Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId());
    int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId
    for (int i = 0; i < parentCuboidIdActualLength; i++) {
        if ((mask & parentCuboidId) > 0) { // if this bit is set in the parent cuboid
            if ((mask & childCuboidId) > 0) { // if the child cuboid also has this column
                System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(), offset, splitBuffers[index].length());
                offset += splitBuffers[index].length();
            }
            index++;
        }
        mask = mask >> 1;
    }

    rowkeyEncoder.fillHeader(newKeyBodyBuf.array());
}
 
Example #17
Source File: CoprocessorProjector.java    From kylin-on-parquet-v2 with Apache License 2.0
public static CoprocessorProjector makeForObserver(final CubeSegment cubeSegment, final Cuboid cuboid, final Collection<TblColRef> dimensionColumns) {

    RowKeyEncoder rowKeyMaskEncoder = new RowKeyEncoder(cubeSegment, cuboid) {
        @Override
        public void fillHeader(byte[] bytes) {
            Arrays.fill(bytes, 0, this.getHeaderLength(), (byte) 0xff);
        }

        @Override
        protected void fillColumnValue(TblColRef column, int columnLen, String valueStr, byte[] outputValue, int outputValueOffset) {
            byte bits = dimensionColumns.contains(column) ? (byte) 0xff : 0x00;
            Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, bits);
        }
    };

    byte[] mask = rowKeyMaskEncoder.encode(new String[cuboid.getColumns().size()]);
    return new CoprocessorProjector(mask, dimensionColumns.size() != 0);
}
 
Example #18
Source File: AggregationGroup.java    From kylin-on-parquet-v2 with Apache License 2.0
public int getBuildLevel() {
    int ret = 1; // base cuboid => partial cube root
    if (this.getPartialCubeFullMask() == Cuboid.getBaseCuboidId(cubeDesc)) {
        ret -= 1; // if the partial cube's root is the base cuboid, one fewer aggregation round is needed
    }

    ret += getNormalDims().size();
    for (HierarchyMask hierarchyMask : this.hierarchyMasks) {
        ret += hierarchyMask.allMasks.length;
    }
    for (Long joint : joints) {
        if ((joint & this.getHierarchyDimsMask()) == 0) {
            ret += 1;
        }
    }

    return ret;
}
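As a hypothetical worked example of this formula: an aggregation group whose partial-cube root is not the base cuboid, with three normal dimensions, one hierarchy whose mask array has two levels, and one joint sharing no hierarchy dimension, builds 1 + 3 + 2 + 1 = 7 levels.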
 
Example #19
Source File: RowKeyEncoderProvider.java    From kylin-on-parquet-v2 with Apache License 2.0
public RowKeyEncoder getRowkeyEncoder(Cuboid cuboid) {
    RowKeyEncoder rowKeyEncoder = rowKeyEncoders.get(cuboid.getId());
    if (rowKeyEncoder == null) {
        rowKeyEncoder = new RowKeyEncoder(cubeSegment, cuboid);
        rowKeyEncoders.put(cuboid.getId(), rowKeyEncoder);
    }
    return rowKeyEncoder;
}
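On Java 8+ the get-then-put pattern above can be collapsed with Map.computeIfAbsent, which also avoids racy double construction if rowKeyEncoders were a ConcurrentMap; a minimal sketch under those assumptions:

public RowKeyEncoder getRowkeyEncoder(Cuboid cuboid) {
    // build the encoder at most once per cuboid id
    return rowKeyEncoders.computeIfAbsent(cuboid.getId(),
            id -> new RowKeyEncoder(cubeSegment, cuboid));
}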
 
Example #20
Source File: HadoopFileStorageQuery.java    From kylin-on-parquet-v2 with Apache License 2.0
public GTCubeStorageQueryRequest getStorageQueryRequest(StorageContext context, SQLDigest sqlDigest,
                                                        TupleInfo returnTupleInfo) {
    context.setStorageQuery(this);

    // cope with queries that have no aggregations
    RawQueryLastHacker.hackNoAggregations(sqlDigest, cubeDesc, returnTupleInfo);

    // Customized measure taking effect: e.g. allow custom measures to help raw queries
    notifyBeforeStorageQuery(sqlDigest);

    Collection<TblColRef> groups = sqlDigest.groupbyColumns;
    TupleFilter filter = sqlDigest.filter;

    // build dimension & metrics
    Set<TblColRef> dimensions = new LinkedHashSet<>();
    Set<FunctionDesc> metrics = new LinkedHashSet<>();
    buildDimensionsAndMetrics(sqlDigest, dimensions, metrics);

    // all dimensions = groups + other(like filter) dimensions
    Set<TblColRef> otherDims = Sets.newHashSet(dimensions);
    otherDims.removeAll(groups);

    // expand derived (xxxD means contains host columns only, derived columns were translated)
    Set<TblColRef> derivedPostAggregation = Sets.newHashSet();
    Set<TblColRef> groupsD = expandDerived(groups, derivedPostAggregation);
    Set<TblColRef> otherDimsD = expandDerived(otherDims, derivedPostAggregation);
    otherDimsD.removeAll(groupsD);

    // identify cuboid
    Set<TblColRef> dimensionsD = new LinkedHashSet<>();
    dimensionsD.addAll(groupsD);
    dimensionsD.addAll(otherDimsD);
    Cuboid cuboid = findCuboid(cubeInstance, dimensionsD, metrics);
    context.setCuboid(cuboid);
    return new GTCubeStorageQueryRequest(cuboid, dimensionsD, groupsD, null, null, null,
            metrics, null, null, null, context);
}
 
Example #21
Source File: NDCuboidMapper.java    From Kylin with Apache License 2.0
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidId = rowKeySplitter.split(key.getBytes(), key.getLength());
    Cuboid parentCuboid = Cuboid.findById(cubeDesc, cuboidId);

    Collection<Long> myChildren = cuboidScheduler.getSpanningCuboid(cuboidId);

    // nothing to spawn if the children list is null or empty
    if (myChildren == null || myChildren.isEmpty()) {
        context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Skipped records").increment(1L);
        skipCounter++;
        if (skipCounter % BatchConstants.COUNTER_MAX == 0) {
            logger.info("Skipped " + skipCounter + " records!");
        }
        return;
    }

    context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Processed records").increment(1L);

    handleCounter++;
    if (handleCounter % BatchConstants.COUNTER_MAX == 0) {
        logger.info("Handled " + handleCounter + " records!");
    }

    for (Long child : myChildren) {
        Cuboid childCuboid = Cuboid.findById(cubeDesc, child);
        int keyLength = buildKey(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers());
        outputKey.set(keyBuf, 0, keyLength);
        context.write(outputKey, value);
    }

}
 
Example #22
Source File: BaseCuboidMapper.java    From Kylin with Apache License 2.0
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
    intermediateTableRowDelimiter = context.getConfiguration().get(BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER, Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));
    if (Bytes.toBytes(intermediateTableRowDelimiter).length > 1) {
        throw new RuntimeException("Expected delimiter byte length is 1, but got " + Bytes.toBytes(intermediateTableRowDelimiter).length);
    }

    byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);

    intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);

    bytesSplitter = new BytesSplitter(200, 4096);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    measureCodec = new MeasureCodec(cubeDesc.getMeasures());
    measures = new Object[cubeDesc.getMeasures().size()];

    int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    keyBytesBuf = new byte[colCount][];

    initNullBytes();
}
 
Example #23
Source File: RowKeyEncoderTest.java    From Kylin with Apache License 2.0
@Test
public void testEncodeWithSlr2() throws Exception {
    CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY");
    // CubeSegment seg = cube.getTheOnlySegment();
    CubeDesc cubeDesc = cube.getDescriptor();
    // String data =
    // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular";
    byte[][] data = new byte[9][];
    data[0] = Bytes.toBytes("123456789");
    data[1] = null;
    data[2] = null;
    data[3] = null;
    data[4] = null;
    data[5] = null;
    data[6] = null;
    data[7] = null;
    data[8] = null;

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
    AbstractRowKeyEncoder rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cube.getFirstSegment(), baseCuboid);

    byte[] encodedKey = rowKeyEncoder.encode(data);
    assertEquals(48, encodedKey.length);
    byte[] sellerId = Arrays.copyOfRange(encodedKey, 8, 26);
    byte[] cuboidId = Arrays.copyOfRange(encodedKey, 0, 8);
    byte[] rest = Arrays.copyOfRange(encodedKey, 26, encodedKey.length);
    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertArrayEquals(new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, rest);
}
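The expected cuboid id of 511 follows from this test cube having nine rowkey dimensions: the base cuboid sets every dimension bit, so

    long expectedBaseCuboidId = (1L << 9) - 1; // 0b111111111 = 511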
 
Example #24
Source File: InMemCubeBuilder.java    From kylin with Apache License 2.0
private GridTable newGridTableByCuboidID(long cuboidID) throws IOException {
    GTInfo info = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, cuboidID),
            new CubeDimEncMap(cubeDesc, dictionaryMap)
    );

    // The store implementations below perform very similarly; ConcurrentDiskStore is the simplest.
    // MemDiskStore store = new MemDiskStore(info, memBudget == null ? MemoryBudgetController.ZERO_BUDGET : memBudget);
    // MemDiskStore store = new MemDiskStore(info, MemoryBudgetController.ZERO_BUDGET);
    IGTStore store = new ConcurrentDiskStore(info);

    GridTable gridTable = new GridTable(info, store);
    return gridTable;
}
 
Example #25
Source File: CubeGridTable.java    From kylin-on-parquet-v2 with Apache License 2.0
public static GTInfo newGTInfo(Cuboid cuboid, IDimensionEncodingMap dimEncMap, CuboidToGridTableMapping mapping) {
    GTInfo.Builder builder = GTInfo.builder();
    builder.setTableName("Cuboid " + cuboid.getId());
    builder.setCodeSystem(
            new CubeCodeSystem(mapping.getDimensionEncodings(dimEncMap), mapping.getDependentMetricsMap()));
    builder.setColumns(mapping.getDataTypes());
    builder.setPrimaryKey(mapping.getPrimaryKey());
    builder.enableColumnBlock(mapping.getColumnBlocks());
    if (mapping instanceof CuboidToGridTableMappingExt) {
        builder.enableDynamicDims(((CuboidToGridTableMappingExt) mapping).getDynamicDims());
    }
    return builder.build();
}
 
Example #26
Source File: CubeGridTable.java    From kylin-on-parquet-v2 with Apache License 2.0
public static GTInfo newGTInfo(Cuboid cuboid, IDimensionEncodingMap dimEncMap) {
    CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid);

    GTInfo.Builder builder = GTInfo.builder();
    builder.setTableName("Cuboid " + cuboid.getId());
    builder.setCodeSystem(
            new CubeCodeSystem(mapping.getDimensionEncodings(dimEncMap), mapping.getDependentMetricsMap()));
    builder.setColumns(mapping.getDataTypes());
    builder.setPrimaryKey(mapping.getPrimaryKey());
    builder.enableColumnBlock(mapping.getColumnBlocks());
    return builder.build();
}
 
Example #27
Source File: CoprocessorRowType.java    From kylin with Apache License 2.0
public static CoprocessorRowType fromCuboid(CubeSegment seg, Cuboid cuboid) {
    List<TblColRef> colList = cuboid.getColumns();
    TblColRef[] cols = colList.toArray(new TblColRef[colList.size()]);
    RowKeyColumnIO colIO = new RowKeyColumnIO(seg.getDimensionEncodingMap());
    int[] colSizes = new int[cols.length];
    for (int i = 0; i < cols.length; i++) {
        colSizes[i] = colIO.getColumnLength(cols[i]);
    }
    return new CoprocessorRowType(cols, colSizes, seg.getRowKeyPreambleSize());
}
 
Example #28
Source File: RowKeyEncoderTest.java    From kylin with Apache License 2.0
@Ignore
@Test
public void testEncodeWithSlr() throws Exception {
    CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY");
    // CubeSegment seg = cube.getTheOnlySegment();
    CubeDesc cubeDesc = cube.getDescriptor();
    // String data =
    // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular";
    String[] data = new String[9];
    data[0] = "123456789";
    data[1] = "2012-12-15";
    data[2] = "11848";
    data[3] = "Health & Beauty";
    data[4] = "Fragrances";
    data[5] = "Women";
    data[6] = "FP-GTC";
    data[7] = "0";
    data[8] = "15";

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
    RowKeyEncoder rowKeyEncoder = new RowKeyEncoder(cube.getFirstSegment(), baseCuboid);

    byte[] encodedKey = rowKeyEncoder.encode(data);
    assertEquals(43 + rowKeyEncoder.getHeaderLength(), encodedKey.length);
    byte[] shard = Arrays.copyOfRange(encodedKey, 0, RowConstants.ROWKEY_SHARDID_LEN);
    @SuppressWarnings("unused")
    byte[] sellerId = Arrays.copyOfRange(encodedKey, rowKeyEncoder.getHeaderLength(), 4 + rowKeyEncoder.getHeaderLength());
    byte[] cuboidId = Arrays.copyOfRange(encodedKey, RowConstants.ROWKEY_SHARDID_LEN, rowKeyEncoder.getHeaderLength());
    byte[] rest = Arrays.copyOfRange(encodedKey, 4 + rowKeyEncoder.getHeaderLength(), encodedKey.length);
    assertEquals(0, Bytes.toShort(shard));
    //        assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertArrayEquals(new byte[] { 11, 55, -13, 49, 49, 56, 52, 56, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }, rest);
}
 
Example #29
Source File: CubeJoinedFlatTableEnrich.java    From kylin-on-parquet-v2 with Apache License 2.0
private void parseCubeDesc() {
    Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);

    // build index for rowkey columns
    List<TblColRef> cuboidColumns = baseCuboid.getColumns();
    int rowkeyColCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    rowKeyColumnIndexes = new int[rowkeyColCount];
    for (int i = 0; i < rowkeyColCount; i++) {
        TblColRef col = cuboidColumns.get(i);
        rowKeyColumnIndexes[i] = flatDesc.getColumnIndex(col);
    }

    List<MeasureDesc> measures = cubeDesc.getMeasures();
    int measureSize = measures.size();
    measureColumnIndexes = new int[measureSize][];
    for (int i = 0; i < measureSize; i++) {
        FunctionDesc func = measures.get(i).getFunction();
        List<TblColRef> colRefs = func.getParameter().getColRefs();
        if (colRefs == null) {
            measureColumnIndexes[i] = null;
        } else {
            measureColumnIndexes[i] = new int[colRefs.size()];
            for (int j = 0; j < colRefs.size(); j++) {
                TblColRef c = colRefs.get(j);
                measureColumnIndexes[i][j] = flatDesc.getColumnIndex(c);
            }
        }
    }
}
 
Example #30
Source File: InMemCubeBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
private GridTable newGridTableByCuboidID(long cuboidID) throws IOException {
    GTInfo info = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, cuboidID),
            new CubeDimEncMap(cubeDesc, dictionaryMap));

    // The store implementations below perform very similarly; ConcurrentDiskStore is the simplest.
    // MemDiskStore store = new MemDiskStore(info, memBudget == null ? MemoryBudgetController.ZERO_BUDGET : memBudget);
    // MemDiskStore store = new MemDiskStore(info, MemoryBudgetController.ZERO_BUDGET);
    IGTStore store = new ConcurrentDiskStore(info);

    GridTable gridTable = new GridTable(info, store);
    return gridTable;
}