Java Code Examples for org.apache.kylin.cube.model.CubeDesc#getMeasures()

The following examples show how to use org.apache.kylin.cube.model.CubeDesc#getMeasures(). Each snippet is taken from an open-source project; the source file and project it comes from are listed above each example.
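Most of the snippets below follow the same pattern: resolve a CubeDesc from the cube metadata, call getMeasures() to obtain the measure definitions in declaration order, then feed that list to a codec, aggregator, or validation routine. The following minimal sketch illustrates that pattern; the standalone class and the cube name "test_cube" are illustrative assumptions and do not appear in any of the examples that follow.

import java.util.List;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.measure.BufferedMeasureCodec;
import org.apache.kylin.metadata.model.MeasureDesc;

public class GetMeasuresSketch {
    public static void main(String[] args) {
        // Assumes a Kylin environment whose metadata contains a cube named "test_cube" (hypothetical name).
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeInstance cube = CubeManager.getInstance(config).getCube("test_cube");
        CubeDesc cubeDesc = cube.getDescriptor();

        // getMeasures() returns the cube's measure definitions in declaration order.
        List<MeasureDesc> measures = cubeDesc.getMeasures();
        for (MeasureDesc measure : measures) {
            System.out.println(measure.getName() + " -> " + measure.getFunction().getExpression()
                    + " : " + measure.getFunction().getReturnType());
        }

        // The same list is what the examples below pass to BufferedMeasureCodec,
        // MeasureAggregators, and similar components.
        BufferedMeasureCodec codec = new BufferedMeasureCodec(measures);
        // codec.encode(...) / codec.decode(...) can now serialize measure value tuples.
    }
}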
Example 1
Source File: KeyValueCreator.java    From kylin with Apache License 2.0
public KeyValueCreator(CubeDesc cubeDesc, HBaseColumnDesc colDesc) {

    cfBytes = Bytes.toBytes(colDesc.getColumnFamilyName());
    qBytes = Bytes.toBytes(colDesc.getQualifier());
    timestamp = 0; // use 0 for timestamp

    refIndex = colDesc.getMeasureIndex();
    refMeasures = colDesc.getMeasures();

    codec = new BufferedMeasureCodec(refMeasures);
    colValues = new Object[refMeasures.length];

    isFullCopy = true;
    List<MeasureDesc> measures = cubeDesc.getMeasures();
    for (int i = 0; i < measures.size(); i++) {
        if (refIndex.length <= i || refIndex[i] != i)
            isFullCopy = false;
    }
}
 
Example 2
Source File: CubeHFileMapper.java    From Kylin with Apache License 2.0
public KeyValueCreator(CubeDesc cubeDesc, HBaseColumnDesc colDesc) {

    cfBytes = Bytes.toBytes(colDesc.getColumnFamilyName());
    qBytes = Bytes.toBytes(colDesc.getQualifier());
    timestamp = System.currentTimeMillis();

    List<MeasureDesc> measures = cubeDesc.getMeasures();
    String[] measureNames = getMeasureNames(cubeDesc);
    String[] refs = colDesc.getMeasureRefs();

    refIndex = new int[refs.length];
    refMeasures = new MeasureDesc[refs.length];
    for (int i = 0; i < refs.length; i++) {
        refIndex[i] = indexOf(measureNames, refs[i]);
        refMeasures[i] = measures.get(refIndex[i]);
    }

    codec = new MeasureCodec(refMeasures);
    colValues = new Object[refs.length];

    isFullCopy = true;
    for (int i = 0; i < measures.size(); i++) {
        if (refIndex.length <= i || refIndex[i] != i)
            isFullCopy = false;
    }
}
 
Example 3
Source File: InMemCuboidReducer.java    From kylin with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();

    List<MeasureDesc> measuresDescs = cubeDesc.getMeasures();
    codec = new BufferedMeasureCodec(measuresDescs);
    aggs = new MeasureAggregators(measuresDescs);
    input = new Object[measuresDescs.size()];
    result = new Object[measuresDescs.size()];

    outputKey = new Text();
    outputValue = new Text();
}
 
Example 4
Source File: CubeCapabilityChecker.java    From kylin-on-parquet-v2 with Apache License 2.0
private static void tryCustomMeasureTypes(Collection<TblColRef> unmatchedDimensions,
        Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, CubeInstance cube,
        CapabilityResult result) {
    CubeDesc cubeDesc = cube.getDescriptor();
    List<String> influencingMeasures = Lists.newArrayList();
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        //            if (unmatchedDimensions.isEmpty() && unmatchedAggregations.isEmpty())
        //                break;

        MeasureType<?> measureType = measure.getFunction().getMeasureType();
        if (measureType instanceof BasicMeasureType)
            continue;

        CapabilityInfluence inf = measureType.influenceCapabilityCheck(unmatchedDimensions, unmatchedAggregations,
                digest, measure);
        if (inf != null) {
            result.influences.add(inf);
            influencingMeasures.add(measure.getName() + "@" + measureType.getClass());
        }
    }
    if (influencingMeasures.size() != 0)
        logger.info("Cube {} CapabilityInfluences: {}", cube.getCanonicalName(),
                StringUtils.join(influencingMeasures, ","));
}
 
Example 5
Source File: KeyValueCreator.java    From kylin-on-parquet-v2 with Apache License 2.0
public KeyValueCreator(CubeDesc cubeDesc, HBaseColumnDesc colDesc) {

    cfBytes = Bytes.toBytes(colDesc.getColumnFamilyName());
    qBytes = Bytes.toBytes(colDesc.getQualifier());
    timestamp = 0; // use 0 for timestamp

    refIndex = colDesc.getMeasureIndex();
    refMeasures = colDesc.getMeasures();

    codec = new BufferedMeasureCodec(refMeasures);
    colValues = new Object[refMeasures.length];

    isFullCopy = true;
    List<MeasureDesc> measures = cubeDesc.getMeasures();
    for (int i = 0; i < measures.size(); i++) {
        if (refIndex.length <= i || refIndex[i] != i)
            isFullCopy = false;
    }
}
 
Example 6
Source File: DataController.java    From kylin with Apache License 2.0
private FunctionDesc findAggrFuncFromCubeDesc(CubeDesc cubeDesc, FunctionDesc aggrFunc) {
    aggrFunc.init(cubeDesc.getModel());
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        if (measure.getFunction().equals(aggrFunc))
            return measure.getFunction();
    }
    return aggrFunc;
}
 
Example 7
Source File: CubeSizeEstimationCLI.java    From Kylin with Apache License 2.0
private static int getMeasureSpace(CubeDesc cubeDesc) {
    int space = 0;
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        DataType returnType = measureDesc.getFunction().getReturnDataType();
        space += returnType.getSpaceEstimate();
    }
    return space;
}
 
Example 8
Source File: CubeReducerTest.java    From kylin with Apache License 2.0
@Test
public void testReducer() throws Exception {

    reduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, "test_kylin_cube_with_slr_ready");

    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    BufferedMeasureCodec codec = new BufferedMeasureCodec(cubeDesc.getMeasures());

    Text key1 = new Text("72010ustech");
    List<Text> values1 = new ArrayList<Text>();
    values1.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 100));
    values1.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 200));
    values1.add(newValueText(codec, "10", "10", "10", 1, 300));

    Text key2 = new Text("1tech");
    List<Text> values2 = new ArrayList<Text>();
    values2.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 500));
    values2.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 1000));

    Text key3 = new Text("0");
    List<Text> values3 = new ArrayList<Text>();
    values3.add(newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    reduceDriver.withInput(key1, values1);
    reduceDriver.withInput(key2, values2);
    reduceDriver.withInput(key3, values3);

    List<Pair<Text, Text>> result = reduceDriver.run();

    Pair<Text, Text> p1 = new Pair<Text, Text>(new Text("72010ustech"), newValueText(codec, "45.43", "10", "20.34", 3, 600));
    Pair<Text, Text> p2 = new Pair<Text, Text>(new Text("1tech"), newValueText(codec, "35.43", "15.09", "20.34", 2, 1500));
    Pair<Text, Text> p3 = new Pair<Text, Text>(new Text("0"), newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    assertEquals(3, result.size());

    assertTrue(result.contains(p1));
    assertTrue(result.contains(p2));
    assertTrue(result.contains(p3));
}
 
Example 9
Source File: RowValueDecoderTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testDecode() throws Exception {
    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    HBaseColumnDesc hbaseCol = cubeDesc.getHbaseMapping().getColumnFamily()[0].getColumns()[0];

    BufferedMeasureCodec codec = new BufferedMeasureCodec(hbaseCol.getMeasures());
    BigDecimal sum = new BigDecimal("333.1234567");
    BigDecimal min = new BigDecimal("333.1111111");
    BigDecimal max = new BigDecimal("333.1999999");
    Long count = new Long(2);
    Long item_count = new Long(100);
    ByteBuffer buf = codec.encode(new Object[] { sum, min, max, count, item_count });

    buf.flip();
    byte[] valueBytes = new byte[buf.limit()];
    System.arraycopy(buf.array(), 0, valueBytes, 0, buf.limit());

    RowValueDecoder rowValueDecoder = new RowValueDecoder(hbaseCol);
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        FunctionDesc aggrFunc = measure.getFunction();
        int index = hbaseCol.findMeasure(aggrFunc);
        rowValueDecoder.setProjectIndex(index);
    }

    rowValueDecoder.decodeAndConvertJavaObj(valueBytes);
    Object[] measureValues = rowValueDecoder.getValues();
    //BigDecimal.ROUND_HALF_EVEN in BigDecimalSerializer
    assertEquals("[333.1235, 333.1111, 333.2000, 2, 100]", Arrays.toString(measureValues));
}
 
Example 10
Source File: RowValueDecoderTest.java    From kylin with Apache License 2.0
@Test
public void testDecode() throws Exception {
    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    HBaseColumnDesc hbaseCol = cubeDesc.getHbaseMapping().getColumnFamily()[0].getColumns()[0];

    BufferedMeasureCodec codec = new BufferedMeasureCodec(hbaseCol.getMeasures());
    BigDecimal sum = new BigDecimal("333.1234567");
    BigDecimal min = new BigDecimal("333.1111111");
    BigDecimal max = new BigDecimal("333.1999999");
    Long count = new Long(2);
    Long item_count = new Long(100);
    ByteBuffer buf = codec.encode(new Object[] { sum, min, max, count, item_count });

    buf.flip();
    byte[] valueBytes = new byte[buf.limit()];
    System.arraycopy(buf.array(), 0, valueBytes, 0, buf.limit());

    RowValueDecoder rowValueDecoder = new RowValueDecoder(hbaseCol);
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        FunctionDesc aggrFunc = measure.getFunction();
        int index = hbaseCol.findMeasure(aggrFunc);
        rowValueDecoder.setProjectIndex(index);
    }

    rowValueDecoder.decodeAndConvertJavaObj(valueBytes);
    Object[] measureValues = rowValueDecoder.getValues();
    //BigDecimal.ROUND_HALF_EVEN in BigDecimalSerializer
    assertEquals("[333.1235, 333.1111, 333.2000, 2, 100]", Arrays.toString(measureValues));
}
 
Example 11
Source File: CubeReducerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testReducer() throws Exception {

    reduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, "test_kylin_cube_with_slr_ready");

    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    BufferedMeasureCodec codec = new BufferedMeasureCodec(cubeDesc.getMeasures());

    Text key1 = new Text("72010ustech");
    List<Text> values1 = new ArrayList<Text>();
    values1.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 100));
    values1.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 200));
    values1.add(newValueText(codec, "10", "10", "10", 1, 300));

    Text key2 = new Text("1tech");
    List<Text> values2 = new ArrayList<Text>();
    values2.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 500));
    values2.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 1000));

    Text key3 = new Text("0");
    List<Text> values3 = new ArrayList<Text>();
    values3.add(newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    reduceDriver.withInput(key1, values1);
    reduceDriver.withInput(key2, values2);
    reduceDriver.withInput(key3, values3);

    List<Pair<Text, Text>> result = reduceDriver.run();

    Pair<Text, Text> p1 = new Pair<Text, Text>(new Text("72010ustech"), newValueText(codec, "45.43", "10", "20.34", 3, 600));
    Pair<Text, Text> p2 = new Pair<Text, Text>(new Text("1tech"), newValueText(codec, "35.43", "15.09", "20.34", 2, 1500));
    Pair<Text, Text> p3 = new Pair<Text, Text>(new Text("0"), newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    assertEquals(3, result.size());

    assertTrue(result.contains(p1));
    assertTrue(result.contains(p2));
    assertTrue(result.contains(p3));
}
 
Example 12
Source File: CubeController.java    From kylin with Apache License 2.0
private void validateColumnFamily(CubeDesc cubeDesc) {
    Set<String> columnFamilyMetricsSet = Sets.newHashSet();
    for (HBaseColumnFamilyDesc hBaseColumnFamilyDesc : cubeDesc.getHbaseMapping().getColumnFamily()) {
        for (HBaseColumnDesc hBaseColumnDesc : hBaseColumnFamilyDesc.getColumns()) {
            for (String columnName : hBaseColumnDesc.getMeasureRefs()) {
                columnFamilyMetricsSet.add(columnName);
            }
        }
    }
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (!columnFamilyMetricsSet.contains(measureDesc.getName())) {
            throw new BadRequestException("column family lack measure:" + measureDesc.getName());
        }
    }
    if (cubeDesc.getMeasures().size() != columnFamilyMetricsSet.size()) {
        throw new BadRequestException(
                "the number of input measure and the number of measure defined in cubedesc are not consistent");
    }

    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        Object[] encodingConf = DimensionEncoding.parseEncodingConf(rowKeyColDesc.getEncoding());
        String encodingName = (String) encodingConf[0];
        String[] encodingArgs = (String[]) encodingConf[1];

        if (!DimensionEncodingFactory.isValidEncoding(encodingName, encodingArgs,
                rowKeyColDesc.getEncodingVersion())) {
            throw new BadRequestException("Illegal row key column desc: " + rowKeyColDesc);
        }
    }
}
 
Example 13
Source File: DataController.java    From kylin-on-parquet-v2 with Apache License 2.0
private FunctionDesc findAggrFuncFromCubeDesc(CubeDesc cubeDesc, FunctionDesc aggrFunc) {
    aggrFunc.init(cubeDesc.getModel());
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        if (measure.getFunction().equals(aggrFunc))
            return measure.getFunction();
    }
    return aggrFunc;
}
 
Example 14
Source File: FlinkCubingByLayer.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
    String enableObjectReuseOptValue = optionsHelper.getOptionValue(OPTION_ENABLE_OBJECT_REUSE);

    boolean enableObjectReuse = false;
    if (enableObjectReuseOptValue != null && !enableObjectReuseOptValue.isEmpty()) {
        enableObjectReuse = true;
    }

    Job job = Job.getInstance();
    FileSystem fs = HadoopUtil.getWorkingFileSystem();
    HadoopUtil.deletePath(job.getConfiguration(), new Path(outputPath));

    final SerializableConfiguration sConf = new SerializableConfiguration(job.getConfiguration());
    KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

    final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);
    final CubeDesc cubeDesc = cubeInstance.getDescriptor();
    final CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);

    logger.info("DataSet input path : {}", inputPath);
    logger.info("DataSet output path : {}", outputPath);

    int countMeasureIndex = 0;
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (measureDesc.getFunction().isCount() == true) {
            break;
        } else {
            countMeasureIndex++;
        }
    }

    final CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, envConfig);
    boolean[] needAggr = new boolean[cubeDesc.getMeasures().size()];
    boolean allNormalMeasure = true;
    for (int i = 0; i < cubeDesc.getMeasures().size(); i++) {
        needAggr[i] = !cubeDesc.getMeasures().get(i).getFunction().getMeasureType().onlyAggrInBaseCuboid();
        allNormalMeasure = allNormalMeasure && needAggr[i];
    }

    logger.info("All measure are normal (agg on all cuboids) ? : " + allNormalMeasure);

    boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE.equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (enableObjectReuse) {
        env.getConfig().enableObjectReuse();
    }
    env.getConfig().registerKryoType(PercentileCounter.class);
    env.getConfig().registerTypeWithKryoSerializer(PercentileCounter.class, PercentileCounterSerializer.class);

    DataSet<String[]> hiveDataSet = FlinkUtil.readHiveRecords(isSequenceFile, env, inputPath, hiveTable, job);

    DataSet<Tuple2<ByteArray, Object[]>> encodedBaseDataSet = hiveDataSet.mapPartition(
            new EncodeBaseCuboidMapPartitionFunction(cubeName, segmentId, metaUrl, sConf));

    Long totalCount = 0L;
    if (envConfig.isFlinkSanityCheckEnabled()) {
        totalCount = encodedBaseDataSet.count();
    }

    final BaseCuboidReduceGroupFunction baseCuboidReducerFunction = new BaseCuboidReduceGroupFunction(cubeName, metaUrl, sConf);

    BaseCuboidReduceGroupFunction reducerFunction = baseCuboidReducerFunction;
    if (!allNormalMeasure) {
        reducerFunction = new CuboidReduceGroupFunction(cubeName, metaUrl, sConf, needAggr);
    }

    final int totalLevels = cubeSegment.getCuboidScheduler().getBuildLevel();
    DataSet<Tuple2<ByteArray, Object[]>>[] allDataSets = new DataSet[totalLevels + 1];
    int level = 0;

    // aggregate to calculate base cuboid
    allDataSets[0] = encodedBaseDataSet.groupBy(0).reduceGroup(baseCuboidReducerFunction);

    sinkToHDFS(allDataSets[0], metaUrl, cubeName, cubeSegment, outputPath, 0, Job.getInstance(), envConfig);

    CuboidMapPartitionFunction mapPartitionFunction = new CuboidMapPartitionFunction(cubeName, segmentId, metaUrl, sConf);

    for (level = 1; level <= totalLevels; level++) {
        allDataSets[level] = allDataSets[level - 1].mapPartition(mapPartitionFunction).groupBy(0).reduceGroup(reducerFunction);
        if (envConfig.isFlinkSanityCheckEnabled()) {
            sanityCheck(allDataSets[level], totalCount, level, cubeStatsReader, countMeasureIndex);
        }
        sinkToHDFS(allDataSets[level], metaUrl, cubeName, cubeSegment, outputPath, level, Job.getInstance(), envConfig);
    }

    env.execute("Cubing for : " + cubeName + " segment " + segmentId);
    logger.info("Finished on calculating all level cuboids.");
    logger.info("HDFS: Number of bytes written=" + FlinkBatchCubingJobBuilder2.getFileSize(outputPath, fs));
}
 
Example 15
Source File: OLAPProjectRel.java    From kylin with Apache License 2.0
/**
 * Change Array[String] to Array[Specific Type] for intersect_count
 * https://github.com/apache/kylin/pull/785
 */
private void rewriteProjectsForArrayDataType() {
    if (hasIntersect) {
        Set<TblColRef> tblColRefs = new HashSet<>(context.allColumns); // all column
        IRealization realization = context.realization;
        TblColRef groupBy = null;
        DataType groupByType = null;
        if (realization instanceof CubeInstance) {
            CubeDesc cubeDesc = ((CubeInstance) realization).getDescriptor();
            for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
                if (measureDesc.getFunction().getMeasureType() instanceof BitmapMeasureType) {
                    TblColRef col1 = measureDesc.getFunction().getParameter().getColRef();
                    tblColRefs.remove(col1); // Remove all column included in COUNT_DISTINCT
                    logger.trace("Remove {}", col1);
                }
            }
            // After remove all columns included in COUNT_DISTINCT, last one should be a group by column
            if (tblColRefs.size() == 1) {
                for (TblColRef colRef : tblColRefs) {
                    groupBy = colRef;
                    groupByType = groupBy.getType();
                    logger.trace("Group By Column in intersect_count should be {}.", groupBy);
                }
                // only auto change to date/timestamp type from string type
                if (groupByType != null && groupByType.isDateTimeFamily()) {
                    for (int i = 0; i < this.rewriteProjects.size(); i++) {
                        RexNode rex = this.rewriteProjects.get(i);
                        if (groupByType.isTimestamp()) {
                            rewriteProjectForIntersect(rex, SqlTypeName.TIMESTAMP, timestampType,
                                    timestampArrayType, i);
                        } else if (groupByType.isDate()) {
                            rewriteProjectForIntersect(rex, SqlTypeName.DATE, dateType, dateArrayType, i);
                        }
                    }
                }
            } else {
                logger.trace("After remove, {}.", tblColRefs.size());
            }
        }
    }
}
 
Example 16
Source File: CubeDescManager.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * If any change needs to be applied after getting a cubeDesc from the front end, do it here
 * @param cubeDesc
 */
private void postProcessCubeDesc(CubeDesc cubeDesc) {
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (TopNMeasureType.FUNC_TOP_N.equalsIgnoreCase(measureDesc.getFunction().getExpression())) {
            // update return type scale with the estimated key length
            Map<String, String> configuration = measureDesc.getFunction().getConfiguration();
            ParameterDesc parameter = measureDesc.getFunction().getParameter();
            parameter = parameter.getNextParameter();
            int keyLength = 0;
            while (parameter != null) {
                String encoding = configuration.get(TopNMeasureType.CONFIG_ENCODING_PREFIX + parameter.getValue());
                String encodingVersionStr = configuration
                        .get(TopNMeasureType.CONFIG_ENCODING_VERSION_PREFIX + parameter.getValue());
                if (StringUtils.isEmpty(encoding) || DictionaryDimEnc.ENCODING_NAME.equals(encoding)) {
                    keyLength += DictionaryDimEnc.MAX_ENCODING_LENGTH; // estimation for dict encoding
                } else if (encoding.startsWith("dict")) {
                    throw new IllegalArgumentException(
                            "TOP_N's Encoding is " + encoding + ", please choose the correct one");
                } else {
                    // non-dict encoding
                    int encodingVersion = 1;
                    if (!StringUtils.isEmpty(encodingVersionStr)) {
                        try {
                            encodingVersion = Integer.parseInt(encodingVersionStr);
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("invalid encoding version: " + encodingVersionStr);
                        }
                    }
                    Object[] encodingConf = DimensionEncoding.parseEncodingConf(encoding);
                    DimensionEncoding dimensionEncoding = DimensionEncodingFactory.create((String) encodingConf[0],
                            (String[]) encodingConf[1], encodingVersion);
                    keyLength += dimensionEncoding.getLengthOfEncoding();
                }

                parameter = parameter.getNextParameter();
            }

            DataType returnType = DataType.getType(measureDesc.getFunction().getReturnType());
            DataType newReturnType = new DataType(returnType.getName(), returnType.getPrecision(), keyLength);
            measureDesc.getFunction().setReturnType(newReturnType.toString());
        }
    }
}
 
Example 17
Source File: SparkCubingByLayer.java    From kylin with Apache License 2.0
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);

    Class[] kryoClassArray = new Class[] { Class.forName("scala.reflect.ClassTag$$anon$1") };

    SparkConf conf = new SparkConf().setAppName("Cubing for:" + cubeName + " segment " + segmentId);
    //serialization conf
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");
    conf.set("spark.kryo.registrationRequired", "true").registerKryoClasses(kryoClassArray);

    KylinSparkJobListener jobListener = new KylinSparkJobListener();
    JavaSparkContext sc = new JavaSparkContext(conf);
    sc.sc().addSparkListener(jobListener);
    HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));
    SparkUtil.modifySparkHadoopConfiguration(sc.sc(), AbstractHadoopJob.loadKylinConfigFromHdfs(new SerializableConfiguration(sc.hadoopConfiguration()), metaUrl)); // set dfs.replication and enable compress
    final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
    KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

    final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);
    final CubeDesc cubeDesc = cubeInstance.getDescriptor();
    final CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);

    logger.info("RDD input path: {}", inputPath);
    logger.info("RDD Output path: {}", outputPath);

    final Job job = Job.getInstance(sConf.get());
    SparkUtil.setHadoopConfForCuboid(job, cubeSegment, metaUrl);

    int countMeasureIndex = 0;
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (measureDesc.getFunction().isCount() == true) {
            break;
        } else {
            countMeasureIndex++;
        }
    }
    final CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, envConfig);
    boolean[] needAggr = new boolean[cubeDesc.getMeasures().size()];
    boolean allNormalMeasure = true;
    for (int i = 0; i < cubeDesc.getMeasures().size(); i++) {
        needAggr[i] = !cubeDesc.getMeasures().get(i).getFunction().getMeasureType().onlyAggrInBaseCuboid();
        allNormalMeasure = allNormalMeasure && needAggr[i];
    }
    logger.info("All measure are normal (agg on all cuboids) ? : " + allNormalMeasure);
    StorageLevel storageLevel = StorageLevel.fromString(envConfig.getSparkStorageLevel());

    boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE.equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

    final JavaPairRDD<ByteArray, Object[]> encodedBaseRDD = SparkUtil
            .hiveRecordInputRDD(isSequenceFile, sc, inputPath, hiveTable)
            .mapToPair(new EncodeBaseCuboid(cubeName, segmentId, metaUrl, sConf));

    Long totalCount = 0L;
    if (envConfig.isSparkSanityCheckEnabled()) {
        totalCount = encodedBaseRDD.count();
    }

    final BaseCuboidReducerFunction2 baseCuboidReducerFunction = new BaseCuboidReducerFunction2(cubeName, metaUrl,
            sConf);
    BaseCuboidReducerFunction2 reducerFunction2 = baseCuboidReducerFunction;
    if (allNormalMeasure == false) {
        reducerFunction2 = new CuboidReducerFunction2(cubeName, metaUrl, sConf, needAggr);
    }

    final int totalLevels = cubeSegment.getCuboidScheduler().getBuildLevel();
    JavaPairRDD<ByteArray, Object[]>[] allRDDs = new JavaPairRDD[totalLevels + 1];
    int level = 0;
    int partition = SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig);

    // aggregate to calculate base cuboid
    allRDDs[0] = encodedBaseRDD.reduceByKey(baseCuboidReducerFunction, partition).persist(storageLevel);

    saveToHDFS(allRDDs[0], metaUrl, cubeName, cubeSegment, outputPath, 0, job, envConfig);

    PairFlatMapFunction flatMapFunction = new CuboidFlatMap(cubeName, segmentId, metaUrl, sConf);
    // aggregate to ND cuboids
    for (level = 1; level <= totalLevels; level++) {
        partition = SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig);

        allRDDs[level] = allRDDs[level - 1].flatMapToPair(flatMapFunction).reduceByKey(reducerFunction2, partition)
                .persist(storageLevel);
        allRDDs[level - 1].unpersist(false);
        if (envConfig.isSparkSanityCheckEnabled() == true) {
            sanityCheck(allRDDs[level], totalCount, level, cubeStatsReader, countMeasureIndex);
        }
        saveToHDFS(allRDDs[level], metaUrl, cubeName, cubeSegment, outputPath, level, job, envConfig);
    }
    allRDDs[totalLevels].unpersist(false);
    logger.info("Finished on calculating all level cuboids.");
    logger.info("HDFS: Number of bytes written=" + jobListener.metrics.getBytesWritten());
    //HadoopUtil.deleteHDFSMeta(metaUrl);
}
 
Example 18
Source File: FragmentFileSearcher.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void search(StreamingSearchContext searchContext, ResultCollector collector) throws IOException {
    String timezone = searchContext.getCubeDesc().getConfig().getStreamingDerivedTimeTimezone();
    long timezoneOffset = 0;
    if (timezone != null && timezone.length() > 0) {
        timezoneOffset = TimeZone.getTimeZone(timezone).getRawOffset();
    }
    FragmentMetaInfo fragmentMetaInfo = fragmentData.getFragmentMetaInfo();
    CuboidMetaInfo cuboidMetaInfo;
    if (searchContext.hitBasicCuboid()) {
        cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
    } else {
        cuboidMetaInfo = fragmentMetaInfo.getCuboidMetaInfo(searchContext.getHitCuboid());
        if (cuboidMetaInfo == null) {
            logger.warn("the cuboid:{} is not exist in the fragment:{}, use basic cuboid instead",
                    searchContext.getHitCuboid(), fragment.getFragmentId());
            cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
        }
    }

    ResponseResultSchema responseSchema = searchContext.getRespResultSchema();
    TblColRef[] dimensions = responseSchema.getDimensions();
    FunctionDesc[] metrics = responseSchema.getMetrics();
    Map<TblColRef, Dictionary<String>> dictMap = fragmentData.getDimensionDictionaries(dimensions);

    CubeDesc cubeDesc = responseSchema.getCubeDesc();
    List<MeasureDesc> allMeasures = cubeDesc.getMeasures();
    Map<FunctionDesc, MeasureDesc> funcMeasureMap = Maps.newHashMap();
    for (MeasureDesc measure : allMeasures) {
        funcMeasureMap.put(measure.getFunction(), measure);
    }
    MeasureDesc[] measures = new MeasureDesc[metrics.length];
    for (int i = 0; i < measures.length; i++) {
        measures[i] = funcMeasureMap.get(metrics[i]);
    }
    DimensionEncoding[] dimensionEncodings = ParsedStreamingCubeInfo.getDimensionEncodings(cubeDesc, dimensions,
            dictMap);
    ColumnarMetricsEncoding[] metricsEncodings = ParsedStreamingCubeInfo.getMetricsEncodings(measures);
    ColumnarRecordCodec recordCodec = new ColumnarRecordCodec(dimensionEncodings, metricsEncodings);

    // change the unEvaluable dimensions to groupBy
    Set<TblColRef> unEvaluateDims = Sets.newHashSet();
    TupleFilter fragmentFilter = null;
    if (searchContext.getFilter() != null) {
        fragmentFilter = convertFilter(fragmentMetaInfo, searchContext.getFilter(), recordCodec, dimensions,
                new CubeDimEncMap(cubeDesc, dictMap), unEvaluateDims, timezoneOffset);
    }
    if (ConstantTupleFilter.TRUE == fragmentFilter) {
        fragmentFilter = null;
    } else if (ConstantTupleFilter.FALSE == fragmentFilter) {
        collector.collectSearchResult(IStreamingSearchResult.EMPTY_RESULT);
    }
    Set<TblColRef> groups = searchContext.getGroups();
    if (!unEvaluateDims.isEmpty()) {
        searchContext.addNewGroups(unEvaluateDims);
        groups = Sets.union(groups, unEvaluateDims);
    }
    collector.collectSearchResult(new FragmentSearchResult(fragment, fragmentData, cuboidMetaInfo, responseSchema, fragmentFilter, groups, searchContext.getHavingFilter(),
            recordCodec));
}
 
Example 19
Source File: FunctionRule.java    From Kylin with Apache License 2.0
@Override
public void validate(CubeDesc cube, ValidateContext context) {
    List<MeasureDesc> measures = cube.getMeasures();

    List<FunctionDesc> countFuncs = new ArrayList<FunctionDesc>();

    Iterator<MeasureDesc> it = measures.iterator();
    while (it.hasNext()) {
        MeasureDesc measure = it.next();
        FunctionDesc func = measure.getFunction();
        ParameterDesc parameter = func.getParameter();
        if (parameter == null) {
            context.addResult(ResultLevel.ERROR, "Must define parameter for function " + func.getExpression() + " in " + measure.getName());
            return;
        }

        String type = func.getParameter().getType();
        String value = func.getParameter().getValue();
        if (StringUtils.isEmpty(type)) {
            context.addResult(ResultLevel.ERROR, "Must define type for parameter type " + func.getExpression() + " in " + measure.getName());
            return;
        }
        if (StringUtils.isEmpty(value)) {
            context.addResult(ResultLevel.ERROR, "Must define type for parameter value " + func.getExpression() + " in " + measure.getName());
            return;
        }
        if (StringUtils.isEmpty(func.getReturnType())) {
            context.addResult(ResultLevel.ERROR, "Must define return type for function " + func.getExpression() + " in " + measure.getName());
            return;
        }

        if (StringUtils.equalsIgnoreCase(FunctionDesc.PARAMETER_TYPE_COLUMN, type)) {
            validateColumnParameter(context, cube, value);
        } else if (StringUtils.equals(FunctionDesc.PARAMTER_TYPE_CONSTANT, type)) {
            validateCostantParameter(context, cube, value);
        }
        validateReturnType(context, cube, func);

        if (func.isCount())
            countFuncs.add(func);
    }

    if (countFuncs.size() != 1) {
        context.addResult(ResultLevel.ERROR, "Must define one and only one count(1) function, but there are " + countFuncs.size() + " -- " + countFuncs);
    }
}
 
Example 20
Source File: FragmentFileSearcher.java    From kylin with Apache License 2.0
@Override
public void search(StreamingSearchContext searchContext, ResultCollector collector) throws IOException {
    String timezone = searchContext.getCubeDesc().getConfig().getStreamingDerivedTimeTimezone();
    long timezoneOffset = 0;
    if (timezone != null && timezone.length() > 0) {
        timezoneOffset = TimeZone.getTimeZone(timezone).getRawOffset();
    }
    FragmentMetaInfo fragmentMetaInfo = fragmentData.getFragmentMetaInfo();
    CuboidMetaInfo cuboidMetaInfo;
    if (searchContext.hitBasicCuboid()) {
        cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
    } else {
        cuboidMetaInfo = fragmentMetaInfo.getCuboidMetaInfo(searchContext.getHitCuboid());
        if (cuboidMetaInfo == null) {
            logger.warn("the cuboid:{} is not exist in the fragment:{}, use basic cuboid instead",
                    searchContext.getHitCuboid(), fragment.getFragmentId());
            cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
        }
    }

    ResponseResultSchema responseSchema = searchContext.getRespResultSchema();
    TblColRef[] dimensions = responseSchema.getDimensions();
    FunctionDesc[] metrics = responseSchema.getMetrics();
    Map<TblColRef, Dictionary<String>> dictMap = fragmentData.getDimensionDictionaries(dimensions);

    CubeDesc cubeDesc = responseSchema.getCubeDesc();
    List<MeasureDesc> allMeasures = cubeDesc.getMeasures();
    Map<FunctionDesc, MeasureDesc> funcMeasureMap = Maps.newHashMap();
    for (MeasureDesc measure : allMeasures) {
        funcMeasureMap.put(measure.getFunction(), measure);
    }
    MeasureDesc[] measures = new MeasureDesc[metrics.length];
    for (int i = 0; i < measures.length; i++) {
        measures[i] = funcMeasureMap.get(metrics[i]);
    }
    DimensionEncoding[] dimensionEncodings = ParsedStreamingCubeInfo.getDimensionEncodings(cubeDesc, dimensions,
            dictMap);
    ColumnarMetricsEncoding[] metricsEncodings = ParsedStreamingCubeInfo.getMetricsEncodings(measures);
    ColumnarRecordCodec recordCodec = new ColumnarRecordCodec(dimensionEncodings, metricsEncodings);

    // change the unEvaluable dimensions to groupBy
    Set<TblColRef> unEvaluateDims = Sets.newHashSet();
    TupleFilter fragmentFilter = null;
    if (searchContext.getFilter() != null) {
        fragmentFilter = convertFilter(fragmentMetaInfo, searchContext.getFilter(), recordCodec, dimensions,
                new CubeDimEncMap(cubeDesc, dictMap), unEvaluateDims, timezoneOffset);
    }
    if (ConstantTupleFilter.TRUE == fragmentFilter) {
        fragmentFilter = null;
    } else if (ConstantTupleFilter.FALSE == fragmentFilter) {
        collector.collectSearchResult(IStreamingSearchResult.EMPTY_RESULT);
    }
    Set<TblColRef> groups = searchContext.getGroups();
    if (!unEvaluateDims.isEmpty()) {
        searchContext.addNewGroups(unEvaluateDims);
        groups = Sets.union(groups, unEvaluateDims);
    }
    collector.collectSearchResult(new FragmentSearchResult(fragment, fragmentData, cuboidMetaInfo, responseSchema, fragmentFilter, groups, searchContext.getHavingFilter(),
            recordCodec));
}