Java Code Examples for org.apache.kylin.cube.model.CubeDesc#getMeasures()
The following examples show how to use org.apache.kylin.cube.model.CubeDesc#getMeasures().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
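Before the individual examples, here is a minimal sketch of the pattern most of them share: getMeasures() returns the cube's List<MeasureDesc>, which is typically handed to a measure codec and a set of aggregators (compare Examples 3, 8 and 11 below). The class and method names introduced here (MeasureSetupSketch, setupMeasures) are illustrative only and not part of the Kylin API; the Kylin package paths are assumed from recent 2.x/3.x releases.

import java.util.List;

import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.measure.BufferedMeasureCodec;
import org.apache.kylin.measure.MeasureAggregators;
import org.apache.kylin.metadata.model.MeasureDesc;

public class MeasureSetupSketch {

    // Illustrative helper: wire the cube's measure list into a codec and aggregators,
    // mirroring the doSetup() logic shown in Example 3 below.
    static void setupMeasures(CubeDesc cubeDesc) {
        List<MeasureDesc> measures = cubeDesc.getMeasures();

        // the codec serializes/deserializes one value per MeasureDesc
        BufferedMeasureCodec codec = new BufferedMeasureCodec(measures);
        // the aggregators combine partial measure values (SUM, MIN, MAX, COUNT, ...)
        MeasureAggregators aggs = new MeasureAggregators(measures);

        // one slot per measure when decoding or aggregating a row
        Object[] values = new Object[measures.size()];
        // ... use codec, aggs and values exactly as the reducer examples below do
    }
}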
Example 1
Source File: KeyValueCreator.java From kylin with Apache License 2.0 | 6 votes |
public KeyValueCreator(CubeDesc cubeDesc, HBaseColumnDesc colDesc) {
    cfBytes = Bytes.toBytes(colDesc.getColumnFamilyName());
    qBytes = Bytes.toBytes(colDesc.getQualifier());
    timestamp = 0; // use 0 for timestamp

    refIndex = colDesc.getMeasureIndex();
    refMeasures = colDesc.getMeasures();

    codec = new BufferedMeasureCodec(refMeasures);
    colValues = new Object[refMeasures.length];

    isFullCopy = true;
    List<MeasureDesc> measures = cubeDesc.getMeasures();
    for (int i = 0; i < measures.size(); i++) {
        if (refIndex.length <= i || refIndex[i] != i)
            isFullCopy = false;
    }
}
Example 2
Source File: CubeHFileMapper.java From Kylin with Apache License 2.0 | 6 votes |
public KeyValueCreator(CubeDesc cubeDesc, HBaseColumnDesc colDesc) {
    cfBytes = Bytes.toBytes(colDesc.getColumnFamilyName());
    qBytes = Bytes.toBytes(colDesc.getQualifier());
    timestamp = System.currentTimeMillis();

    List<MeasureDesc> measures = cubeDesc.getMeasures();
    String[] measureNames = getMeasureNames(cubeDesc);
    String[] refs = colDesc.getMeasureRefs();

    refIndex = new int[refs.length];
    refMeasures = new MeasureDesc[refs.length];
    for (int i = 0; i < refs.length; i++) {
        refIndex[i] = indexOf(measureNames, refs[i]);
        refMeasures[i] = measures.get(refIndex[i]);
    }

    codec = new MeasureCodec(refMeasures);
    colValues = new Object[refs.length];

    isFullCopy = true;
    for (int i = 0; i < measures.size(); i++) {
        if (refIndex.length <= i || refIndex[i] != i)
            isFullCopy = false;
    }
}
Example 3
Source File: InMemCuboidReducer.java From kylin with Apache License 2.0 | 6 votes |
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    List<MeasureDesc> measuresDescs = cubeDesc.getMeasures();

    codec = new BufferedMeasureCodec(measuresDescs);
    aggs = new MeasureAggregators(measuresDescs);

    input = new Object[measuresDescs.size()];
    result = new Object[measuresDescs.size()];

    outputKey = new Text();
    outputValue = new Text();
}
Example 4
Source File: CubeCapabilityChecker.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
private static void tryCustomMeasureTypes(Collection<TblColRef> unmatchedDimensions,
        Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, CubeInstance cube,
        CapabilityResult result) {
    CubeDesc cubeDesc = cube.getDescriptor();
    List<String> influencingMeasures = Lists.newArrayList();
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        // if (unmatchedDimensions.isEmpty() && unmatchedAggregations.isEmpty())
        //     break;

        MeasureType<?> measureType = measure.getFunction().getMeasureType();
        if (measureType instanceof BasicMeasureType)
            continue;

        CapabilityInfluence inf = measureType.influenceCapabilityCheck(unmatchedDimensions, unmatchedAggregations,
                digest, measure);
        if (inf != null) {
            result.influences.add(inf);
            influencingMeasures.add(measure.getName() + "@" + measureType.getClass());
        }
    }
    if (influencingMeasures.size() != 0)
        logger.info("Cube {} CapabilityInfluences: {}", cube.getCanonicalName(),
                StringUtils.join(influencingMeasures, ","));
}
Example 5
Source File: KeyValueCreator.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
public KeyValueCreator(CubeDesc cubeDesc, HBaseColumnDesc colDesc) {
    cfBytes = Bytes.toBytes(colDesc.getColumnFamilyName());
    qBytes = Bytes.toBytes(colDesc.getQualifier());
    timestamp = 0; // use 0 for timestamp

    refIndex = colDesc.getMeasureIndex();
    refMeasures = colDesc.getMeasures();

    codec = new BufferedMeasureCodec(refMeasures);
    colValues = new Object[refMeasures.length];

    isFullCopy = true;
    List<MeasureDesc> measures = cubeDesc.getMeasures();
    for (int i = 0; i < measures.size(); i++) {
        if (refIndex.length <= i || refIndex[i] != i)
            isFullCopy = false;
    }
}
Example 6
Source File: DataController.java From kylin with Apache License 2.0 | 5 votes |
private FunctionDesc findAggrFuncFromCubeDesc(CubeDesc cubeDesc, FunctionDesc aggrFunc) {
    aggrFunc.init(cubeDesc.getModel());
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        if (measure.getFunction().equals(aggrFunc))
            return measure.getFunction();
    }
    return aggrFunc;
}
Example 7
Source File: CubeSizeEstimationCLI.java From Kylin with Apache License 2.0 | 5 votes |
private static int getMeasureSpace(CubeDesc cubeDesc) {
    int space = 0;
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        DataType returnType = measureDesc.getFunction().getReturnDataType();
        space += returnType.getSpaceEstimate();
    }
    return space;
}
Example 8
Source File: CubeReducerTest.java From kylin with Apache License 2.0 | 5 votes |
@Test
public void testReducer() throws Exception {
    reduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, "test_kylin_cube_with_slr_ready");

    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    BufferedMeasureCodec codec = new BufferedMeasureCodec(cubeDesc.getMeasures());

    Text key1 = new Text("72010ustech");
    List<Text> values1 = new ArrayList<Text>();
    values1.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 100));
    values1.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 200));
    values1.add(newValueText(codec, "10", "10", "10", 1, 300));

    Text key2 = new Text("1tech");
    List<Text> values2 = new ArrayList<Text>();
    values2.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 500));
    values2.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 1000));

    Text key3 = new Text("0");
    List<Text> values3 = new ArrayList<Text>();
    values3.add(newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    reduceDriver.withInput(key1, values1);
    reduceDriver.withInput(key2, values2);
    reduceDriver.withInput(key3, values3);

    List<Pair<Text, Text>> result = reduceDriver.run();

    Pair<Text, Text> p1 = new Pair<Text, Text>(new Text("72010ustech"), newValueText(codec, "45.43", "10", "20.34", 3, 600));
    Pair<Text, Text> p2 = new Pair<Text, Text>(new Text("1tech"), newValueText(codec, "35.43", "15.09", "20.34", 2, 1500));
    Pair<Text, Text> p3 = new Pair<Text, Text>(new Text("0"), newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    assertEquals(3, result.size());

    assertTrue(result.contains(p1));
    assertTrue(result.contains(p2));
    assertTrue(result.contains(p3));
}
Example 9
Source File: RowValueDecoderTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
@Test
public void testDecode() throws Exception {
    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    HBaseColumnDesc hbaseCol = cubeDesc.getHbaseMapping().getColumnFamily()[0].getColumns()[0];

    BufferedMeasureCodec codec = new BufferedMeasureCodec(hbaseCol.getMeasures());
    BigDecimal sum = new BigDecimal("333.1234567");
    BigDecimal min = new BigDecimal("333.1111111");
    BigDecimal max = new BigDecimal("333.1999999");
    Long count = new Long(2);
    Long item_count = new Long(100);
    ByteBuffer buf = codec.encode(new Object[] { sum, min, max, count, item_count });

    buf.flip();
    byte[] valueBytes = new byte[buf.limit()];
    System.arraycopy(buf.array(), 0, valueBytes, 0, buf.limit());

    RowValueDecoder rowValueDecoder = new RowValueDecoder(hbaseCol);
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        FunctionDesc aggrFunc = measure.getFunction();
        int index = hbaseCol.findMeasure(aggrFunc);
        rowValueDecoder.setProjectIndex(index);
    }
    rowValueDecoder.decodeAndConvertJavaObj(valueBytes);
    Object[] measureValues = rowValueDecoder.getValues();
    // BigDecimal.ROUND_HALF_EVEN in BigDecimalSerializer
    assertEquals("[333.1235, 333.1111, 333.2000, 2, 100]", Arrays.toString(measureValues));
}
Example 10
Source File: RowValueDecoderTest.java From kylin with Apache License 2.0 | 5 votes |
@Test
public void testDecode() throws Exception {
    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    HBaseColumnDesc hbaseCol = cubeDesc.getHbaseMapping().getColumnFamily()[0].getColumns()[0];

    BufferedMeasureCodec codec = new BufferedMeasureCodec(hbaseCol.getMeasures());
    BigDecimal sum = new BigDecimal("333.1234567");
    BigDecimal min = new BigDecimal("333.1111111");
    BigDecimal max = new BigDecimal("333.1999999");
    Long count = new Long(2);
    Long item_count = new Long(100);
    ByteBuffer buf = codec.encode(new Object[] { sum, min, max, count, item_count });

    buf.flip();
    byte[] valueBytes = new byte[buf.limit()];
    System.arraycopy(buf.array(), 0, valueBytes, 0, buf.limit());

    RowValueDecoder rowValueDecoder = new RowValueDecoder(hbaseCol);
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        FunctionDesc aggrFunc = measure.getFunction();
        int index = hbaseCol.findMeasure(aggrFunc);
        rowValueDecoder.setProjectIndex(index);
    }
    rowValueDecoder.decodeAndConvertJavaObj(valueBytes);
    Object[] measureValues = rowValueDecoder.getValues();
    // BigDecimal.ROUND_HALF_EVEN in BigDecimalSerializer
    assertEquals("[333.1235, 333.1111, 333.2000, 2, 100]", Arrays.toString(measureValues));
}
Example 11
Source File: CubeReducerTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
@Test
public void testReducer() throws Exception {
    reduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, "test_kylin_cube_with_slr_ready");

    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    BufferedMeasureCodec codec = new BufferedMeasureCodec(cubeDesc.getMeasures());

    Text key1 = new Text("72010ustech");
    List<Text> values1 = new ArrayList<Text>();
    values1.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 100));
    values1.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 200));
    values1.add(newValueText(codec, "10", "10", "10", 1, 300));

    Text key2 = new Text("1tech");
    List<Text> values2 = new ArrayList<Text>();
    values2.add(newValueText(codec, "15.09", "15.09", "15.09", 1, 500));
    values2.add(newValueText(codec, "20.34", "20.34", "20.34", 1, 1000));

    Text key3 = new Text("0");
    List<Text> values3 = new ArrayList<Text>();
    values3.add(newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    reduceDriver.withInput(key1, values1);
    reduceDriver.withInput(key2, values2);
    reduceDriver.withInput(key3, values3);

    List<Pair<Text, Text>> result = reduceDriver.run();

    Pair<Text, Text> p1 = new Pair<Text, Text>(new Text("72010ustech"), newValueText(codec, "45.43", "10", "20.34", 3, 600));
    Pair<Text, Text> p2 = new Pair<Text, Text>(new Text("1tech"), newValueText(codec, "35.43", "15.09", "20.34", 2, 1500));
    Pair<Text, Text> p3 = new Pair<Text, Text>(new Text("0"), newValueText(codec, "146.52", "146.52", "146.52", 0, 0));

    assertEquals(3, result.size());

    assertTrue(result.contains(p1));
    assertTrue(result.contains(p2));
    assertTrue(result.contains(p3));
}
Example 12
Source File: CubeController.java From kylin with Apache License 2.0 | 5 votes |
private void validateColumnFamily(CubeDesc cubeDesc) {
    Set<String> columnFamilyMetricsSet = Sets.newHashSet();
    for (HBaseColumnFamilyDesc hBaseColumnFamilyDesc : cubeDesc.getHbaseMapping().getColumnFamily()) {
        for (HBaseColumnDesc hBaseColumnDesc : hBaseColumnFamilyDesc.getColumns()) {
            for (String columnName : hBaseColumnDesc.getMeasureRefs()) {
                columnFamilyMetricsSet.add(columnName);
            }
        }
    }
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (!columnFamilyMetricsSet.contains(measureDesc.getName())) {
            throw new BadRequestException("column family lack measure:" + measureDesc.getName());
        }
    }
    if (cubeDesc.getMeasures().size() != columnFamilyMetricsSet.size()) {
        throw new BadRequestException(
                "the number of input measure and the number of measure defined in cubedesc are not consistent");
    }
    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        Object[] encodingConf = DimensionEncoding.parseEncodingConf(rowKeyColDesc.getEncoding());
        String encodingName = (String) encodingConf[0];
        String[] encodingArgs = (String[]) encodingConf[1];
        if (!DimensionEncodingFactory.isValidEncoding(encodingName, encodingArgs, rowKeyColDesc.getEncodingVersion())) {
            throw new BadRequestException("Illegal row key column desc: " + rowKeyColDesc);
        }
    }
}
Example 13
Source File: DataController.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
private FunctionDesc findAggrFuncFromCubeDesc(CubeDesc cubeDesc, FunctionDesc aggrFunc) {
    aggrFunc.init(cubeDesc.getModel());
    for (MeasureDesc measure : cubeDesc.getMeasures()) {
        if (measure.getFunction().equals(aggrFunc))
            return measure.getFunction();
    }
    return aggrFunc;
}
Example 14
Source File: FlinkCubingByLayer.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
    String enableObjectReuseOptValue = optionsHelper.getOptionValue(OPTION_ENABLE_OBJECT_REUSE);

    boolean enableObjectReuse = false;
    if (enableObjectReuseOptValue != null && !enableObjectReuseOptValue.isEmpty()) {
        enableObjectReuse = true;
    }

    Job job = Job.getInstance();
    FileSystem fs = HadoopUtil.getWorkingFileSystem();
    HadoopUtil.deletePath(job.getConfiguration(), new Path(outputPath));

    final SerializableConfiguration sConf = new SerializableConfiguration(job.getConfiguration());
    KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

    final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);
    final CubeDesc cubeDesc = cubeInstance.getDescriptor();
    final CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);

    logger.info("DataSet input path : {}", inputPath);
    logger.info("DataSet output path : {}", outputPath);

    int countMeasureIndex = 0;
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (measureDesc.getFunction().isCount() == true) {
            break;
        } else {
            countMeasureIndex++;
        }
    }

    final CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, envConfig);
    boolean[] needAggr = new boolean[cubeDesc.getMeasures().size()];
    boolean allNormalMeasure = true;
    for (int i = 0; i < cubeDesc.getMeasures().size(); i++) {
        needAggr[i] = !cubeDesc.getMeasures().get(i).getFunction().getMeasureType().onlyAggrInBaseCuboid();
        allNormalMeasure = allNormalMeasure && needAggr[i];
    }
    logger.info("All measure are normal (agg on all cuboids) ? : " + allNormalMeasure);

    boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE.equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (enableObjectReuse) {
        env.getConfig().enableObjectReuse();
    }
    env.getConfig().registerKryoType(PercentileCounter.class);
    env.getConfig().registerTypeWithKryoSerializer(PercentileCounter.class, PercentileCounterSerializer.class);

    DataSet<String[]> hiveDataSet = FlinkUtil.readHiveRecords(isSequenceFile, env, inputPath, hiveTable, job);

    DataSet<Tuple2<ByteArray, Object[]>> encodedBaseDataSet = hiveDataSet.mapPartition(
            new EncodeBaseCuboidMapPartitionFunction(cubeName, segmentId, metaUrl, sConf));

    Long totalCount = 0L;
    if (envConfig.isFlinkSanityCheckEnabled()) {
        totalCount = encodedBaseDataSet.count();
    }

    final BaseCuboidReduceGroupFunction baseCuboidReducerFunction = new BaseCuboidReduceGroupFunction(cubeName, metaUrl, sConf);

    BaseCuboidReduceGroupFunction reducerFunction = baseCuboidReducerFunction;
    if (!allNormalMeasure) {
        reducerFunction = new CuboidReduceGroupFunction(cubeName, metaUrl, sConf, needAggr);
    }

    final int totalLevels = cubeSegment.getCuboidScheduler().getBuildLevel();
    DataSet<Tuple2<ByteArray, Object[]>>[] allDataSets = new DataSet[totalLevels + 1];
    int level = 0;

    // aggregate to calculate base cuboid
    allDataSets[0] = encodedBaseDataSet.groupBy(0).reduceGroup(baseCuboidReducerFunction);

    sinkToHDFS(allDataSets[0], metaUrl, cubeName, cubeSegment, outputPath, 0, Job.getInstance(), envConfig);

    CuboidMapPartitionFunction mapPartitionFunction = new CuboidMapPartitionFunction(cubeName, segmentId, metaUrl, sConf);

    for (level = 1; level <= totalLevels; level++) {
        allDataSets[level] = allDataSets[level - 1].mapPartition(mapPartitionFunction).groupBy(0).reduceGroup(reducerFunction);
        if (envConfig.isFlinkSanityCheckEnabled()) {
            sanityCheck(allDataSets[level], totalCount, level, cubeStatsReader, countMeasureIndex);
        }
        sinkToHDFS(allDataSets[level], metaUrl, cubeName, cubeSegment, outputPath, level, Job.getInstance(), envConfig);
    }

    env.execute("Cubing for : " + cubeName + " segment " + segmentId);
    logger.info("Finished on calculating all level cuboids.");
    logger.info("HDFS: Number of bytes written=" + FlinkBatchCubingJobBuilder2.getFileSize(outputPath, fs));
}
Example 15
Source File: OLAPProjectRel.java From kylin with Apache License 2.0 | 4 votes |
/**
 * Change Array[String] to Array[Specific Type] for intersect_count
 * https://github.com/apache/kylin/pull/785
 */
private void rewriteProjectsForArrayDataType() {
    if (hasIntersect) {
        Set<TblColRef> tblColRefs = new HashSet<>(context.allColumns); // all column
        IRealization realization = context.realization;
        TblColRef groupBy = null;
        DataType groupByType = null;
        if (realization instanceof CubeInstance) {
            CubeDesc cubeDesc = ((CubeInstance) realization).getDescriptor();
            for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
                if (measureDesc.getFunction().getMeasureType() instanceof BitmapMeasureType) {
                    TblColRef col1 = measureDesc.getFunction().getParameter().getColRef();
                    tblColRefs.remove(col1); // Remove all column included in COUNT_DISTINCT
                    logger.trace("Remove {}", col1);
                }
            }

            // After remove all columns included in COUNT_DISTINCT, last one should be a group by column
            if (tblColRefs.size() == 1) {
                for (TblColRef colRef : tblColRefs) {
                    groupBy = colRef;
                    groupByType = groupBy.getType();
                    logger.trace("Group By Column in intersect_count should be {}.", groupBy);
                }

                // only auto change to date/timestamp type from string type
                if (groupByType != null && groupByType.isDateTimeFamily()) {
                    for (int i = 0; i < this.rewriteProjects.size(); i++) {
                        RexNode rex = this.rewriteProjects.get(i);
                        if (groupByType.isTimestamp()) {
                            rewriteProjectForIntersect(rex, SqlTypeName.TIMESTAMP, timestampType, timestampArrayType, i);
                        } else if (groupByType.isDate()) {
                            rewriteProjectForIntersect(rex, SqlTypeName.DATE, dateType, dateArrayType, i);
                        }
                    }
                }
            } else {
                logger.trace("After remove, {}.", tblColRefs.size());
            }
        }
    }
}
Example 16
Source File: CubeDescManager.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
/**
 * if there is some change need be applied after getting a cubeDesc from front-end, do it here
 * @param cubeDesc
 */
private void postProcessCubeDesc(CubeDesc cubeDesc) {
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (TopNMeasureType.FUNC_TOP_N.equalsIgnoreCase(measureDesc.getFunction().getExpression())) {
            // update return type scale with the estimated key length
            Map<String, String> configuration = measureDesc.getFunction().getConfiguration();
            ParameterDesc parameter = measureDesc.getFunction().getParameter();
            parameter = parameter.getNextParameter();
            int keyLength = 0;
            while (parameter != null) {
                String encoding = configuration.get(TopNMeasureType.CONFIG_ENCODING_PREFIX + parameter.getValue());
                String encodingVersionStr = configuration
                        .get(TopNMeasureType.CONFIG_ENCODING_VERSION_PREFIX + parameter.getValue());
                if (StringUtils.isEmpty(encoding) || DictionaryDimEnc.ENCODING_NAME.equals(encoding)) {
                    keyLength += DictionaryDimEnc.MAX_ENCODING_LENGTH; // estimation for dict encoding
                } else if (encoding.startsWith("dict")) {
                    throw new IllegalArgumentException(
                            "TOP_N's Encoding is " + encoding + ", please choose the correct one");
                } else {
                    // non-dict encoding
                    int encodingVersion = 1;
                    if (!StringUtils.isEmpty(encodingVersionStr)) {
                        try {
                            encodingVersion = Integer.parseInt(encodingVersionStr);
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("invalid encoding version: " + encodingVersionStr);
                        }
                    }
                    Object[] encodingConf = DimensionEncoding.parseEncodingConf(encoding);
                    DimensionEncoding dimensionEncoding = DimensionEncodingFactory.create((String) encodingConf[0],
                            (String[]) encodingConf[1], encodingVersion);
                    keyLength += dimensionEncoding.getLengthOfEncoding();
                }

                parameter = parameter.getNextParameter();
            }

            DataType returnType = DataType.getType(measureDesc.getFunction().getReturnType());
            DataType newReturnType = new DataType(returnType.getName(), returnType.getPrecision(), keyLength);
            measureDesc.getFunction().setReturnType(newReturnType.toString());
        }
    }
}
Example 17
Source File: SparkCubingByLayer.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);

    Class[] kryoClassArray = new Class[] { Class.forName("scala.reflect.ClassTag$$anon$1") };

    SparkConf conf = new SparkConf().setAppName("Cubing for:" + cubeName + " segment " + segmentId);
    //serialization conf
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");
    conf.set("spark.kryo.registrationRequired", "true").registerKryoClasses(kryoClassArray);

    KylinSparkJobListener jobListener = new KylinSparkJobListener();
    JavaSparkContext sc = new JavaSparkContext(conf);
    sc.sc().addSparkListener(jobListener);
    HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));
    SparkUtil.modifySparkHadoopConfiguration(sc.sc(), AbstractHadoopJob.loadKylinConfigFromHdfs(
            new SerializableConfiguration(sc.hadoopConfiguration()), metaUrl)); // set dfs.replication and enable compress
    final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
    KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

    final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);
    final CubeDesc cubeDesc = cubeInstance.getDescriptor();
    final CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);

    logger.info("RDD input path: {}", inputPath);
    logger.info("RDD Output path: {}", outputPath);

    final Job job = Job.getInstance(sConf.get());
    SparkUtil.setHadoopConfForCuboid(job, cubeSegment, metaUrl);

    int countMeasureIndex = 0;
    for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
        if (measureDesc.getFunction().isCount() == true) {
            break;
        } else {
            countMeasureIndex++;
        }
    }
    final CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, envConfig);
    boolean[] needAggr = new boolean[cubeDesc.getMeasures().size()];
    boolean allNormalMeasure = true;
    for (int i = 0; i < cubeDesc.getMeasures().size(); i++) {
        needAggr[i] = !cubeDesc.getMeasures().get(i).getFunction().getMeasureType().onlyAggrInBaseCuboid();
        allNormalMeasure = allNormalMeasure && needAggr[i];
    }
    logger.info("All measure are normal (agg on all cuboids) ? : " + allNormalMeasure);

    StorageLevel storageLevel = StorageLevel.fromString(envConfig.getSparkStorageLevel());

    boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE.equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

    final JavaPairRDD<ByteArray, Object[]> encodedBaseRDD = SparkUtil
            .hiveRecordInputRDD(isSequenceFile, sc, inputPath, hiveTable)
            .mapToPair(new EncodeBaseCuboid(cubeName, segmentId, metaUrl, sConf));

    Long totalCount = 0L;
    if (envConfig.isSparkSanityCheckEnabled()) {
        totalCount = encodedBaseRDD.count();
    }

    final BaseCuboidReducerFunction2 baseCuboidReducerFunction = new BaseCuboidReducerFunction2(cubeName, metaUrl, sConf);
    BaseCuboidReducerFunction2 reducerFunction2 = baseCuboidReducerFunction;
    if (allNormalMeasure == false) {
        reducerFunction2 = new CuboidReducerFunction2(cubeName, metaUrl, sConf, needAggr);
    }

    final int totalLevels = cubeSegment.getCuboidScheduler().getBuildLevel();
    JavaPairRDD<ByteArray, Object[]>[] allRDDs = new JavaPairRDD[totalLevels + 1];
    int level = 0;
    int partition = SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig);

    // aggregate to calculate base cuboid
    allRDDs[0] = encodedBaseRDD.reduceByKey(baseCuboidReducerFunction, partition).persist(storageLevel);

    saveToHDFS(allRDDs[0], metaUrl, cubeName, cubeSegment, outputPath, 0, job, envConfig);

    PairFlatMapFunction flatMapFunction = new CuboidFlatMap(cubeName, segmentId, metaUrl, sConf);

    // aggregate to ND cuboids
    for (level = 1; level <= totalLevels; level++) {
        partition = SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig);

        allRDDs[level] = allRDDs[level - 1].flatMapToPair(flatMapFunction).reduceByKey(reducerFunction2, partition)
                .persist(storageLevel);
        allRDDs[level - 1].unpersist(false);
        if (envConfig.isSparkSanityCheckEnabled() == true) {
            sanityCheck(allRDDs[level], totalCount, level, cubeStatsReader, countMeasureIndex);
        }
        saveToHDFS(allRDDs[level], metaUrl, cubeName, cubeSegment, outputPath, level, job, envConfig);
    }
    allRDDs[totalLevels].unpersist(false);
    logger.info("Finished on calculating all level cuboids.");
    logger.info("HDFS: Number of bytes written=" + jobListener.metrics.getBytesWritten());
    //HadoopUtil.deleteHDFSMeta(metaUrl);
}
Example 18
Source File: FragmentFileSearcher.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
public void search(StreamingSearchContext searchContext, ResultCollector collector) throws IOException {
    String timezone = searchContext.getCubeDesc().getConfig().getStreamingDerivedTimeTimezone();
    long timezoneOffset = 0;
    if (timezone != null && timezone.length() > 0) {
        timezoneOffset = TimeZone.getTimeZone(timezone).getRawOffset();
    }
    FragmentMetaInfo fragmentMetaInfo = fragmentData.getFragmentMetaInfo();
    CuboidMetaInfo cuboidMetaInfo;
    if (searchContext.hitBasicCuboid()) {
        cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
    } else {
        cuboidMetaInfo = fragmentMetaInfo.getCuboidMetaInfo(searchContext.getHitCuboid());
        if (cuboidMetaInfo == null) {
            logger.warn("the cuboid:{} is not exist in the fragment:{}, use basic cuboid instead",
                    searchContext.getHitCuboid(), fragment.getFragmentId());
            cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
        }
    }

    ResponseResultSchema responseSchema = searchContext.getRespResultSchema();
    TblColRef[] dimensions = responseSchema.getDimensions();
    FunctionDesc[] metrics = responseSchema.getMetrics();
    Map<TblColRef, Dictionary<String>> dictMap = fragmentData.getDimensionDictionaries(dimensions);

    CubeDesc cubeDesc = responseSchema.getCubeDesc();
    List<MeasureDesc> allMeasures = cubeDesc.getMeasures();
    Map<FunctionDesc, MeasureDesc> funcMeasureMap = Maps.newHashMap();
    for (MeasureDesc measure : allMeasures) {
        funcMeasureMap.put(measure.getFunction(), measure);
    }

    MeasureDesc[] measures = new MeasureDesc[metrics.length];
    for (int i = 0; i < measures.length; i++) {
        measures[i] = funcMeasureMap.get(metrics[i]);
    }

    DimensionEncoding[] dimensionEncodings = ParsedStreamingCubeInfo.getDimensionEncodings(cubeDesc, dimensions, dictMap);
    ColumnarMetricsEncoding[] metricsEncodings = ParsedStreamingCubeInfo.getMetricsEncodings(measures);
    ColumnarRecordCodec recordCodec = new ColumnarRecordCodec(dimensionEncodings, metricsEncodings);

    // change the unEvaluable dimensions to groupBy
    Set<TblColRef> unEvaluateDims = Sets.newHashSet();
    TupleFilter fragmentFilter = null;
    if (searchContext.getFilter() != null) {
        fragmentFilter = convertFilter(fragmentMetaInfo, searchContext.getFilter(), recordCodec, dimensions,
                new CubeDimEncMap(cubeDesc, dictMap), unEvaluateDims, timezoneOffset);
    }
    if (ConstantTupleFilter.TRUE == fragmentFilter) {
        fragmentFilter = null;
    } else if (ConstantTupleFilter.FALSE == fragmentFilter) {
        collector.collectSearchResult(IStreamingSearchResult.EMPTY_RESULT);
    }

    Set<TblColRef> groups = searchContext.getGroups();
    if (!unEvaluateDims.isEmpty()) {
        searchContext.addNewGroups(unEvaluateDims);
        groups = Sets.union(groups, unEvaluateDims);
    }
    collector.collectSearchResult(new FragmentSearchResult(fragment, fragmentData, cuboidMetaInfo, responseSchema,
            fragmentFilter, groups, searchContext.getHavingFilter(), recordCodec));
}
Example 19
Source File: FunctionRule.java From Kylin with Apache License 2.0 | 4 votes |
@Override
public void validate(CubeDesc cube, ValidateContext context) {
    List<MeasureDesc> measures = cube.getMeasures();

    List<FunctionDesc> countFuncs = new ArrayList<FunctionDesc>();

    Iterator<MeasureDesc> it = measures.iterator();
    while (it.hasNext()) {
        MeasureDesc measure = it.next();
        FunctionDesc func = measure.getFunction();
        ParameterDesc parameter = func.getParameter();
        if (parameter == null) {
            context.addResult(ResultLevel.ERROR, "Must define parameter for function " + func.getExpression() + " in " + measure.getName());
            return;
        }

        String type = func.getParameter().getType();
        String value = func.getParameter().getValue();
        if (StringUtils.isEmpty(type)) {
            context.addResult(ResultLevel.ERROR, "Must define type for parameter type " + func.getExpression() + " in " + measure.getName());
            return;
        }
        if (StringUtils.isEmpty(value)) {
            context.addResult(ResultLevel.ERROR, "Must define type for parameter value " + func.getExpression() + " in " + measure.getName());
            return;
        }
        if (StringUtils.isEmpty(func.getReturnType())) {
            context.addResult(ResultLevel.ERROR, "Must define return type for function " + func.getExpression() + " in " + measure.getName());
            return;
        }

        if (StringUtils.equalsIgnoreCase(FunctionDesc.PARAMETER_TYPE_COLUMN, type)) {
            validateColumnParameter(context, cube, value);
        } else if (StringUtils.equals(FunctionDesc.PARAMTER_TYPE_CONSTANT, type)) {
            validateCostantParameter(context, cube, value);
        }
        validateReturnType(context, cube, func);

        if (func.isCount())
            countFuncs.add(func);
    }

    if (countFuncs.size() != 1) {
        context.addResult(ResultLevel.ERROR, "Must define one and only one count(1) function, but there are " + countFuncs.size() + " -- " + countFuncs);
    }
}
Example 20
Source File: FragmentFileSearcher.java From kylin with Apache License 2.0 | 4 votes |
@Override
public void search(StreamingSearchContext searchContext, ResultCollector collector) throws IOException {
    String timezone = searchContext.getCubeDesc().getConfig().getStreamingDerivedTimeTimezone();
    long timezoneOffset = 0;
    if (timezone != null && timezone.length() > 0) {
        timezoneOffset = TimeZone.getTimeZone(timezone).getRawOffset();
    }
    FragmentMetaInfo fragmentMetaInfo = fragmentData.getFragmentMetaInfo();
    CuboidMetaInfo cuboidMetaInfo;
    if (searchContext.hitBasicCuboid()) {
        cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
    } else {
        cuboidMetaInfo = fragmentMetaInfo.getCuboidMetaInfo(searchContext.getHitCuboid());
        if (cuboidMetaInfo == null) {
            logger.warn("the cuboid:{} is not exist in the fragment:{}, use basic cuboid instead",
                    searchContext.getHitCuboid(), fragment.getFragmentId());
            cuboidMetaInfo = fragmentMetaInfo.getBasicCuboidMetaInfo();
        }
    }

    ResponseResultSchema responseSchema = searchContext.getRespResultSchema();
    TblColRef[] dimensions = responseSchema.getDimensions();
    FunctionDesc[] metrics = responseSchema.getMetrics();
    Map<TblColRef, Dictionary<String>> dictMap = fragmentData.getDimensionDictionaries(dimensions);

    CubeDesc cubeDesc = responseSchema.getCubeDesc();
    List<MeasureDesc> allMeasures = cubeDesc.getMeasures();
    Map<FunctionDesc, MeasureDesc> funcMeasureMap = Maps.newHashMap();
    for (MeasureDesc measure : allMeasures) {
        funcMeasureMap.put(measure.getFunction(), measure);
    }

    MeasureDesc[] measures = new MeasureDesc[metrics.length];
    for (int i = 0; i < measures.length; i++) {
        measures[i] = funcMeasureMap.get(metrics[i]);
    }

    DimensionEncoding[] dimensionEncodings = ParsedStreamingCubeInfo.getDimensionEncodings(cubeDesc, dimensions, dictMap);
    ColumnarMetricsEncoding[] metricsEncodings = ParsedStreamingCubeInfo.getMetricsEncodings(measures);
    ColumnarRecordCodec recordCodec = new ColumnarRecordCodec(dimensionEncodings, metricsEncodings);

    // change the unEvaluable dimensions to groupBy
    Set<TblColRef> unEvaluateDims = Sets.newHashSet();
    TupleFilter fragmentFilter = null;
    if (searchContext.getFilter() != null) {
        fragmentFilter = convertFilter(fragmentMetaInfo, searchContext.getFilter(), recordCodec, dimensions,
                new CubeDimEncMap(cubeDesc, dictMap), unEvaluateDims, timezoneOffset);
    }
    if (ConstantTupleFilter.TRUE == fragmentFilter) {
        fragmentFilter = null;
    } else if (ConstantTupleFilter.FALSE == fragmentFilter) {
        collector.collectSearchResult(IStreamingSearchResult.EMPTY_RESULT);
    }

    Set<TblColRef> groups = searchContext.getGroups();
    if (!unEvaluateDims.isEmpty()) {
        searchContext.addNewGroups(unEvaluateDims);
        groups = Sets.union(groups, unEvaluateDims);
    }
    collector.collectSearchResult(new FragmentSearchResult(fragment, fragmentData, cuboidMetaInfo, responseSchema,
            fragmentFilter, groups, searchContext.getHavingFilter(), recordCodec));
}