org.apache.kylin.job.JoinedFlatTable Java Examples

The following examples show how to use org.apache.kylin.job.JoinedFlatTable. The originating source file, project, and license are noted above each example.
Example #1
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
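
The jar list above is assembled through StringUtil.appendWithSeparator so that empty config values do not leave stray separators. A minimal, self-contained sketch of that accumulation pattern (plain Java; the comma separator and the skip-empty behavior are assumptions about the Kylin helper, not copied from it):

// Illustrative sketch of comma-separated jar-list accumulation; the separator
// and empty-value handling are assumptions, not the Kylin StringUtil source.
public class JarListSketch {
    static void appendWithSeparator(StringBuilder sb, String value) {
        if (value == null || value.isEmpty()) {
            return; // an unset config contributes nothing
        }
        if (sb.length() > 0) {
            sb.append(",");
        }
        sb.append(value);
    }

    public static void main(String[] args) {
        StringBuilder jars = new StringBuilder();
        appendWithSeparator(jars, "/opt/kylin/lib/extra-1.jar"); // hypothetical paths
        appendWithSeparator(jars, "/opt/kylin/lib/extra-2.jar");
        System.out.println(jars); // /opt/kylin/lib/extra-1.jar,/opt/kylin/lib/extra-2.jar
    }
}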
 
Example #2
Source File: QueryGenerator.java    From kylin with Apache License 2.0
public static String generateQuery(CubeDesc cubeDesc, Set<BitSet> selected, int maxNumOfDimension) {
    IJoinedFlatTableDesc flatDesc = new CubeJoinedFlatTableDesc(cubeDesc);

    String dimensionStatement = createDimensionStatement(cubeDesc.getDimensions(), selected, maxNumOfDimension);
    String measureStatement = createMeasureStatement(cubeDesc.getMeasures());

    StringBuilder sql = new StringBuilder();
    sql.append("SELECT" + "\n");
    sql.append(dimensionStatement);
    sql.append(measureStatement);

    StringBuilder joinPart = new StringBuilder();
    JoinedFlatTable.appendJoinStatement(flatDesc, joinPart, false, null);
    sql.append(joinPart.toString().replaceAll("DEFAULT\\.", ""));

    sql.append("GROUP BY" + "\n");
    sql.append(dimensionStatement);
    String ret = sql.toString();
    ret = ret.replaceAll("`", "\"");
    return ret;
}
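
The generated statement is post-processed with two replaceAll calls: the DEFAULT schema prefix is stripped and Hive backticks are rewritten as ANSI double quotes. A self-contained sketch of their effect (the sample join clause is made up, not actual JoinedFlatTable output):

// Illustrative sketch of the cleanup in generateQuery; the input SQL is a
// made-up sample, not real JoinedFlatTable output.
public class QueryCleanupSketch {
    public static void main(String[] args) {
        String sql = "FROM DEFAULT.`FACT` F\nINNER JOIN DEFAULT.`DIM` D ON F.`ID` = D.`ID`\n";
        sql = sql.replaceAll("DEFAULT\\.", ""); // drop the schema prefix
        sql = sql.replaceAll("`", "\"");        // Hive backticks -> ANSI double quotes
        System.out.print(sql);
        // FROM "FACT" F
        // INNER JOIN "DIM" D ON F."ID" = D."ID"
    }
}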
 
Example #3
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example #4
Source File: FlinkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example #5
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example #6
Source File: CubeController.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Get SQL of a Cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return a GeneralResponse whose "sql" property holds the segment's flat-table SQL
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = {
        "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {

    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }

    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
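
The endpoint this method backs can be exercised with a plain HTTP GET. A hedged client sketch, assuming a local Kylin instance on port 7070 under the usual /kylin/api prefix with default ADMIN credentials (all of these are deployment assumptions, not part of the example above):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

// Hypothetical client: host, port, path prefix, cube/segment names, and
// credentials are all assumptions.
public class GetSegmentSqlSketch {
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://localhost:7070/kylin/api/cubes/my_cube/segs/my_segment/sql");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        String auth = Base64.getEncoder()
                .encodeToString("ADMIN:KYLIN".getBytes(StandardCharsets.UTF_8));
        conn.setRequestProperty("Authorization", "Basic " + auth);
        conn.setRequestProperty("Accept", "application/json");
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line); // JSON body carrying the "sql" property
            }
        }
    }
}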
 
Example #7
Source File: BaseCuboidBuilder.java    From kylin with Apache License 2.0
private void checkHiveGlobalDictionaryColumn() {
    Set<String> mrDictColumnSet = new HashSet<>();
    if (kylinConfig.getMrHiveDictColumns() != null) {
        Collections.addAll(mrDictColumnSet, kylinConfig.getMrHiveDictColumns());
    }

    for (MeasureDesc measure : measureDescList) {
        if (measure.getFunction().getExpression().equalsIgnoreCase(FunctionDesc.FUNC_COUNT_DISTINCT)) {
            FunctionDesc functionDesc = measure.getFunction();
            TblColRef colRef = functionDesc.getParameter().getColRefs().get(0);
            if (mrDictColumnSet.contains(JoinedFlatTable.colName(colRef, true))) {
                functionDesc.setMrDict(true);
                logger.info("Enable hive global dictionary for {}", colRef);
                measure.setFunction(functionDesc);
            }
        }
    }
}
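
The lookup key here comes from JoinedFlatTable.colName(colRef, true), which yields the flat-table column name for a model column. A self-contained sketch of the ALIAS_COLUMN naming convention it is matched against (the convention shown is an assumption about Kylin's intermediate-table layout, not a copy of the real implementation):

// Illustrative sketch of flat-table column naming; the ALIAS_COLUMN scheme is
// an assumption, not a copy of JoinedFlatTable.colName.
public class FlatTableColNameSketch {
    static String colName(String tableAlias, String columnName, boolean useTableAlias) {
        return useTableAlias ? tableAlias + "_" + columnName : columnName;
    }

    public static void main(String[] args) {
        // hypothetical model column KYLIN_SALES.SELLER_ID
        System.out.println(colName("KYLIN_SALES", "SELLER_ID", true)); // KYLIN_SALES_SELLER_ID
    }
}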
 
Example #8
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());

    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example #9
Source File: HiveInputBase.java    From kylin with Apache License 2.0
@Override
public void addStepPhase_ReplaceFlatTableGlobalColumnValue(DefaultChainedExecutable jobFlow) {
    KylinConfig dictConfig = flatDesc.getSegment().getConfig();
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    String globalDictTable = MRHiveDictUtil.globalDictTableName(flatDesc, cubeName);
    String globalDictDatabase = dictConfig.getMrHiveDictDB();

    String[] mrHiveDictColumnsExcludeRefCols = dictConfig.getMrHiveDictColumnsExcludeRefColumns();
    Map<String, String> dictRef = dictConfig.getMrHiveDictRefColumns();
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    if (Objects.nonNull(mrHiveDictColumnsExcludeRefCols) && mrHiveDictColumnsExcludeRefCols.length > 0) {
        jobFlow.addTask(createHiveGlobalDictMergeGlobalDict(flatDesc, hiveInitStatements, cubeName, mrHiveDictColumnsExcludeRefCols, globalDictDatabase, globalDictTable));
        for (String item : mrHiveDictColumnsExcludeRefCols) {
            dictRef.put(item, "");
        }
    }

    // replace step
    if (!dictRef.isEmpty()) {
        jobFlow.addTask(createMrHiveGlobalDictReplaceStep(flatDesc, hiveInitStatements, cubeName,
                dictRef, flatTableDatabase, globalDictDatabase, globalDictTable, dictConfig.getMrHiveDictTableSuffix(), jobFlow.getId()));
    }
}
 
Example #10
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
                              final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example #11
Source File: BaseCuboidBuilder.java    From kylin-on-parquet-v2 with Apache License 2.0
private void checkMrDictColumn() {
    Set<String> mrDictColumnSet = new HashSet<>();
    if (kylinConfig.getMrHiveDictColumns() != null) {
        Collections.addAll(mrDictColumnSet, kylinConfig.getMrHiveDictColumns());
    }

    for (MeasureDesc measure : measureDescList) {
        if (measure.getFunction().getExpression().equalsIgnoreCase(FunctionDesc.FUNC_COUNT_DISTINCT)) {
            FunctionDesc functionDesc = measure.getFunction();
            TblColRef colRef = functionDesc.getParameter().getColRefs().get(0);
            if (mrDictColumnSet.contains(JoinedFlatTable.colName(colRef, true))) {
                functionDesc.setMrDict(true);
                logger.info("setMrDict for {}", colRef);
                measure.setFunction(functionDesc);
            }
        }
    }
}
 
Example #12
Source File: CubeController.java    From kylin with Apache License 2.0
/**
 * Get SQL of a Cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return a GeneralResponse whose "sql" property holds the segment's flat-table SQL
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = {
        "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {

    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }

    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
 
Example #13
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // MR-Hive dict and inner tables do not need their HDFS data deleted
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName() + "_"
                + MRHiveDictUtil.DictHiveType.GroupBy.getName();
        deleteTables.add(tableName);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));
    jobFlow.addTask(step);
}
 
Example #14
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example #15
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

    if (cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
        if (kylinConfig.isLivyEnabled()) {
            jobFlow.addTask(createFlatHiveTableByLivyStep(hiveInitStatements,
                    jobWorkingDir, cubeName, flatDesc));
        } else {
            if (kylinConfig.isSparCreateHiveTableViaSparkEnable()) {
                jobFlow.addTask(createFlatHiveTableBySparkSql(hiveInitStatements,
                        jobWorkingDir, cubeName, flatDesc));
            } else {
                jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
            }
        }
    } else {
        jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    }
    //jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
}
 
Example #16
Source File: HiveInputBase.java    From kylin with Apache License 2.0
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

    if (cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
        if (kylinConfig.isLivyEnabled()) {
            jobFlow.addTask(createFlatHiveTableByLivyStep(hiveInitStatements,
                    jobWorkingDir, cubeName, flatDesc));
        } else {
            if (kylinConfig.isSparCreateHiveTableViaSparkEnable()) {
                jobFlow.addTask(createFlatHiveTableBySparkSql(hiveInitStatements,
                        jobWorkingDir, cubeName, flatDesc));
            } else {
                jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
            }
        }
    } else {
        jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    }
    //jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
}
 
Example #17
Source File: JoinedFlatTableTest.java    From Kylin with Apache License 2.0
@Test
public void testGenCreateTableDDL() {
    String ddl = JoinedFlatTable.generateCreateTableStatement(intermediateTableDesc, "/tmp", fakeJobUUID);
    System.out.println(ddl);

    System.out.println("The length for the ddl is " + ddl.length());
}
 
Example #18
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase1_CreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    final KylinConfig cubeConfig = cubeInstance.getConfig();

    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    // create flat table first
    addStepPhase1_DoCreateFlatTable(jobFlow);

    // create global dict
    KylinConfig dictConfig = (flatDesc.getSegment()).getConfig();
    String[] mrHiveDictColumns = dictConfig.getMrHiveDictColumns();
    if (mrHiveDictColumns.length > 0) {
        String globalDictDatabase = dictConfig.getMrHiveDictDB();
        if (null == globalDictDatabase) {
            throw new IllegalArgumentException("Mr-Hive Global dict database is null.");
        }
        String globalDictTable = cubeName + dictConfig.getMrHiveDictTableSuffix();
        addStepPhase1_DoCreateMrHiveGlobalDict(jobFlow, mrHiveDictColumns, globalDictDatabase, globalDictTable);
    }

    // then count and redistribute
    if (cubeConfig.isHiveRedistributeEnabled()) {
        final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        //jobFlow.addTask(createRedistributeFlatHiveTableStep(hiveInitStatements, cubeName, flatDesc, cubeInstance.getDescriptor()));
        if (kylinConfig.isLivyEnabled() && cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
            jobFlow.addTask(createRedistributeFlatHiveTableByLivyStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        } else {
            jobFlow.addTask(createRedistributeFlatHiveTableStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        }
    }

    // special for hive
    addStepPhase1_DoMaterializeLookupTable(jobFlow);
}
 
Example #19
Source File: JoinedFlatTableTest.java    From Kylin with Apache License 2.0
@Test
public void testGenerateInsertSql() throws IOException {
    String sqls = JoinedFlatTable.generateInsertDataStatement(intermediateTableDesc, fakeJobUUID, new JobEngineConfig(KylinConfig.getInstanceFromEnv()));
    System.out.println(sqls);

    int length = sqls.length();
    assertEquals(1155, length);
}
 
Example #20
Source File: JdbcHiveInputBase.java    From kylin with Apache License 2.0
private AbstractExecutable createFlatHiveTableFromFiles(String hiveInitStatements, String jobWorkingDir) {
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    String fieldDelimiter = getConfig().getJdbcSourceFieldDelimiter();
    // Sqoop does not support exporting SEQUENCEFILE to Hive yet; see SQOOP-869
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir,
            "TEXTFILE", fieldDelimiter);

    HiveCmdStep step = new HiveCmdStep();
    step.setCmd(hiveInitStatements + dropTableHql + createTableHql);
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
    return step;
}
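
Because Sqoop lands the JDBC extract as delimited text, the flat table is declared as TEXTFILE with the configured field delimiter. An illustrative sketch of the kind of DDL generateCreateTableStatement would emit under those options (table name, path, delimiter, and column list are all made up; this shows the shape, not Kylin's exact output):

// Illustrative DDL shape only; every identifier below is hypothetical.
public class TextfileDdlSketch {
    public static void main(String[] args) {
        String tableName = "kylin_intermediate_my_cube_abc123";
        String jobWorkingDir = "/kylin/kylin_metadata/my_job";
        String fieldDelimiter = "|";
        String ddl = "CREATE EXTERNAL TABLE IF NOT EXISTS " + tableName + "\n"
                + "(SELLER_ID bigint, PART_DT date)\n"
                + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '" + fieldDelimiter + "'\n"
                + "STORED AS TEXTFILE\n"
                + "LOCATION '" + jobWorkingDir + "/" + tableName + "';\n";
        System.out.print(ddl);
    }
}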
 
Example #21
Source File: CubeController.java    From kylin with Apache License 2.0
/**
 * Get SQL of a Cube
 *
 * @param cubeName Cube Name
 * @return a GeneralResponse whose "sql" property holds the cube's flat-table SQL
 */
@RequestMapping(value = "/{cubeName}/sql", method = { RequestMethod.GET }, produces = { "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName) {
    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);
    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
 
Example #22
Source File: KafkaInputBase.java    From kylin with Apache License 2.0
protected static AbstractExecutable createFlatTable(final String hiveTableDatabase,
                                                    final String baseLocation, final String cubeName,
                                                    final StreamCubeFactTableDesc streamFactDesc, final List<String> intermediateTables,
                                                    final List<String> intermediatePaths) {
    final IJoinedFlatTableDesc flatDesc = streamFactDesc.getFlatTableDesc();

    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(hiveTableDatabase);

    final String dropFactTableHql = JoinedFlatTable.generateDropTableStatement(streamFactDesc);
    // the table inputformat is sequence file
    final String createFactTableHql = JoinedFlatTable.generateCreateTableStatement(streamFactDesc, baseLocation,
            JoinedFlatTable.SEQUENCEFILE);

    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, baseLocation);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);
    insertDataHqls = insertDataHqls.replace(
            quoteTableIdentity(flatDesc.getDataModel().getRootFactTable(), null) + " ",
            quoteTableIdentity(hiveTableDatabase, streamFactDesc.getTableName(), null) + " ");

    CreateFlatHiveTableStep step = new CreateFlatHiveTableStep();
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setInitStatement(hiveInitStatements);
    step.setCreateTableStatement(
            dropFactTableHql + createFactTableHql + dropTableHql + createTableHql + insertDataHqls);
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);

    intermediateTables.add(flatDesc.getTableName());
    intermediateTables.add(streamFactDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + flatDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + streamFactDesc.getTableName());
    return step;
}
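
The insert statement is retargeted from the model's root fact table to the Kafka-backed intermediate fact table with a plain string replace on the quoted table identity. A self-contained sketch of that substitution (the identifiers and sample HQL are made up, not real quoteTableIdentity output):

// Illustrative sketch of the identity substitution in createFlatTable; the
// quoted identifiers are made-up samples.
public class RetargetInsertSketch {
    public static void main(String[] args) {
        String insertDataHqls =
                "INSERT OVERWRITE TABLE flat_table SELECT * FROM `DEFAULT`.`KYLIN_SALES` ALIAS;";
        String rootFactIdentity = "`DEFAULT`.`KYLIN_SALES`";          // model's root fact table
        String streamFactIdentity = "`hive_db`.`stream_fact_table`";  // Kafka-backed fact table
        insertDataHqls = insertDataHqls.replace(rootFactIdentity + " ", streamFactIdentity + " ");
        System.out.println(insertDataHqls);
        // INSERT OVERWRITE TABLE flat_table SELECT * FROM `hive_db`.`stream_fact_table` ALIAS;
    }
}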
 
Example #23
Source File: HiveInputBase.java    From kylin with Apache License 2.0
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // MR-Hive dict and inner tables do not need their HDFS data deleted
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName()
                + flatDesc.getSegment().getConfig().getMrHiveDistinctValueTableSuffix();
        String tableName2 = dictDb + "." + flatDesc.getTableName()
                + flatDesc.getSegment().getConfig().getMrHiveDictTableSuffix();
        deleteTables.add(tableName);
        deleteTables.add(tableName2);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));
    jobFlow.addTask(step);
}
 
Example #24
Source File: KafkaMRInput.java    From kylin with Apache License 2.0
@Override
public void configureJob(Job job) {
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String jobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
    IJoinedFlatTableDesc flatHiveTableDesc = new CubeJoinedFlatTableDesc(cubeSegment);
    String inputPath = JoinedFlatTable.getTableDir(flatHiveTableDesc,
            JobBuilderSupport.getJobWorkingDir(conf, jobId));
    try {
        FileInputFormat.addInputPath(job, new Path(inputPath));
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
 
Example #25
Source File: HiveFlinkInput.java    From kylin with Apache License 2.0
protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable jobFlow) {
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    AbstractExecutable task = createLookupHiveViewMaterializationStep(hiveInitStatements, jobWorkingDir,
            flatDesc, hiveViewIntermediateTables, jobFlow.getId());
    if (task != null) {
        jobFlow.addTask(task);
    }
}
 
Example #26
Source File: HiveInputBase.java    From kylin with Apache License 2.0
protected static AbstractExecutable createRedistributeFlatHiveTableByLivyStep(String hiveInitStatements,
                                                                              String cubeName, IJoinedFlatTableDesc flatDesc, CubeDesc cubeDesc) {
    RedistributeFlatHiveTableByLivyStep step = new RedistributeFlatHiveTableByLivyStep();
    step.setInitStatement(hiveInitStatements);
    step.setIntermediateTable(flatDesc.getTableName());
    step.setRedistributeDataStatement(JoinedFlatTable.generateRedistributeFlatTableStatement(flatDesc, cubeDesc));
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_REDISTRIBUTE_FLAT_HIVE_TABLE);
    return step;
}
 
Example #27
Source File: HiveInputBase.java    From kylin with Apache License 2.0
protected static AbstractExecutable createRedistributeFlatHiveTableStep(String hiveInitStatements, String cubeName,
                                                                        IJoinedFlatTableDesc flatDesc, CubeDesc cubeDesc) {
    RedistributeFlatHiveTableStep step = new RedistributeFlatHiveTableStep();
    step.setInitStatement(hiveInitStatements);
    step.setIntermediateTable(flatDesc.getTableName());
    step.setRedistributeDataStatement(JoinedFlatTable.generateRedistributeFlatTableStatement(flatDesc, cubeDesc));
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_REDISTRIBUTE_FLAT_HIVE_TABLE);
    return step;
}
 
Example #28
Source File: HiveInputBase.java    From kylin with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableByLivyStep(String hiveInitStatements, String jobWorkingDir,
                                                                  String cubeName, IJoinedFlatTableDesc flatDesc) {
    //from hive to hive
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    CreateFlatHiveTableByLivyStep step = new CreateFlatHiveTableByLivyStep();
    step.setInitStatement(hiveInitStatements);
    step.setCreateTableStatement(dropTableHql + createTableHql + insertDataHqls);
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
    return step;
}
 
Example #29
Source File: HiveInputBase.java    From kylin with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableStep(String hiveInitStatements, String jobWorkingDir,
                                                            String cubeName, IJoinedFlatTableDesc flatDesc) {
    //from hive to hive
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    CreateFlatHiveTableStep step = new CreateFlatHiveTableStep();
    step.setInitStatement(hiveInitStatements);
    step.setCreateTableStatement(dropTableHql + createTableHql + insertDataHqls);
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
    return step;
}
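
The step runs the three statements as a single script, so correctness depends only on each generated statement terminating cleanly. A minimal sketch of that drop/create/insert assembly with placeholder statements (the HQL bodies and the trailing ";\n" convention are assumptions, not generated Kylin output):

// Minimal sketch of the drop/create/insert script assembly; the HQL bodies
// are placeholders.
public class FlatTableScriptSketch {
    public static void main(String[] args) {
        String dropTableHql = "DROP TABLE IF EXISTS flat_table;\n";
        String createTableHql =
                "CREATE EXTERNAL TABLE flat_table (id bigint) LOCATION '/tmp/flat_table';\n";
        String insertDataHqls = "INSERT OVERWRITE TABLE flat_table SELECT id FROM src;\n";
        System.out.print(dropTableHql + createTableHql + insertDataHqls);
    }
}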
 
Example #30
Source File: CubeController.java    From Kylin with Apache License 2.0
/**
 * Get Hive SQL of the cube
 *
 * @param cubeName Cube Name
 * @return a GeneralResponse whose "sql" property holds the segment's flat-table SQL
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = {RequestMethod.GET})
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.READY);
    CubeJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, cubeSegment);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}