Java Code Examples for org.apache.kylin.common.util.StringUtil#appendWithSeparator()

The following examples show how to use org.apache.kylin.common.util.StringUtil#appendWithSeparator(). All of them are taken from open-source projects; the source file, project, and license are listed above each example.
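Every example below follows the same pattern: jar paths (from configuration or from classpath lookups) are accumulated in a StringBuilder, and appendWithSeparator() inserts a comma between non-blank entries while skipping null or empty input. The following is a minimal, self-contained sketch of that contract for orientation, assuming a comma separator; it is an illustration of the assumed behavior, not the Kylin source:

public class AppendWithSeparatorSketch {

    // Sketch of the assumed contract: skip blank fragments, otherwise append
    // the fragment, preceded by a comma when the builder already has content.
    static void appendWithSeparator(StringBuilder src, String append) {
        if (append == null || append.trim().isEmpty()) {
            return; // null/blank fragments leave the builder unchanged
        }
        if (src.length() > 0 && !src.toString().endsWith(",")) {
            src.append(",");
        }
        src.append(append);
    }

    public static void main(String[] args) {
        StringBuilder jars = new StringBuilder();
        appendWithSeparator(jars, "/opt/lib/a.jar");
        appendWithSeparator(jars, null);             // ignored
        appendWithSeparator(jars, "/opt/lib/b.jar");
        System.out.println(jars);                    // /opt/lib/a.jar,/opt/lib/b.jar
    }
}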
Example 1
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example 2
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
                              final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example 3
Source File: SparkBatchMergeJobBuilder2.java    From kylin with Apache License 2.0
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 4
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 5
Source File: FlinkBatchMergeJobBuilder2.java    From kylin with Apache License 2.0
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 6
Source File: FlinkBatchMergeJobBuilder2.java    From kylin with Apache License 2.0
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());

    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 7
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 8
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());

    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 9
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());

    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 10
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 11
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 12
Source File: SparkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 13
Source File: FlinkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 14
Source File: FlinkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());

    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 15
Source File: FlinkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 16
Source File: SparkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createMergeCuboidDataStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkCubingMerge.class.getName());
    sparkExecutable.setParam(SparkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    sparkExecutable.setParam(SparkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID + ":" + seg.toString());

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 17
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableBySparkSql(String hiveInitStatements,
        String jobWorkingDir, String cubeName, IJoinedFlatTableDesc flatDesc) {
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc,
            jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    KylinConfig config = flatDesc.getSegment().getConfig();
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(config);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK);
    sparkExecutable.setClassName(SparkCreatingFlatTable.class.getName());

    sparkExecutable.setParam(SparkSqlBatch.OPTION_CUBE_NAME.getOpt(), cubeName);
    sparkExecutable.setParam(SparkSqlBatch.OPTION_STEP_NAME.getOpt(),
            base64EncodeStr(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK));
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SEGMENT_ID.getOpt(),
            flatDesc.getSegment().getName());
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SQL_COUNT.getOpt(),
            String.valueOf(SparkCreatingFlatTable.SQL_COUNT));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(0).getOpt(),
            base64EncodeStr(hiveInitStatements));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(1).getOpt(),
            base64EncodeStr(dropTableHql));

    // createTableHql contains both the create table sql and the alter table sql
    String[] sqlArr = createTableHql.trim().split(";");
    if (2 != sqlArr.length) {
        throw new RuntimeException("create table hql should consist of a create table sql " +
                "and an alter table sql, but got: " + createTableHql);
    }
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(2).getOpt(),
            base64EncodeStr(sqlArr[0]));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(3).getOpt(),
            base64EncodeStr(sqlArr[1]));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(4).getOpt(),
            base64EncodeStr(insertDataHqls));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, config.getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 18
Source File: HiveInputBase.java    From kylin with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableBySparkSql(String hiveInitStatements,
                                                                  String jobWorkingDir, String cubeName, IJoinedFlatTableDesc flatDesc) {
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc,
            jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    KylinConfig config = flatDesc.getSegment().getConfig();
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(config);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK);
    sparkExecutable.setClassName(SparkCreatingFlatTable.class.getName());

    sparkExecutable.setParam(SparkSqlBatch.OPTION_CUBE_NAME.getOpt(), cubeName);
    sparkExecutable.setParam(SparkSqlBatch.OPTION_STEP_NAME.getOpt(),
            base64EncodeStr(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK));
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SEGMENT_ID.getOpt(),
            flatDesc.getSegment().getName());
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SQL_COUNT.getOpt(),
            String.valueOf(SparkCreatingFlatTable.SQL_COUNT));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(0).getOpt(),
            base64EncodeStr(hiveInitStatements));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(1).getOpt(),
            base64EncodeStr(dropTableHql));

    // createTableHql contains both the create table sql and the alter table sql
    String[] sqlArr = createTableHql.trim().split(";");
    if (2 != sqlArr.length) {
        throw new RuntimeException("create table hql should consist of a create table sql " +
                "and an alter table sql, but got: " + createTableHql);
    }
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(2).getOpt(),
            base64EncodeStr(sqlArr[0]));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(3).getOpt(),
            base64EncodeStr(sqlArr[1]));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(4).getOpt(),
            base64EncodeStr(insertDataHqls));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, config.getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 19
Source File: HBaseSparkSteps.java    From kylin-on-parquet-v2 with Apache License 2.0
public AbstractExecutable createConvertCuboidToHfileStep(String jobId) {
    String cuboidRootPath = getCuboidRootPath(jobId);
    String inputPath = cuboidRootPath + (cuboidRootPath.endsWith("/") ? "" : "/");

    SparkBatchCubingJobBuilder2 jobBuilder2 = new SparkBatchCubingJobBuilder2(seg, null);
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkCubeHFile.class.getName());
    sparkExecutable.setParam(SparkCubeHFile.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubeHFile.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubeHFile.OPTION_INPUT_PATH.getOpt(), inputPath);
    sparkExecutable.setParam(SparkCubeHFile.OPTION_META_URL.getOpt(),
            jobBuilder2.getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubeHFile.OPTION_OUTPUT_PATH.getOpt(), getHFilePath(jobId));
    sparkExecutable.setParam(SparkCubeHFile.OPTION_PARTITION_FILE_PATH.getOpt(),
            getRowkeyDistributionOutputPath(jobId) + "/part-r-00000_hfile");
    sparkExecutable.setParam(AbstractHadoopJob.OPTION_HBASE_CONF_PATH.getOpt(), getHBaseConfFilePath(jobId));
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(org.apache.hadoop.hbase.KeyValue.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.regionserver.BloomType.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.protobuf.generated.HFileProtos.class)); //hbase-protocol.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.CompatibilityFactory.class)); //hbase-hadoop-compat.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.htrace.HTraceConfiguration", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.apache.htrace.Trace", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("com.yammer.metrics.core.MetricsRegistry", null)); // metrics-core.jar
    //KYLIN-3607
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory", null));//hbase-hadoop-compat-1.1.1.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactoryImpl", null));//hbase-hadoop2-compat-1.1.1.jar

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    sparkExecutable.setName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE);
    sparkExecutable.setCounterSaveAs(",," + CubingJob.CUBE_SIZE_BYTES, getCounterOutputPath(jobId));

    return sparkExecutable;
}
 
Example 20
Source File: HBaseFlinkSteps.java    From kylin with Apache License 2.0
public AbstractExecutable createConvertCuboidToHfileStep(String jobId) {
    String cuboidRootPath = getCuboidRootPath(jobId);
    String inputPath = cuboidRootPath + (cuboidRootPath.endsWith("/") ? "" : "/");

    FlinkBatchCubingJobBuilder2 jobBuilder2 = new FlinkBatchCubingJobBuilder2(seg, null, 0);
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubeHFile.class.getName());
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_META_URL.getOpt(),
            jobBuilder2.getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_OUTPUT_PATH.getOpt(), getHFilePath(jobId));
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_INPUT_PATH.getOpt(), inputPath);
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_PARTITION_FILE_PATH.getOpt(),
            getRowkeyDistributionOutputPath(jobId) + "/part-r-00000_hfile");
    flinkExecutable.setParam(AbstractHadoopJob.OPTION_HBASE_CONF_PATH.getOpt(), getHBaseConfFilePath(jobId));
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(org.apache.hadoop.hbase.KeyValue.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.regionserver.BloomType.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.protobuf.generated.HFileProtos.class)); //hbase-protocol.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.CompatibilityFactory.class)); //hbase-hadoop-compat.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.htrace.HTraceConfiguration", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.apache.htrace.Trace", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("com.yammer.metrics.core.MetricsRegistry", null)); // metrics-core.jar
    //KYLIN-3607
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory", null));//hbase-hadoop-compat-1.1.1.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactoryImpl", null));//hbase-hadoop2-compat-1.1.1.jar

    //KYLIN-3537
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.io.hfile.HFileWriterImpl", null));//hbase-server.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader", null));//hbase-shaded-miscellaneous.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.metrics.MetricRegistry", null));//hbase-metrics-api.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.metrics.impl.MetricRegistriesImpl", null));//hbase-metrics.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hbase.thirdparty.com.google.protobuf.Message", null));//hbase-shaded-protobuf.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("org.apache.hadoop.hbase.shaded.protobuf.generated.HFileProtos", null));//hbase-protocol-shaded.jar

    if (!StringUtil.isEmpty(seg.getConfig().getFlinkAdditionalJars())) {
        StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    }
    flinkExecutable.setJars(jars.toString());

    flinkExecutable.setName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE);
    flinkExecutable.setCounterSaveAs(",," + CubingJob.CUBE_SIZE_BYTES, getCounterOutputPath(jobId));

    return flinkExecutable;
}