Java Code Examples for org.apache.kylin.job.JoinedFlatTable#getTableDir()

The following examples show how to use org.apache.kylin.job.JoinedFlatTable#getTableDir(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: FlinkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Populates the given Flink executable with the parameters required to build
 * a cube segment by layer: cube name, segment id, intermediate input
 * table/path, segment metadata URL and the cuboid output root, plus any
 * additional jars taken from the segment configuration.
 */
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    // Working-directory location of the intermediate flat table for this job.
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc, getJobWorkingDir(jobId));
    // Fully qualified Hive name: <intermediate-db>.<flat-table>.
    final String inputTable =
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatDesc.getTableName();

    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), inputTable);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), flatTableDir);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    // Extra jars to ship with the Flink job, if any are configured.
    final StringBuilder jarList = new StringBuilder();
    StringUtil.appendWithSeparator(jarList, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jarList.toString());

    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 2
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Spark step that extracts the distinct values of the fact
 * columns from the intermediate flat table, writing them (and the sampled
 * cube statistics) to the fact-distinct-columns output path for this job.
 *
 * @param jobId identifier of the cubing job this step belongs to
 * @return a fully configured executable ready to be added to the job flow
 */
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable step = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    // Working-directory location of the intermediate flat table for this job.
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc, getJobWorkingDir(jobId));
    // Fully qualified Hive name: <intermediate-db>.<flat-table>.
    final String inputTable =
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatDesc.getTableName();

    step.setClassName(SparkFactDistinct.class.getName());
    step.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    step.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    step.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), inputTable);
    step.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), flatTableDir);
    step.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    step.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    step.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(),
            String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    step.setJobId(jobId);
    step.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    // Persist source row/byte counters so the overall job can report input size.
    step.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    // Extra jars to ship with the Spark job, if any are configured.
    final StringBuilder jarList = new StringBuilder();
    StringUtil.appendWithSeparator(jarList, seg.getConfig().getSparkAdditionalJars());
    step.setJars(jarList.toString());
    return step;
}
 
Example 3
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Populates the given Spark executable with the parameters required to build
 * a cube segment by layer: cube name, segment id, intermediate input
 * table/path, segment metadata URL and the cuboid output root, plus any
 * additional jars taken from the segment configuration.
 */
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    // Working-directory location of the intermediate flat table for this job.
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc, getJobWorkingDir(jobId));
    // Fully qualified Hive name: <intermediate-db>.<flat-table>.
    final String inputTable =
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatDesc.getTableName();

    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), inputTable);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), flatTableDir);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    // Extra jars to ship with the Spark job, if any are configured.
    final StringBuilder jarList = new StringBuilder();
    StringUtil.appendWithSeparator(jarList, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jarList.toString());

    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example 4
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Flink step that extracts the distinct values of the fact
 * columns from the intermediate flat table, writing them (and the sampled
 * cube statistics) to the fact-distinct-columns output path for this job.
 *
 * @param jobId identifier of the cubing job this step belongs to
 * @return a fully configured executable ready to be added to the job flow
 */
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable step = new FlinkExecutable();
    final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    // Working-directory location of the intermediate flat table for this job.
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc, getJobWorkingDir(jobId));
    // Fully qualified Hive name: <intermediate-db>.<flat-table>.
    final String inputTable =
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatDesc.getTableName();

    step.setClassName(FlinkFactDistinctColumns.class.getName());
    step.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    step.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    step.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    step.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), inputTable);
    step.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), flatTableDir);
    step.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    step.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(),
            String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    step.setJobId(jobId);
    step.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    // Persist source row/byte counters so the overall job can report input size.
    step.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    // Extra jars to ship with the Flink job, if any are configured.
    final StringBuilder jarList = new StringBuilder();
    StringUtil.appendWithSeparator(jarList, seg.getConfig().getFlinkAdditionalJars());
    step.setJars(jarList.toString());

    return step;
}
 
Example 5
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Populates the given Flink executable with the parameters required to build
 * a cube segment by layer: cube name, segment id, intermediate input
 * table/path, segment metadata URL and the cuboid output root, plus any
 * additional jars taken from the segment configuration.
 */
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    // Working-directory location of the intermediate flat table for this job.
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc, getJobWorkingDir(jobId));
    // Fully qualified Hive name: <intermediate-db>.<flat-table>.
    final String inputTable =
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatDesc.getTableName();

    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), inputTable);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), flatTableDir);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    // Extra jars to ship with the Flink job, if any are configured.
    final StringBuilder jarList = new StringBuilder();
    StringUtil.appendWithSeparator(jarList, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jarList.toString());

    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 6
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Spark step that extracts the distinct values of the fact
 * columns from the intermediate flat table, writing them (and the sampled
 * cube statistics) to the fact-distinct-columns output path for this job.
 *
 * @param jobId identifier of the cubing job this step belongs to
 * @return a fully configured executable ready to be added to the job flow
 */
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable step = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    // Working-directory location of the intermediate flat table for this job.
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc, getJobWorkingDir(jobId));
    // Fully qualified Hive name: <intermediate-db>.<flat-table>.
    final String inputTable =
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatDesc.getTableName();

    step.setClassName(SparkFactDistinct.class.getName());
    step.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    step.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    step.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), inputTable);
    step.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), flatTableDir);
    step.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    step.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    step.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(),
            String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    step.setJobId(jobId);
    step.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    // Persist source row/byte counters so the overall job can report input size.
    step.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    // Extra jars to ship with the Spark job, if any are configured.
    final StringBuilder jarList = new StringBuilder();
    StringUtil.appendWithSeparator(jarList, seg.getConfig().getSparkAdditionalJars());
    step.setJars(jarList.toString());
    return step;
}
 
Example 7
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Populates the given Spark executable with the parameters required to build
 * a cube segment by layer: cube name, segment id, intermediate input
 * table/path, segment metadata URL and the cuboid output root, plus any
 * additional jars taken from the segment configuration.
 */
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
                              final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    // Working-directory location of the intermediate flat table for this job.
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc, getJobWorkingDir(jobId));
    // Fully qualified Hive name: <intermediate-db>.<flat-table>.
    final String inputTable =
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatDesc.getTableName();

    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), inputTable);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), flatTableDir);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    // Extra jars to ship with the Spark job, if any are configured.
    final StringBuilder jarList = new StringBuilder();
    StringUtil.appendWithSeparator(jarList, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jarList.toString());

    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example 8
Source File: KafkaMRInput.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Points the MapReduce job at the intermediate flat table (stored as
 * sequence files) that was materialized for the current cubing job.
 * The cubing job id is read from the job configuration.
 */
@Override
public void configureJob(Job job) {
    job.setInputFormatClass(SequenceFileInputFormat.class);
    final String cubingJobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
    final IJoinedFlatTableDesc flatDesc = new CubeJoinedFlatTableDesc(cubeSegment);
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc,
            JobBuilderSupport.getJobWorkingDir(conf, cubingJobId));
    try {
        FileInputFormat.addInputPath(job, new Path(flatTableDir));
    } catch (IOException e) {
        // addInputPath declares IOException; surface it as a configuration failure.
        throw new IllegalStateException(e);
    }
}
 
Example 9
Source File: KafkaMRInput.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Points the MapReduce job at the intermediate flat table (stored as
 * sequence files) that was materialized for the current cubing job.
 * The cubing job id is read from the job configuration.
 */
@Override
public void configureJob(Job job) {
    job.setInputFormatClass(SequenceFileInputFormat.class);
    final String cubingJobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
    final IJoinedFlatTableDesc flatDesc = new CubeJoinedFlatTableDesc(cubeSegment);
    final String flatTableDir = JoinedFlatTable.getTableDir(flatDesc,
            JobBuilderSupport.getJobWorkingDir(conf, cubingJobId));
    try {
        FileInputFormat.addInputPath(job, new Path(flatTableDir));
    } catch (IOException e) {
        // addInputPath declares IOException; surface it as a configuration failure.
        throw new IllegalStateException(e);
    }
}