Java Code Examples for org.apache.kylin.common.util.StringUtil

The following examples show how to use org.apache.kylin.common.util.StringUtil. They are extracted from open source projects; where known, the source project, source file, and license are noted above each example.
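Most of the examples below revolve around a handful of helpers: StringUtil.join, StringUtil.splitByComma, StringUtil.appendWithSeparator, StringUtil.noBlank, and StringUtil.toUpperCaseArray. As a quick orientation, here is a minimal, self-contained sketch of those calls. It assumes the Kylin core-common module is on the classpath, and the behavior noted in the comments is inferred from how the examples on this page use each method rather than from the StringUtil source itself.

import java.util.Arrays;
import java.util.List;

import org.apache.kylin.common.util.StringUtil;

public class StringUtilQuickTour {

    public static void main(String[] args) {
        // join: concatenate a list of values with the given separator
        List<String> segmentIds = Arrays.asList("seg-1", "seg-2", "seg-3");
        String merged = StringUtil.join(segmentIds, ",");   // expected: "seg-1,seg-2,seg-3"

        // splitByComma: split a comma-separated value into an array
        String[] tables = StringUtil.splitByComma("DB.TABLE_A,DB.TABLE_B");

        // appendWithSeparator: accumulate values into a StringBuilder, inserting a separator between entries
        StringBuilder jars = new StringBuilder();
        StringUtil.appendWithSeparator(jars, "/opt/extra/first.jar");
        StringUtil.appendWithSeparator(jars, "/opt/extra/second.jar");

        // noBlank: return the first argument unless it is null or blank, otherwise the fallback
        String submitter = StringUtil.noBlank(null, "missing submitter");

        // toUpperCaseArray: upper-case each element of the source array into the target array
        // (passing the same array twice upper-cases it in place, as Example 8 does)
        String[] columns = { "col_a", "col_b" };
        StringUtil.toUpperCaseArray(columns, columns);

        System.out.println(merged);
        System.out.println(Arrays.toString(tables));
        System.out.println(jars);
        System.out.println(submitter);
        System.out.println(Arrays.toString(columns));
    }
}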
Example 1
Source Project: kylin-on-parquet-v2   Source File: HiveInputBase.java    License: Apache License 2.0
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // the mr-hive dict table and inner table do not need their HDFS data deleted
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName() + "_"
                + MRHiveDictUtil.DictHiveType.GroupBy.getName();
        deleteTables.add(tableName);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
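    // Pass the Hive-view intermediate table names to the cleanup step as a comma-separated list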
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));
    jobFlow.addTask(step);
}
 
Example 2
Source Project: kylin   Source File: BatchMergeJobBuilder2.java    License: Apache License 2.0
public MapReduceExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    MapReduceExecutable mergeDictionaryStep = new MapReduceExecutable();
    mergeDictionaryStep.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_META_URL, getSegmentMetadataUrl(seg.getConfig(), jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Dictionary_" + seg.getCubeInstance().getName() + "_Step");

    mergeDictionaryStep.setMapReduceParams(cmd.toString());
    mergeDictionaryStep.setMapReduceJobClass(MergeDictionaryJob.class);

    return mergeDictionaryStep;
}
 
Example 3
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 4
Source Project: kylin   Source File: SparkBatchCubingJobBuilder2.java    License: Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
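    // Append any additional jars configured for the Spark engine to the executable's jar list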
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 5
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());

    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 6
Source Project: kylin-on-parquet-v2   Source File: CubeMigrationCheckCLI.java    License: Apache License 2.0
public void check(List<String> segFullNameList) {
    issueExistHTables = Lists.newArrayList();
    inconsistentHTables = Lists.newArrayList();

    for (String segFullName : segFullNameList) {
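        // The segment full name is comma-separated; the first token is the HTable name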
        String[] sepNameList = StringUtil.splitByComma(segFullName);
        try {
            HTableDescriptor hTableDescriptor = hbaseAdmin.getTableDescriptor(TableName.valueOf(sepNameList[0]));
            String host = hTableDescriptor.getValue(IRealizationConstants.HTableTag);
            if (!dstCfg.getMetadataUrlPrefix().equalsIgnoreCase(host)) {
                inconsistentHTables.add(segFullName);
            }
        } catch (IOException e) {
            issueExistHTables.add(segFullName);
            continue;
        }
    }
}
 
Example 7
Source Project: kylin   Source File: TableController.java    License: Apache License 2.0
/**
 * Regenerate table cardinality
 *
 * @return the cardinality request that was submitted
 * @throws IOException
 */
@RequestMapping(value = "/{project}/{tableNames}/cardinality", method = { RequestMethod.PUT }, produces = {
        "application/json" })
@ResponseBody
public CardinalityRequest generateCardinality(@PathVariable String tableNames,
        @RequestBody CardinalityRequest request, @PathVariable String project) throws Exception {
    String submitter = SecurityContextHolder.getContext().getAuthentication().getName();
    String[] tables = StringUtil.splitByComma(tableNames);
    try {
        for (String table : tables) {
            tableService.calculateCardinality(table.trim().toUpperCase(Locale.ROOT), submitter, project);
        }
    } catch (IOException e) {
        logger.error("Failed to calculate cardinality", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    return request;
}
 
Example 8
Source Project: kylin-on-parquet-v2   Source File: ModelDimensionDesc.java    License: Apache License 2.0
void init(DataModelDesc model) {
    table = table.toUpperCase(Locale.ROOT);
    if (columns != null) {
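        // Upper-case the column names in place (source and target are the same array)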
        StringUtil.toUpperCaseArray(columns, columns);
    }

    if (model != null) {
        table = model.findTable(table).getAlias();
        if (columns != null) {
            for (int i = 0; i < columns.length; i++) {
                TblColRef column = model.findColumn(table, columns[i]);

                if (column.getColumnDesc().isComputedColumn() && !model.isFactTable(column.getTableRef())) {
                    throw new RuntimeException("Computed Column on lookup table is not allowed");
                }

                columns[i] = column.getName();
            }
        }
    }
}
 
Example 9
Source Project: kylin-on-parquet-v2   Source File: BatchMergeJobBuilder2.java    License: Apache License 2.0
public MapReduceExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    MapReduceExecutable mergeDictionaryStep = new MapReduceExecutable();
    mergeDictionaryStep.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_META_URL, getSegmentMetadataUrl(seg.getConfig(), jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Dictionary_" + seg.getCubeInstance().getName() + "_Step");

    mergeDictionaryStep.setMapReduceParams(cmd.toString());
    mergeDictionaryStep.setMapReduceJobClass(MergeDictionaryJob.class);

    return mergeDictionaryStep;
}
 
Example 10
Source Project: kylin-on-parquet-v2   Source File: MergeDictionaryMapper.java    License: Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.doSetup(context);

    final SerializableConfiguration sConf = new SerializableConfiguration(context.getConfiguration());
    final String metaUrl = context.getConfiguration().get(BatchConstants.ARG_META_URL);
    final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
    final String segmentIds = context.getConfiguration().get(MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt());

    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
    final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeInstance.getDescName());

    mergingSegments = getMergingSegments(cubeInstance, StringUtil.splitByComma(segmentIds));
    tblColRefs = cubeDesc.getAllColumnsNeedDictionaryBuilt().toArray(new TblColRef[0]);
    dictMgr = DictionaryManager.getInstance(kylinConfig);
}
 
Example 11
Source Project: kylin-on-parquet-v2   Source File: AbstractExecutable.java    License: Apache License 2.0
protected void handleMetadataPersistException(ExecutableContext context, Throwable exception) {
    final String[] adminDls = context.getConfig().getAdminDls();
    if (adminDls == null || adminDls.length < 1) {
        logger.warn(NO_NEED_TO_SEND_EMAIL_USER_LIST_IS_EMPTY);
        return;
    }
    List<String> users = Lists.newArrayList(adminDls);

    Map<String, Object> dataMap = Maps.newHashMap();
    dataMap.put("job_name", getName());
    dataMap.put("env_name", context.getConfig().getDeployEnv());
    dataMap.put(SUBMITTER, StringUtil.noBlank(getSubmitter(), "missing submitter"));
    dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
    dataMap.put("error_log",
            Matcher.quoteReplacement(StringUtil.noBlank(exception.getMessage(), "no error message")));

    String content = MailNotificationUtil.getMailContent(MailNotificationUtil.METADATA_PERSIST_FAIL, dataMap);
    String title = MailNotificationUtil.getMailTitle("METADATA PERSIST", "FAIL",
            context.getConfig().getDeployEnv());

    new MailService(context.getConfig()).sendMail(users, title, content);
}
 
Example 12
Source Project: kylin   Source File: SparkBatchCubingJobBuilder2.java    License: Apache License 2.0
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());

    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 13
Source Project: kylin-on-parquet-v2   Source File: HBaseJobSteps.java    License: Apache License 2.0
public MapReduceExecutable createMergeCuboidDataStep(CubeSegment seg, List<CubeSegment> mergingSegments,
        String jobID, Class<? extends AbstractHadoopJob> clazz) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging) + "*");
    }
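    // Combine the cuboid paths of all merging segments into one comma-separated input path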
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    MapReduceExecutable mergeCuboidDataStep = new MapReduceExecutable();
    mergeCuboidDataStep.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, formattedPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Merge_Cuboid_" + seg.getCubeInstance().getName() + "_Step");

    mergeCuboidDataStep.setMapReduceParams(cmd.toString());
    mergeCuboidDataStep.setMapReduceJobClass(clazz);
    return mergeCuboidDataStep;
}
 
Example 14
Source Project: kylin   Source File: SparkBatchCubingJobBuilder2.java    License: Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
                              final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example 15
Source Project: kylin   Source File: CubeSignatureRefresher.java    License: Apache License 2.0
public void update() {
    logger.info("Reloading Cube Metadata from store: " + store.getReadableResourcePath(ResourceStore.CUBE_DESC_RESOURCE_ROOT));
    CubeDescManager cubeDescManager = CubeDescManager.getInstance(config);
    List<CubeDesc> cubeDescs;
    if (ArrayUtils.isEmpty(cubeNames)) {
        cubeDescs = cubeDescManager.listAllDesc();
    } else {
        String[] names = StringUtil.splitByComma(cubeNames[0]);
        if (ArrayUtils.isEmpty(names))
            return;
        cubeDescs = Lists.newArrayListWithCapacity(names.length);
        for (String name : names) {
            cubeDescs.add(cubeDescManager.getCubeDesc(name));
        }
    }
    for (CubeDesc cubeDesc : cubeDescs) {
        updateCubeDesc(cubeDesc);
    }

    verify();
}
 
Example 16
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 17
Source Project: kylin-on-parquet-v2   Source File: SparkUtil.java    License: Apache License 2.0
/**
 * Read the given path as a Java pair RDD; the path can contain second-level sub-folders.
 * @param inputPath
 * @param fs
 * @param sc
 * @param keyClass
 * @param valueClass
 * @return
 * @throws IOException
 */
public static JavaPairRDD parseInputPath(String inputPath, FileSystem fs, JavaSparkContext sc, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }

    if (!hasDir) {
        return sc.sequenceFile(inputHDFSPath.toString(), keyClass, valueClass);
    }

    return sc.sequenceFile(StringUtil.join(inputFolders, ","), keyClass, valueClass);
}
 
Example 18
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 19
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example 20
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());

    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 21
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example 22
Source Project: kylin-on-parquet-v2   Source File: TableController.java    License: Apache License 2.0
@RequestMapping(value = "/{tables}/{project}", method = { RequestMethod.DELETE }, produces = { "application/json" })
@ResponseBody
public Map<String, String[]> unLoadHiveTables(@PathVariable String tables, @PathVariable String project) {
    Set<String> unLoadSuccess = Sets.newHashSet();
    Set<String> unLoadFail = Sets.newHashSet();
    Map<String, String[]> result = new HashMap<String, String[]>();
    try {
        for (String tableName : StringUtil.splitByComma(tables)) {
            tableACLService.deleteFromTableACLByTbl(project, tableName);
            if (tableService.unloadHiveTable(tableName, project)) {
                unLoadSuccess.add(tableName);
            } else {
                unLoadFail.add(tableName);
            }
        }
    } catch (Throwable e) {
        logger.error("Failed to unload Hive Table", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    result.put("result.unload.success", (String[]) unLoadSuccess.toArray(new String[unLoadSuccess.size()]));
    result.put("result.unload.fail", (String[]) unLoadFail.toArray(new String[unLoadFail.size()]));
    return result;
}
 
Example 23
Source Project: kylin-on-parquet-v2   Source File: TableController.java    License: Apache License 2.0
/**
 * Regenerate table cardinality
 *
 * @return the cardinality request that was submitted
 * @throws IOException
 */
@RequestMapping(value = "/{project}/{tableNames}/cardinality", method = { RequestMethod.PUT }, produces = {
        "application/json" })
@ResponseBody
public CardinalityRequest generateCardinality(@PathVariable String tableNames,
        @RequestBody CardinalityRequest request, @PathVariable String project) throws Exception {
    String submitter = SecurityContextHolder.getContext().getAuthentication().getName();
    String[] tables = StringUtil.splitByComma(tableNames);
    try {
        for (String table : tables) {
            tableService.calculateCardinality(table.trim().toUpperCase(Locale.ROOT), submitter, project);
        }
    } catch (IOException e) {
        logger.error("Failed to calculate cardinality", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    return request;
}
 
Example 24
Source Project: kylin   Source File: AbstractExecutable.java    License: Apache License 2.0
protected void handleMetadataPersistException(ExecutableContext context, Throwable exception) {
    final String[] adminDls = context.getConfig().getAdminDls();
    if (adminDls == null || adminDls.length < 1) {
        logger.warn(NO_NEED_TO_SEND_EMAIL_USER_LIST_IS_EMPTY);
        return;
    }
    List<String> users = Lists.newArrayList(adminDls);

    Map<String, Object> dataMap = Maps.newHashMap();
    dataMap.put("job_name", getName());
    dataMap.put("env_name", context.getConfig().getDeployEnv());
    dataMap.put(SUBMITTER, StringUtil.noBlank(getSubmitter(), "missing submitter"));
    dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
    dataMap.put("error_log",
            Matcher.quoteReplacement(StringUtil.noBlank(exception.getMessage(), "no error message")));

    String content = MailNotificationUtil.getMailContent(MailNotificationUtil.METADATA_PERSIST_FAIL, dataMap);
    String title = MailNotificationUtil.getMailTitle("METADATA PERSIST", "FAIL",
            context.getConfig().getDeployEnv());

    new MailService(context.getConfig()).sendMail(users, title, content);
}
 
Example 25
Source Project: kylin   Source File: FlinkUtil.java    License: Apache License 2.0
public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }

    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }

    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}
 
Example 26
Source Project: kylin   Source File: FlinkBatchCubingJobBuilder2.java    License: Apache License 2.0
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());

    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 27
Source Project: kylin   Source File: FlinkBatchCubingJobBuilder2.java    License: Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 28
Source Project: kylin   Source File: FlinkBatchMergeJobBuilder2.java    License: Apache License 2.0
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example 29
Source Project: kylin   Source File: CubeMigrationCheckCLI.java    License: Apache License 2.0
public void check(List<String> segFullNameList) {
    issueExistHTables = Lists.newArrayList();
    inconsistentHTables = Lists.newArrayList();

    for (String segFullName : segFullNameList) {
        String[] sepNameList = StringUtil.splitByComma(segFullName);
        try {
            HTableDescriptor hTableDescriptor = hbaseAdmin.getTableDescriptor(TableName.valueOf(sepNameList[0]));
            String host = hTableDescriptor.getValue(IRealizationConstants.HTableTag);
            if (!dstCfg.getMetadataUrlPrefix().equalsIgnoreCase(host)) {
                inconsistentHTables.add(segFullName);
            }
        } catch (IOException e) {
            issueExistHTables.add(segFullName);
            continue;
        }
    }
}
 
Example 30
Source Project: kylin   Source File: ModelDimensionDesc.java    License: Apache License 2.0
void init(DataModelDesc model) {
    table = table.toUpperCase(Locale.ROOT);
    if (columns != null) {
        StringUtil.toUpperCaseArray(columns, columns);
    }

    if (model != null) {
        table = model.findTable(table).getAlias();
        if (columns != null) {
            for (int i = 0; i < columns.length; i++) {
                TblColRef column = model.findColumn(table, columns[i]);

                if (column.getColumnDesc().isComputedColumn() && !model.isFactTable(column.getTableRef())) {
                    throw new RuntimeException("Computed Column on lookup table is not allowed");
                }

                columns[i] = column.getName();
            }
        }
    }
}