org.apache.kylin.common.util.StringUtil Java Examples

The following examples show how to use org.apache.kylin.common.util.StringUtil. Each example is taken from an open source project; the source file and originating project are noted above the code.
Example #1
Source File: BatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public MapReduceExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    MapReduceExecutable mergeDictionaryStep = new MapReduceExecutable();
    mergeDictionaryStep.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_META_URL, getSegmentMetadataUrl(seg.getConfig(), jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Dictionary_" + seg.getCubeInstance().getName() + "_Step");

    mergeDictionaryStep.setMapReduceParams(cmd.toString());
    mergeDictionaryStep.setMapReduceJobClass(MergeDictionaryJob.class);

    return mergeDictionaryStep;
}
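The StringUtil.join call above turns the list of merging segment IDs into a single comma-separated argument for the MapReduce command line. Below is a minimal, hypothetical sketch of that call pattern in isolation; the class name and segment IDs are illustrative, not from the original project.

import java.util.Arrays;
import java.util.List;

import org.apache.kylin.common.util.StringUtil;

public class JoinSketch {
    public static void main(String[] args) {
        // Hypothetical segment IDs; in the example above they come from mergingSegmentIds.
        List<String> segmentIds = Arrays.asList("seg-0001", "seg-0002", "seg-0003");

        // Join the elements with the given separator, as done for OPTION_MERGE_SEGMENT_IDS.
        String joined = StringUtil.join(segmentIds, ",");

        // Expected, based on the usage above: seg-0001,seg-0002,seg-0003
        System.out.println(joined);
    }
}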
 
Example #2
Source File: CubeMigrationCheckCLI.java    From kylin-on-parquet-v2 with Apache License 2.0
public void check(List<String> segFullNameList) {
    issueExistHTables = Lists.newArrayList();
    inconsistentHTables = Lists.newArrayList();

    for (String segFullName : segFullNameList) {
        String[] sepNameList = StringUtil.splitByComma(segFullName);
        try {
            HTableDescriptor hTableDescriptor = hbaseAdmin.getTableDescriptor(TableName.valueOf(sepNameList[0]));
            String host = hTableDescriptor.getValue(IRealizationConstants.HTableTag);
            if (!dstCfg.getMetadataUrlPrefix().equalsIgnoreCase(host)) {
                inconsistentHTables.add(segFullName);
            }
        } catch (IOException e) {
            issueExistHTables.add(segFullName);
            continue;
        }
    }
}
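StringUtil.splitByComma is the inverse operation: check() receives each segment full name as a comma-separated string and splits it into an array before looking up the HBase table named by the first token. A hypothetical standalone sketch of that pattern follows; the sample value is illustrative only.

import org.apache.kylin.common.util.StringUtil;

public class SplitSketch {
    public static void main(String[] args) {
        // A made-up segment full name; in check() the HTable name is the first token.
        String segFullName = "KYLIN_ABC123,example_cube,20190101000000_20190201000000";

        String[] parts = StringUtil.splitByComma(segFullName);

        // Expected, based on the usage above: KYLIN_ABC123
        System.out.println(parts[0]);
    }
}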
 
Example #3
Source File: BatchMergeJobBuilder2.java    From kylin with Apache License 2.0
public MapReduceExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    MapReduceExecutable mergeDictionaryStep = new MapReduceExecutable();
    mergeDictionaryStep.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_META_URL, getSegmentMetadataUrl(seg.getConfig(), jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Dictionary_" + seg.getCubeInstance().getName() + "_Step");

    mergeDictionaryStep.setMapReduceParams(cmd.toString());
    mergeDictionaryStep.setMapReduceJobClass(MergeDictionaryJob.class);

    return mergeDictionaryStep;
}
 
Example #4
Source File: CubeMigrationCheckCLI.java    From kylin with Apache License 2.0
public void check(List<String> segFullNameList) {
    issueExistHTables = Lists.newArrayList();
    inconsistentHTables = Lists.newArrayList();

    for (String segFullName : segFullNameList) {
        String[] sepNameList = StringUtil.splitByComma(segFullName);
        try {
            HTableDescriptor hTableDescriptor = hbaseAdmin.getTableDescriptor(TableName.valueOf(sepNameList[0]));
            String host = hTableDescriptor.getValue(IRealizationConstants.HTableTag);
            if (!dstCfg.getMetadataUrlPrefix().equalsIgnoreCase(host)) {
                inconsistentHTables.add(segFullName);
            }
        } catch (IOException e) {
            issueExistHTables.add(segFullName);
            continue;
        }
    }
}
 
Example #5
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // mr-hive dict and inner table do not need delete hdfs
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName() + "_"
                + MRHiveDictUtil.DictHiveType.GroupBy.getName();
        deleteTables.add(tableName);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));
    jobFlow.addTask(step);
}
 
Example #6
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
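StringUtil.appendWithSeparator collects the additional jar paths configured for the engine into one StringBuilder before handing the result to the executable. The sketch below is a minimal, hypothetical illustration; it assumes the method appends each value with a comma separator when the builder is non-empty, which is inferred from the usage above rather than verified.

import org.apache.kylin.common.util.StringUtil;

public class AppendSketch {
    public static void main(String[] args) {
        StringBuilder jars = new StringBuilder();

        // Hypothetical jar paths; in the example above they come from getFlinkAdditionalJars().
        StringUtil.appendWithSeparator(jars, "/opt/libs/custom-udf.jar");
        StringUtil.appendWithSeparator(jars, "/opt/libs/extra-serde.jar");

        // Presumably a comma-separated list such as:
        // /opt/libs/custom-udf.jar,/opt/libs/extra-serde.jar
        System.out.println(jars.toString());
    }
}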
 
Example #7
Source File: FlinkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());

    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example #8
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example #9
Source File: FlinkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example #10
Source File: FlinkUtil.java    From kylin with Apache License 2.0
public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }

    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }

    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}
 
Example #11
Source File: FlinkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());

    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example #12
Source File: FlinkBatchMergeJobBuilder2.java    From kylin with Apache License 2.0
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example #13
Source File: TableController.java    From kylin with Apache License 2.0
/**
 * Regenerate table cardinality
 *
 * @return the submitted cardinality request
 * @throws IOException
 */
@RequestMapping(value = "/{project}/{tableNames}/cardinality", method = { RequestMethod.PUT }, produces = {
        "application/json" })
@ResponseBody
public CardinalityRequest generateCardinality(@PathVariable String tableNames,
        @RequestBody CardinalityRequest request, @PathVariable String project) throws Exception {
    String submitter = SecurityContextHolder.getContext().getAuthentication().getName();
    String[] tables = StringUtil.splitByComma(tableNames);
    try {
        for (String table : tables) {
            tableService.calculateCardinality(table.trim().toUpperCase(Locale.ROOT), submitter, project);
        }
    } catch (IOException e) {
        logger.error("Failed to calculate cardinality", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    return request;
}
 
Example #14
Source File: AbstractExecutable.java    From kylin with Apache License 2.0
protected void handleMetadataPersistException(ExecutableContext context, Throwable exception) {
    final String[] adminDls = context.getConfig().getAdminDls();
    if (adminDls == null || adminDls.length < 1) {
        logger.warn(NO_NEED_TO_SEND_EMAIL_USER_LIST_IS_EMPTY);
        return;
    }
    List<String> users = Lists.newArrayList(adminDls);

    Map<String, Object> dataMap = Maps.newHashMap();
    dataMap.put("job_name", getName());
    dataMap.put("env_name", context.getConfig().getDeployEnv());
    dataMap.put(SUBMITTER, StringUtil.noBlank(getSubmitter(), "missing submitter"));
    dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
    dataMap.put("error_log",
            Matcher.quoteReplacement(StringUtil.noBlank(exception.getMessage(), "no error message")));

    String content = MailNotificationUtil.getMailContent(MailNotificationUtil.METADATA_PERSIST_FAIL, dataMap);
    String title = MailNotificationUtil.getMailTitle("METADATA PERSIST", "FAIL",
            context.getConfig().getDeployEnv());

    new MailService(context.getConfig()).sendMail(users, title, content);
}
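StringUtil.noBlank supplies a fallback when a value may be null or blank, so the notification mail never renders an empty field. A hypothetical sketch of the pattern, reusing the fallback strings from the example above:

import org.apache.kylin.common.util.StringUtil;

public class NoBlankSketch {
    public static void main(String[] args) {
        String submitter = null; // e.g. getSubmitter() returned nothing

        // Presumably returns the fallback when the first argument is null or blank.
        System.out.println(StringUtil.noBlank(submitter, "missing submitter")); // missing submitter
        System.out.println(StringUtil.noBlank("ADMIN", "missing submitter"));   // ADMIN
    }
}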
 
Example #15
Source File: ModelDimensionDesc.java    From kylin-on-parquet-v2 with Apache License 2.0
void init(DataModelDesc model) {
    table = table.toUpperCase(Locale.ROOT);
    if (columns != null) {
        StringUtil.toUpperCaseArray(columns, columns);
    }

    if (model != null) {
        table = model.findTable(table).getAlias();
        if (columns != null) {
            for (int i = 0; i < columns.length; i++) {
                TblColRef column = model.findColumn(table, columns[i]);

                if (column.getColumnDesc().isComputedColumn() && !model.isFactTable(column.getTableRef())) {
                    throw new RuntimeException("Computed Column on lookup table is not allowed");
                }

                columns[i] = column.getName();
            }
        }
    }
}
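StringUtil.toUpperCaseArray is used here to normalize the dimension column names in place (the same array is passed as both source and target). A minimal sketch, assuming the method upper-cases each element of the source array into the target array; the column names are illustrative.

import java.util.Arrays;

import org.apache.kylin.common.util.StringUtil;

public class UpperCaseSketch {
    public static void main(String[] args) {
        String[] columns = { "cal_dt", "lstg_format_name" };

        // Passing the same array twice normalizes the values in place, as in init() above.
        StringUtil.toUpperCaseArray(columns, columns);

        // Expected: [CAL_DT, LSTG_FORMAT_NAME]
        System.out.println(Arrays.toString(columns));
    }
}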
 
Example #16
Source File: ModelDimensionDesc.java    From kylin with Apache License 2.0
void init(DataModelDesc model) {
    table = table.toUpperCase(Locale.ROOT);
    if (columns != null) {
        StringUtil.toUpperCaseArray(columns, columns);
    }

    if (model != null) {
        table = model.findTable(table).getAlias();
        if (columns != null) {
            for (int i = 0; i < columns.length; i++) {
                TblColRef column = model.findColumn(table, columns[i]);

                if (column.getColumnDesc().isComputedColumn() && !model.isFactTable(column.getTableRef())) {
                    throw new RuntimeException("Computed Column on lookup table is not allowed");
                }

                columns[i] = column.getName();
            }
        }
    }
}
 
Example #17
Source File: MergeDictionaryMapper.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.doSetup(context);

    final SerializableConfiguration sConf = new SerializableConfiguration(context.getConfiguration());
    final String metaUrl = context.getConfiguration().get(BatchConstants.ARG_META_URL);
    final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
    final String segmentIds = context.getConfiguration().get(MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt());

    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
    final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeInstance.getDescName());

    mergingSegments = getMergingSegments(cubeInstance, StringUtil.splitByComma(segmentIds));
    tblColRefs = cubeDesc.getAllColumnsNeedDictionaryBuilt().toArray(new TblColRef[0]);
    dictMgr = DictionaryManager.getInstance(kylinConfig);
}
 
Example #18
Source File: TableController.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Regenerate table cardinality
 *
 * @return the submitted cardinality request
 * @throws IOException
 */
@RequestMapping(value = "/{project}/{tableNames}/cardinality", method = { RequestMethod.PUT }, produces = {
        "application/json" })
@ResponseBody
public CardinalityRequest generateCardinality(@PathVariable String tableNames,
        @RequestBody CardinalityRequest request, @PathVariable String project) throws Exception {
    String submitter = SecurityContextHolder.getContext().getAuthentication().getName();
    String[] tables = StringUtil.splitByComma(tableNames);
    try {
        for (String table : tables) {
            tableService.calculateCardinality(table.trim().toUpperCase(Locale.ROOT), submitter, project);
        }
    } catch (IOException e) {
        logger.error("Failed to calculate cardinality", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    return request;
}
 
Example #19
Source File: TableController.java    From kylin-on-parquet-v2 with Apache License 2.0
@RequestMapping(value = "/{tables}/{project}", method = { RequestMethod.DELETE }, produces = { "application/json" })
@ResponseBody
public Map<String, String[]> unLoadHiveTables(@PathVariable String tables, @PathVariable String project) {
    Set<String> unLoadSuccess = Sets.newHashSet();
    Set<String> unLoadFail = Sets.newHashSet();
    Map<String, String[]> result = new HashMap<String, String[]>();
    try {
        for (String tableName : StringUtil.splitByComma(tables)) {
            tableACLService.deleteFromTableACLByTbl(project, tableName);
            if (tableService.unloadHiveTable(tableName, project)) {
                unLoadSuccess.add(tableName);
            } else {
                unLoadFail.add(tableName);
            }
        }
    } catch (Throwable e) {
        logger.error("Failed to unload Hive Table", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    result.put("result.unload.success", (String[]) unLoadSuccess.toArray(new String[unLoadSuccess.size()]));
    result.put("result.unload.fail", (String[]) unLoadFail.toArray(new String[unLoadFail.size()]));
    return result;
}
 
Example #20
Source File: AbstractExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0
protected void handleMetadataPersistException(ExecutableContext context, Throwable exception) {
    final String[] adminDls = context.getConfig().getAdminDls();
    if (adminDls == null || adminDls.length < 1) {
        logger.warn(NO_NEED_TO_SEND_EMAIL_USER_LIST_IS_EMPTY);
        return;
    }
    List<String> users = Lists.newArrayList(adminDls);

    Map<String, Object> dataMap = Maps.newHashMap();
    dataMap.put("job_name", getName());
    dataMap.put("env_name", context.getConfig().getDeployEnv());
    dataMap.put(SUBMITTER, StringUtil.noBlank(getSubmitter(), "missing submitter"));
    dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
    dataMap.put("error_log",
            Matcher.quoteReplacement(StringUtil.noBlank(exception.getMessage(), "no error message")));

    String content = MailNotificationUtil.getMailContent(MailNotificationUtil.METADATA_PERSIST_FAIL, dataMap);
    String title = MailNotificationUtil.getMailTitle("METADATA PERSIST", "FAIL",
            context.getConfig().getDeployEnv());

    new MailService(context.getConfig()).sendMail(users, title, content);
}
 
Example #21
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());

    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example #22
Source File: HBaseJobSteps.java    From kylin-on-parquet-v2 with Apache License 2.0
public MapReduceExecutable createMergeCuboidDataStep(CubeSegment seg, List<CubeSegment> mergingSegments,
        String jobID, Class<? extends AbstractHadoopJob> clazz) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging) + "*");
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    MapReduceExecutable mergeCuboidDataStep = new MapReduceExecutable();
    mergeCuboidDataStep.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, formattedPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Merge_Cuboid_" + seg.getCubeInstance().getName() + "_Step");

    mergeCuboidDataStep.setMapReduceParams(cmd.toString());
    mergeCuboidDataStep.setMapReduceJobClass(clazz);
    return mergeCuboidDataStep;
}
 
Example #23
Source File: SparkBatchCubingJobBuilder2.java    From kylin with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
                              final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example #24
Source File: CubeSignatureRefresher.java    From kylin with Apache License 2.0
public void update() {
    logger.info("Reloading Cube Metadata from store: " + store.getReadableResourcePath(ResourceStore.CUBE_DESC_RESOURCE_ROOT));
    CubeDescManager cubeDescManager = CubeDescManager.getInstance(config);
    List<CubeDesc> cubeDescs;
    if (ArrayUtils.isEmpty(cubeNames)) {
        cubeDescs = cubeDescManager.listAllDesc();
    } else {
        String[] names = StringUtil.splitByComma(cubeNames[0]);
        if (ArrayUtils.isEmpty(names))
            return;
        cubeDescs = Lists.newArrayListWithCapacity(names.length);
        for (String name : names) {
            cubeDescs.add(cubeDescManager.getCubeDesc(name));
        }
    }
    for (CubeDesc cubeDesc : cubeDescs) {
        updateCubeDesc(cubeDesc);
    }

    verify();
}
 
Example #25
Source File: SparkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example #26
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example #27
Source File: SparkUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Read the given path as a Java RDD; The path can have second level sub folder.
 * @param inputPath
 * @param fs
 * @param sc
 * @param keyClass
 * @param valueClass
 * @return
 * @throws IOException
 */
public static JavaPairRDD parseInputPath(String inputPath, FileSystem fs, JavaSparkContext sc, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }

    if (!hasDir) {
        return sc.sequenceFile(inputHDFSPath.toString(), keyClass, valueClass);
    }

    return sc.sequenceFile(StringUtil.join(inputFolders, ","), keyClass, valueClass);
}
 
Example #28
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example #29
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example #30
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());

    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}