Java Code Examples for org.apache.kylin.metadata.model.PartitionDesc#isPartitioned()

The following examples show how to use org.apache.kylin.metadata.model.PartitionDesc#isPartitioned(). All examples are extracted from open-source projects; the source file and project are noted above each snippet, so you can consult the original repositories for full context and related API usage.
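
A pattern shared by all of the examples below: isPartitioned() is checked before any partition-specific field (such as the partition date column) is read, and several call sites also null-check the PartitionDesc itself. As a minimal sketch of that guard, assuming a DataModelDesc obtained from your own Kylin metadata lookup:

import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.PartitionDesc;
import org.apache.kylin.metadata.model.TblColRef;

public class PartitionGuardSketch {
    // Returns the partition date column, or null when the model is unpartitioned.
    static TblColRef partitionColumnOrNull(DataModelDesc model) {
        PartitionDesc part = model.getPartitionDesc();
        // Some call sites (Examples 2, 4, 9) also null-check the descriptor itself.
        if (part == null || !part.isPartitioned()) {
            return null; // unpartitioned: the cube is built as one full segment
        }
        return part.getPartitionDateColumnRef();
    }
}

Returning null for the unpartitioned case mirrors Examples 1 and 3, where callers treat null as "no range can be deduced".
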
Example 1
Source File: SegmentPruner.java    From kylin-on-parquet-v2 with Apache License 2.0
public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
    DataModelDesc model = seg.getModel();
    PartitionDesc part = model.getPartitionDesc();

    if (!part.isPartitioned())
        return null;
    if (!col.equals(part.getPartitionDateColumnRef()))
        return null;

    // deduce the dim range from TSRange
    TSRange tsRange = seg.getTSRange();
    if (tsRange.start.isMin || tsRange.end.isMax)
        return null; // DimensionRangeInfo cannot express infinite

    String min = tsRangeToStr(tsRange.start.v, part);
    String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
    return new DimensionRangeInfo(min, max);
}
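
The "-1" on the end timestamp is the detail worth noting: segment TSRanges are half-open [start, end), while DimensionRangeInfo stores an inclusive min/max. A self-contained illustration of that adjustment (the dates and the yyyy-MM-dd format are illustrative, not taken from tsRangeToStr):

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class ExclusiveEndSketch {
    public static void main(String[] args) {
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
        fmt.setTimeZone(TimeZone.getTimeZone("UTC"));
        long start = 1672531200000L; // 2023-01-01T00:00:00Z
        long end   = 1675209600000L; // 2023-02-01T00:00:00Z, exclusive
        System.out.println(fmt.format(new Date(start)));   // 2023-01-01
        System.out.println(fmt.format(new Date(end - 1))); // 2023-01-31, the last covered day
    }
}
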
 
Example 2
Source File: KeyValueBuilder.java    From kylin with Apache License 2.0
/**
 * Use the segment start time as the map key. The time unit depends on the
 * partition columns: milliseconds if the partition column already stores
 * epoch millis, seconds if a separate partition time column is defined,
 * and days if only a partition date column is defined.
 */
private String getSegmentStartTime(CubeSegment segment) {
    long startTime = segment.getTSRange().start.v;
    DataModelDesc model = segment.getModel();
    PartitionDesc partitionDesc = model.getPartitionDesc();
    if (partitionDesc == null || !partitionDesc.isPartitioned()) {
        return "0";
    } else if (partitionDesc.partitionColumnIsTimeMillis()) {
        return String.valueOf(startTime);
    } else if (partitionDesc.getPartitionTimeColumnRef() != null) {
        return String.valueOf(startTime / 1000L);
    } else if (partitionDesc.getPartitionDateColumnRef() != null) {
        return String.valueOf(startTime / 86400000L);
    }
    }
    return "0";
}
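
The branch arithmetic is easy to verify standalone; for a fixed start time, each granularity is just a division (the timestamp below is illustrative):

public class SegmentKeySketch {
    public static void main(String[] args) {
        long startTime = 1672531200000L; // 2023-01-01T00:00:00Z in epoch millis
        System.out.println(startTime);             // millisecond key: 1672531200000
        System.out.println(startTime / 1000L);     // second key: 1672531200
        System.out.println(startTime / 86400000L); // day key: 19358 days since epoch
    }
}
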
 
Example 3
Source File: SegmentPruner.java    From kylin with Apache License 2.0
public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
    DataModelDesc model = seg.getModel();
    PartitionDesc part = model.getPartitionDesc();

    if (!part.isPartitioned())
        return null;
    if (!col.equals(part.getPartitionDateColumnRef()))
        return null;

    // deduce the dim range from TSRange
    TSRange tsRange = seg.getTSRange();
    if (tsRange.start.isMin || tsRange.end.isMax)
        return null; // DimensionRangeInfo cannot express infinite

    String min = tsRangeToStr(tsRange.start.v, part);
    String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
    return new DimensionRangeInfo(min, max);
}
 
Example 4
Source File: CubeManager.java    From kylin-on-parquet-v2 with Apache License 2.0
CubeSegment appendSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange,
        Map<Integer, Long> sourcePartitionOffsetStart, Map<Integer, Long> sourcePartitionOffsetEnd)
        throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);

    // fix start/end a bit
    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc != null && partitionDesc.isPartitioned()) {
        // if missing start, set it to where last time ends
        if (tsRange != null && tsRange.start.v == 0) {
            CubeDesc cubeDesc = cubeCopy.getDescriptor();
            CubeSegment last = cubeCopy.getLastSegment();
            if (last == null)
                tsRange = new TSRange(cubeDesc.getPartitionDateStart(), tsRange.end.v);
            else if (!last.isOffsetCube())
                tsRange = new TSRange(last.getTSRange().end.v, tsRange.end.v);
        }
    } else {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);
    newSegment.setSourcePartitionOffsetStart(sourcePartitionOffsetStart);
    newSegment.setSourcePartitionOffsetEnd(sourcePartitionOffsetEnd);
    validateNewSegments(cubeCopy, newSegment);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);
    return newSegment;
}
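
The "fix start/end a bit" block encodes a simple rule: on a partitioned model, a zero start means "continue from where the cube currently ends". Stripped of Kylin types, and ignoring the offset-cube special case, the rule reduces to a sketch like this (names and the Long null-sentinel are illustrative):

public class AppendStartSketch {
    // Plain-long sketch of the start-fixing rule above.
    static long fixedStart(long requestedStart, Long lastSegmentEnd, long partitionDateStart) {
        if (requestedStart != 0) {
            return requestedStart;    // caller supplied an explicit start
        }
        return (lastSegmentEnd != null)
                ? lastSegmentEnd      // append right where the last segment ends
                : partitionDateStart; // first segment: configured partition start
    }
}
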
 
Example 5
Source File: CubeManager.java    From kylin with Apache License 2.0
CubeSegment appendSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange,
        Map<Integer, Long> sourcePartitionOffsetStart, Map<Integer, Long> sourcePartitionOffsetEnd)
        throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);

    // fix start/end a bit
    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc != null && partitionDesc.isPartitioned()) {
        // if missing start, set it to where last time ends
        if (tsRange != null && tsRange.start.v == 0) {
            CubeDesc cubeDesc = cubeCopy.getDescriptor();
            CubeSegment last = cubeCopy.getLastSegment();
            if (last == null)
                tsRange = new TSRange(cubeDesc.getPartitionDateStart(), tsRange.end.v);
            else if (!last.isOffsetCube())
                tsRange = new TSRange(last.getTSRange().end.v, tsRange.end.v);
        }
    } else {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);
    newSegment.setSourcePartitionOffsetStart(sourcePartitionOffsetStart);
    newSegment.setSourcePartitionOffsetEnd(sourcePartitionOffsetEnd);
    validateNewSegments(cubeCopy, newSegment);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);
    return newSegment;
}
 
Example 6
Source File: JdbcHiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); // tablename.colname
    }

    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTableAlias = splitColRef.getTableAlias();

    splitColumn = getColumnIdentityQuoted(splitColRef, jdbcMetadataDialect, metaMap, true);
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    String selectSql = generateSelectDataStatementRDBMS(flatDesc, true, new String[] { partCol },
            jdbcMetadataDialect, metaMap);
    selectSql = escapeQuotationInSql(selectSql);

    String hiveTable = flatDesc.getTableName();
    String connectionUrl = config.getJdbcSourceConnectionUrl();
    String driverClass = config.getJdbcSourceDriver();
    String jdbcUser = config.getJdbcSourceUser();
    String jdbcPass = config.getJdbcSourcePass();
    String sqoopHome = config.getSqoopHome();
    String sqoopNullString = config.getSqoopNullString();
    String sqoopNullNonString = config.getSqoopNullNonString();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s ", splitColumn,
            splitColumn, getSchemaQuoted(metaMap, splitDatabase, jdbcMetadataDialect, true),
            getTableIdentityQuoted(splitColRef.getTableRef(), metaMap, jdbcMetadataDialect, true));
    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                            .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {

                String quotedPartCond = partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(
                        partitionDesc, flatDesc.getSegment(), segRange,
                        col -> getTableColumnIdentityQuoted(col, jdbcMetadataDialect, metaMap, true));
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }
    bquery = escapeQuotationInSql(bquery);

    // escape ` in cmd
    splitColumn = escapeQuotationInSql(splitColumn);

    String cmd = String.format(Locale.ROOT, "%s/bin/sqoop import" + generateSqoopConfigArgString()
            + "--connect \"%s\" --driver %s --username %s --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
            + "--target-dir %s/%s --split-by %s --boundary-query \"%s\" --null-string '%s' "
            + "--null-non-string '%s' --fields-terminated-by '%s' --num-mappers %d", sqoopHome, connectionUrl,
            driverClass, jdbcUser, jdbcPass, selectSql, jobWorkingDir, hiveTable, splitColumn, bquery,
            sqoopNullString, sqoopNullNonString, filedDelimiter, mapperNum);
    logger.debug("sqoop cmd : {}", cmd);
    CmdStep step = new CmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
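
For a partitioned model, the resulting --boundary-query is a min/max over the split column, optionally restricted to the segment's date range. With illustrative identifiers and a hand-written date condition (standing in for the quoting helpers and PartitionConditionBuilder above), the string Sqoop receives looks roughly like:

import java.util.Locale;

public class BoundaryQuerySketch {
    public static void main(String[] args) {
        String splitColumn = "FACT.CAL_DT"; // illustrative alias.column pair
        String bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s ",
                splitColumn, splitColumn, "EDW", "TEST_KYLIN_FACT");
        // For a partitioned model with a finite segment, a date-range condition
        // (built by the PartitionConditionBuilder in the example) is appended:
        bquery += " WHERE FACT.CAL_DT >= '2023-01-01' AND FACT.CAL_DT < '2023-02-01'";
        System.out.println(bquery);
    }
}

Bounding the min/max scan to the segment's own range means Sqoop's mappers split only the rows actually being built, not the whole source table.
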
 
Example 7
Source File: JdbcHiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = flatDesc.getDataModel().getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;
    boolean enableQuote = dataSource.getSqlConverter().getConfigurer().enableQuote();
    SqlDialect sqlDialect = enableQuote ? dataSource.getSqlConverter().getConfigurer().getSqlDialect() : FlatTableSqlQuoteUtils.NON_QUOTE_DIALECT;
    SqlConverter.IConfigurer iconfigurer = dataSource.getSqlConverter().getConfigurer();

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); // tablename.colname
    }

    String splitTable;
    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTable = splitColRef.getTableRef().getTableDesc().getName();
    splitTableAlias = splitColRef.getTableAlias();
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    if (enableQuote) {
        splitColumn = sqlDialect.quoteIdentifier(splitColRef.getTableAlias()) + "."
                + sqlDialect.quoteIdentifier(splitColRef.getName());
        splitDatabase = sqlDialect.quoteIdentifier(splitDatabase);
        splitTable = sqlDialect.quoteIdentifier(splitTable);
        splitTableAlias = sqlDialect.quoteIdentifier(splitTableAlias);
    } else {
        splitColumn = splitColRef.getTableAlias() + "." + splitColRef.getName();
    }

    String selectSql = JoinedFlatTable.generateSelectDataStatement(flatDesc, true, new String[]{partCol}, sqlDialect);
    selectSql = escapeQuotationInSql(dataSource.convertSql(selectSql));

    String hiveTable = flatDesc.getTableName();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s as %s",
            splitColumn, splitColumn, splitDatabase, splitTable, splitTableAlias);
    bquery = dataSource.convertSql(bquery);

    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                            .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {
                String quotedPartCond = FlatTableSqlQuoteUtils.quoteIdentifierInSqlExpr(flatDesc,
                        partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(partitionDesc,
                                flatDesc.getSegment(), segRange, null), sqlDialect);
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }

    bquery = escapeQuotationInSql(bquery);
    splitColumn = escapeQuotationInSql(splitColumn);
    String cmd = StringUtils.format(
            "--connect \"%s\" --driver \"%s\" --username \"%s\" --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
                    + "--target-dir \"%s/%s\" --split-by \"%s\" --boundary-query \"%s\" --null-string '' "
                    + "--fields-terminated-by '%s' --num-mappers %d",
            dataSource.getJdbcUrl(), dataSource.getJdbcDriver(), dataSource.getJdbcUser(),
            dataSource.getJdbcPassword(), selectSql, jobWorkingDir, hiveTable, splitColumn, bquery,
            filedDelimiter, mapperNum);
    if (iconfigurer.getTransactionIsolationLevel() != null) {
        cmd = cmd + " --relaxed-isolation --metadata-transaction-isolation-level "
                + iconfigurer.getTransactionIsolationLevel();
    }
    logger.debug("sqoop cmd: {}", cmd);

    SqoopCmdStep step = new SqoopCmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
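
The quoting branch is the main difference from Example 6: identifiers are wrapped by the source dialect only when quoting is enabled. A reduced sketch of that decision, where quote() is a stand-in for SqlDialect#quoteIdentifier (this example does not define it):

public class QuoteToggleSketch {
    static String quote(String id) {
        return "\"" + id + "\""; // stand-in for SqlDialect#quoteIdentifier
    }

    static String splitColumn(String alias, String column, boolean enableQuote) {
        return enableQuote
                ? quote(alias) + "." + quote(column) // e.g. "FACT"."CAL_DT"
                : alias + "." + column;              // e.g. FACT.CAL_DT
    }

    public static void main(String[] args) {
        System.out.println(splitColumn("FACT", "CAL_DT", true));
        System.out.println(splitColumn("FACT", "CAL_DT", false));
    }
}
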
 
Example 8
Source File: GTCubeStorageQueryBase.java    From kylin-on-parquet-v2 with Apache License 2.0
private boolean isExactAggregation(StorageContext context, Cuboid cuboid, Collection<TblColRef> groups,
        Set<TblColRef> othersD, Set<TblColRef> singleValuesD, Set<TblColRef> derivedPostAggregation,
        Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls, boolean groupByExpression) {
    if (context.isNeedStorageAggregation()) {
        logger.info("exactAggregation is false because need storage aggregation");
        return false;
    }

    if (cuboid.requirePostAggregation()) {
        logger.info("exactAggregation is false because cuboid {}=>{}", cuboid.getInputID(), cuboid.getId());
        return false;
    }

    // derived aggregation is bad, unless the expanded columns are already in the group by
    if (!groups.containsAll(derivedPostAggregation)) {
        logger.info("exactAggregation is false because derived column require post aggregation: {}",
                derivedPostAggregation);
        return false;
    }

    // other columns (from filter) are bad, unless they are ensured to have a single value
    if (!singleValuesD.containsAll(othersD)) {
        logger.info("exactAggregation is false because some column not on group by: {} (single value column: {})",
                othersD, singleValuesD);
        return false;
    }

    // for DimensionAsMetric like max(cal_dt), the dimension column may not be in the real group by
    for (FunctionDesc functionDesc : functionDescs) {
        if (functionDesc.isDimensionAsMetric()) {
            logger.info("exactAggregation is false because has DimensionAsMetric");
            return false;
        }
    }
    for (SQLDigest.SQLCall aggrSQLCall : aggrSQLCalls) {
        if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)
                || aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_VALUE)) {
            logger.info("exactAggregation is false because has INTERSECT_COUNT OR INTERSECT_VALUE");
            return false;
        }
    }

    // for a partitioned cube, the partition column must be in the group by or have a single value
    PartitionDesc partDesc = cuboid.getCubeDesc().getModel().getPartitionDesc();
    if (partDesc.isPartitioned()) {
        TblColRef col = partDesc.getPartitionDateColumnRef();
        if (!groups.contains(col) && !singleValuesD.contains(col)) {
            logger.info("exactAggregation is false because cube is partitioned and {} is not on group by", col);
            return false;
        }
    }

    // for a group-by expression like 'group by seller_id/100': seller_id 200 and seller_id 201 both map to 2, so rows cannot be aggregated exactly
    if (groupByExpression) {
        logger.info("exactAggregation is false because group by expression");
        return false;
    }

    logger.info("exactAggregation is true, cuboid id is {}", cuboid.getId());
    return true;
}
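
The partition-column check at the end is worth isolating: on a partitioned cube, exact aggregation requires the partition date column to be either grouped on or pinned to a single value by the filter. As a minimal set-based sketch of that rule:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class PartitionCoverageSketch {
    // Exact aggregation needs the partition column grouped on, or pinned
    // to a single value by the query filter.
    static boolean partitionColumnCovered(Set<String> groupByCols,
                                          Set<String> singleValueCols,
                                          String partitionCol) {
        return groupByCols.contains(partitionCol) || singleValueCols.contains(partitionCol);
    }

    public static void main(String[] args) {
        Set<String> none = new HashSet<>();
        Set<String> groups = new HashSet<>(Arrays.asList("CAL_DT", "SELLER_ID"));
        System.out.println(partitionColumnCovered(groups, none, "CAL_DT")); // true
        System.out.println(partitionColumnCovered(
                new HashSet<>(Arrays.asList("SELLER_ID")), none, "CAL_DT")); // false
    }
}
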
 
Example 9
Source File: CubeManager.java    From kylin-on-parquet-v2 with Apache License 2.0
public CubeSegment refreshSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange)
        throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);
    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc == null || !partitionDesc.isPartitioned()) {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);

    Pair<Boolean, Boolean> pair = cubeCopy.getSegments().fitInSegments(newSegment);
    if (!pair.getFirst() || !pair.getSecond())
        throw new IllegalArgumentException("The new refreshing segment " + newSegment
                + " does not match any existing segment in cube " + cubeCopy);

    if (segRange != null) {
        CubeSegment toRefreshSeg = null;
        for (CubeSegment cubeSegment : cubeCopy.getSegments()) {
            if (cubeSegment.getSegRange().equals(segRange)) {
                toRefreshSeg = cubeSegment;
                break;
            }
        }

        if (toRefreshSeg == null) {
            throw new IllegalArgumentException(
                    "For streaming cube, only one segment can be refreshed at one time");
        }

        newSegment.setSourcePartitionOffsetStart(toRefreshSeg.getSourcePartitionOffsetStart());
        newSegment.setSourcePartitionOffsetEnd(toRefreshSeg.getSourcePartitionOffsetEnd());
    }

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);

    return newSegment;
}
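
The lookup loop that locates the segment being refreshed is a plain find-first over the cube's segments. For readers who prefer streams, an equivalent generic sketch (Seg and Range are stand-ins for CubeSegment and SegmentRange, not Kylin types):

import java.util.List;
import java.util.Optional;
import java.util.function.Function;

public class FindSegmentSketch {
    // Find the first segment whose range equals the target, if any.
    static <Seg, Range> Optional<Seg> findByRange(List<Seg> segments, Range target,
                                                  Function<Seg, Range> rangeOf) {
        return segments.stream()
                .filter(s -> target.equals(rangeOf.apply(s)))
                .findFirst();
    }
}
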
 
Example 10
Source File: JdbcHiveInputBase.java    From kylin with Apache License 2.0
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); // tablename.colname
    }

    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTableAlias = splitColRef.getTableAlias();

    splitColumn = getColumnIdentityQuoted(splitColRef, jdbcMetadataDialect, metaMap, true);
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    String selectSql = generateSelectDataStatementRDBMS(flatDesc, true, new String[] { partCol },
            jdbcMetadataDialect, metaMap);
    selectSql = escapeQuotationInSql(selectSql);

    String hiveTable = flatDesc.getTableName();
    String connectionUrl = config.getJdbcSourceConnectionUrl();
    String driverClass = config.getJdbcSourceDriver();
    String jdbcUser = config.getJdbcSourceUser();
    String jdbcPass = config.getJdbcSourcePass();
    String sqoopHome = config.getSqoopHome();
    String sqoopNullString = config.getSqoopNullString();
    String sqoopNullNonString = config.getSqoopNullNonString();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s ", splitColumn,
            splitColumn, getSchemaQuoted(metaMap, splitDatabase, jdbcMetadataDialect, true),
            getTableIdentityQuoted(splitColRef.getTableRef(), metaMap, jdbcMetadataDialect, true));
    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                            .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {

                String quotedPartCond = partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(
                        partitionDesc, flatDesc.getSegment(), segRange,
                        col -> getTableColumnIdentityQuoted(col, jdbcMetadataDialect, metaMap, true));
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }
    bquery = escapeQuotationInSql(bquery);

    // escape ` in cmd
    splitColumn = escapeQuotationInSql(splitColumn);

    String cmd = String.format(Locale.ROOT, "%s/bin/sqoop import" + generateSqoopConfigArgString()
            + "--connect \"%s\" --driver %s --username %s --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
            + "--target-dir %s/%s --split-by %s --boundary-query \"%s\" --null-string '%s' "
            + "--null-non-string '%s' --fields-terminated-by '%s' --num-mappers %d", sqoopHome, connectionUrl,
            driverClass, jdbcUser, jdbcPass, selectSql, jobWorkingDir, hiveTable, splitColumn, bquery,
            sqoopNullString, sqoopNullNonString, filedDelimiter, mapperNum);
    logger.debug("sqoop cmd : {}", cmd);
    CmdStep step = new CmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
 
Example 11
Source File: JdbcHiveInputBase.java    From kylin with Apache License 2.0
@Override
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = flatDesc.getDataModel().getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;
    boolean enableQuote = dataSource.getSqlConverter().getConfigurer().enableQuote();
    enableQuote = enableQuote && config.enableHiveDdlQuote();
    logger.debug("Quote switch is set to {}", enableQuote);
    SqlDialect sqlDialect = enableQuote ? dataSource.getSqlConverter().getConfigurer().getSqlDialect() : FlatTableSqlQuoteUtils.NON_QUOTE_DIALECT;
    SqlConverter.IConfigurer iconfigurer = dataSource.getSqlConverter().getConfigurer();

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); // tablename.colname
    }

    String splitTable;
    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTable = splitColRef.getTableRef().getTableDesc().getName();
    splitTableAlias = splitColRef.getTableAlias();
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    if (enableQuote) {
        splitColumn = sqlDialect.quoteIdentifier(splitColRef.getTableAlias()) + "."
                + sqlDialect.quoteIdentifier(splitColRef.getName());
        splitDatabase = sqlDialect.quoteIdentifier(splitDatabase);
        splitTable = sqlDialect.quoteIdentifier(splitTable);
        splitTableAlias = sqlDialect.quoteIdentifier(splitTableAlias);
    } else {
        splitColumn = splitColRef.getTableAlias() + "." + splitColRef.getName();
    }

    String selectSql = JoinedFlatTable.generateSelectDataStatement(flatDesc, true, new String[]{partCol}, sqlDialect);
    selectSql = escapeQuotationInSql(dataSource.convertSql(selectSql));

    String hiveTable = flatDesc.getTableName();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s as %s",
            splitColumn, splitColumn, splitDatabase, splitTable, splitTableAlias);
    bquery = dataSource.convertSql(bquery);

    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                    .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {
                String quotedPartCond = FlatTableSqlQuoteUtils.quoteIdentifierInSqlExpr(flatDesc,
                        partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(partitionDesc,
                                flatDesc.getSegment(), segRange, null), sqlDialect);
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }

    bquery = escapeQuotationInSql(bquery);
    splitColumn = escapeQuotationInSql(splitColumn);
    String cmd = StringUtils.format(
            "--connect \"%s\" --driver \"%s\" --username \"%s\" --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
                    + "--target-dir \"%s/%s\" --split-by \"%s\" --boundary-query \"%s\" --null-string '' "
                    + "--fields-terminated-by '%s' --num-mappers %d",
            dataSource.getJdbcUrl(), dataSource.getJdbcDriver(), dataSource.getJdbcUser(),
            dataSource.getJdbcPassword(), selectSql, jobWorkingDir, hiveTable, splitColumn, bquery,
            filedDelimiter, mapperNum);
    if (iconfigurer.getTransactionIsolationLevel() != null) {
        cmd = cmd + " --relaxed-isolation --metadata-transaction-isolation-level "
                + iconfigurer.getTransactionIsolationLevel();
    }
    logger.debug("sqoop cmd: {}", cmd);

    SqoopCmdStep step = new SqoopCmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
 
Example 12
Source File: GTCubeStorageQueryBase.java    From kylin with Apache License 2.0
private boolean isExactAggregation(StorageContext context, Cuboid cuboid, Collection<TblColRef> groups,
        Set<TblColRef> othersD, Set<TblColRef> singleValuesD, Set<TblColRef> derivedPostAggregation,
        Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls, boolean groupByExpression) {
    if (context.isNeedStorageAggregation()) {
        logger.info("exactAggregation is false because need storage aggregation");
        return false;
    }

    if (cuboid.requirePostAggregation()) {
        logger.info("exactAggregation is false because cuboid {}=>{}", cuboid.getInputID(), cuboid.getId());
        return false;
    }

    // derived aggregation is bad, unless the expanded columns are already in the group by
    if (!groups.containsAll(derivedPostAggregation)) {
        logger.info("exactAggregation is false because derived column require post aggregation: {}",
                derivedPostAggregation);
        return false;
    }

    // other columns (from filter) are bad, unless they are ensured to have a single value
    if (!singleValuesD.containsAll(othersD)) {
        logger.info("exactAggregation is false because some column not on group by: {} (single value column: {})",
                othersD, singleValuesD);
        return false;
    }

    // for DimensionAsMetric like max(cal_dt), the dimension column may not be in the real group by
    for (FunctionDesc functionDesc : functionDescs) {
        if (functionDesc.isDimensionAsMetric()) {
            logger.info("exactAggregation is false because has DimensionAsMetric");
            return false;
        }
    }
    for (SQLDigest.SQLCall aggrSQLCall : aggrSQLCalls) {
        if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)
                || aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_VALUE)) {
            logger.info("exactAggregation is false because has INTERSECT_COUNT OR INTERSECT_VALUE");
            return false;
        }
    }

    // for a partitioned cube, the partition column must be in the group by or have a single value
    PartitionDesc partDesc = cuboid.getCubeDesc().getModel().getPartitionDesc();
    if (partDesc.isPartitioned()) {
        TblColRef col = partDesc.getPartitionDateColumnRef();
        if (!groups.contains(col) && !singleValuesD.contains(col)) {
            logger.info("exactAggregation is false because cube is partitioned and {} is not on group by", col);
            return false;
        }
    }

    // for a group-by expression like 'group by seller_id/100': seller_id 200 and seller_id 201 both map to 2, so rows cannot be aggregated exactly
    if (groupByExpression) {
        logger.info("exactAggregation is false because group by expression");
        return false;
    }

    logger.info("exactAggregation is true, cuboid id is {}", cuboid.getId());
    return true;
}
 
Example 13
Source File: CubeManager.java    From kylin with Apache License 2.0
public CubeSegment refreshSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange)
        throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);
    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc == null || !partitionDesc.isPartitioned()) {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);

    Pair<Boolean, Boolean> pair = cubeCopy.getSegments().fitInSegments(newSegment);
    if (!pair.getFirst() || !pair.getSecond())
        throw new IllegalArgumentException("The new refreshing segment " + newSegment
                + " does not match any existing segment in cube " + cubeCopy);

    if (segRange != null) {
        CubeSegment toRefreshSeg = null;
        for (CubeSegment cubeSegment : cubeCopy.getSegments()) {
            if (cubeSegment.getSegRange().equals(segRange)) {
                toRefreshSeg = cubeSegment;
                break;
            }
        }

        if (toRefreshSeg == null) {
            throw new IllegalArgumentException(
                    "For streaming cube, only one segment can be refreshed at one time");
        }

        newSegment.setSourcePartitionOffsetStart(toRefreshSeg.getSourcePartitionOffsetStart());
        newSegment.setSourcePartitionOffsetEnd(toRefreshSeg.getSourcePartitionOffsetEnd());
    }

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);

    return newSegment;
}