Java Code Examples for org.apache.kylin.metadata.model.TblColRef#getTable()

The following examples show how to use org.apache.kylin.metadata.model.TblColRef#getTable(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: CubeSegment.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Looks up the dictionary resource path registered for the given column.
 *
 * The column's identity is tried first; if nothing is registered under it,
 * the Kylin v1.x style key ("table/name") is tried as a fallback.
 *
 * @param col the column whose dictionary resource path is wanted
 * @return the registered path, or {@code null} if neither key is present
 */
public String getDictResPath(TblColRef col) {
    String identityKey = col.getIdentity();
    String resPath = getDictionaries().get(identityKey);
    if (resPath != null) {
        return resPath;
    }

    // nothing under the current key: fall back to the Kylin v1.x dict key
    String legacyKey = col.getTable() + "/" + col.getName();
    return getDictionaries().get(legacyKey);
}

Example 2

Source File: CubeSegment.java From kylin with Apache License 2.0

5 votes

/**
 * Resolves the dictionary resource path for a column.
 *
 * Lookup is done by the column identity first and, when that misses, by the
 * Kylin v1.x key format ("table/name").
 *
 * @param col the column to resolve
 * @return the dictionary resource path, or {@code null} when no entry exists
 *         under either key
 */
public String getDictResPath(TblColRef col) {
    String currentKey = col.getIdentity();
    String path = getDictionaries().get(currentKey);

    if (path == null) {
        // try Kylin v1.x dict key as well
        String v1Key = col.getTable() + "/" + col.getName();
        path = getDictionaries().get(v1Key);
    }

    return path;
}

Example 3

Source File: MergeDictReducer.java From kylin-on-parquet-v2 with Apache License 2.0

4 votes

/**
 * Merges every dictionary received for one column into a single dictionary
 * and writes it to the context as a fully serialized {@code DictionaryInfo}.
 *
 * If the key is not present in {@code colNeedDictMap} a warning is logged and
 * nothing is written.
 *
 * @param key     column key; its string form is looked up in {@code colNeedDictMap}
 * @param values  serialized dictionaries for that column
 * @param context reducer context that receives the merged dictionary info
 * @throws IllegalArgumentException if no dictionary was received for the
 *         column, or if merging produced {@code null}
 */
@Override
protected void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String col = key.toString();
    logger.info("merge dictionary for column:{}", col);
    TblColRef tblColRef = colNeedDictMap.get(col);

    if (tblColRef == null) {
        logger.warn("column:{} not found in the columns need dictionary map: {}", col, colNeedDictMap.keySet());
        return;
    }

    DataType dataType = tblColRef.getType();
    List<Dictionary<String>> dicts = Lists.newLinkedList();
    for (Text value : values) {
        // Text.getBytes() exposes the whole backing buffer; only the first
        // getLength() bytes belong to the current value. Bound the wrapper so
        // stale bytes from a reused Text are never handed to the deserializer.
        ByteArray byteArray = new ByteArray(value.getBytes(), 0, value.getLength());
        Dictionary<String> dict = (Dictionary<String>) DictionarySerializer.deserialize(byteArray);
        dicts.add(dict);
    }
    Dictionary mergedDict;
    if (dicts.size() > 1) {
        // several partial dictionaries: rebuild one from the union of their values
        MultipleDictionaryValueEnumerator multipleDictionaryValueEnumerator = new MultipleDictionaryValueEnumerator(
                dataType, dicts);
        mergedDict = DictionaryGenerator.buildDictionary(dataType, multipleDictionaryValueEnumerator);
    } else if (dicts.size() == 1) {
        // a single dictionary needs no merging
        mergedDict = dicts.get(0);
    } else {
        throw new IllegalArgumentException("Dictionary missing for column " + col);
    }
    if (mergedDict == null) {
        throw new IllegalArgumentException("Merge dictionaries error for column " + col);
    }

    TableDesc tableDesc = tblColRef.getColumnDesc().getTable();
    IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
    signature.setLastModifiedTime(System.currentTimeMillis());
    signature.setPath(tableDesc.getResourcePath());

    //TODO: Table signature size?
    //        signature.setSize(mergedDict.getSize());

    DictionaryInfo dictionaryInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef
            .getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature);
    dictionaryInfo.setDictionaryObject(mergedDict);
    dictionaryInfo.setDictionaryClass(mergedDict.getClass().getName());
    dictionaryInfo.setCardinality(mergedDict.getSize());

    // serialize the complete dictionary info into an in-memory buffer
    ByteArrayOutputStream fulBuf = new ByteArrayOutputStream();
    DataOutputStream fulDout = new DataOutputStream(fulBuf);
    DictionaryInfoSerializer.FULL_SERIALIZER.serialize(dictionaryInfo, fulDout);

    Text outValue = new Text(fulBuf.toByteArray());
    context.write(key, outValue);
    logger.debug("output dict info of column {} to path: {}", col,
            context.getConfiguration().get(FileOutputFormat.OUTDIR));
}

Example 4

Source File: MergeDictReducer.java From kylin with Apache License 2.0

4 votes

/**
 * Combines all dictionaries that arrive for a single column and emits one
 * merged, fully serialized {@code DictionaryInfo} for it.
 *
 * When the key has no entry in {@code colNeedDictMap}, a warning is logged
 * and the method returns without writing anything.
 *
 * @param key     column key; its string form is looked up in {@code colNeedDictMap}
 * @param values  serialized dictionaries belonging to that column
 * @param context reducer context the result is written to
 * @throws IllegalArgumentException if there is no dictionary for the column,
 *         or if the merge result is {@code null}
 */
@Override
protected void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String col = key.toString();
    logger.info("merge dictionary for column:{}", col);
    TblColRef tblColRef = colNeedDictMap.get(col);

    if (tblColRef == null) {
        logger.warn("column:{} not found in the columns need dictionary map: {}", col, colNeedDictMap.keySet());
        return;
    }

    DataType dataType = tblColRef.getType();
    List<Dictionary<String>> dicts = Lists.newLinkedList();
    for (Text value : values) {
        // Only the first getLength() bytes of Text.getBytes() are valid data;
        // the rest of the backing buffer may hold leftovers from a previous,
        // longer value. Restrict the wrapper to the valid range.
        ByteArray byteArray = new ByteArray(value.getBytes(), 0, value.getLength());
        Dictionary<String> dict = (Dictionary<String>) DictionarySerializer.deserialize(byteArray);
        dicts.add(dict);
    }
    Dictionary mergedDict;
    if (dicts.size() > 1) {
        // more than one input: build a new dictionary over all their values
        MultipleDictionaryValueEnumerator multipleDictionaryValueEnumerator = new MultipleDictionaryValueEnumerator(
                dataType, dicts);
        mergedDict = DictionaryGenerator.buildDictionary(dataType, multipleDictionaryValueEnumerator);
    } else if (dicts.size() == 1) {
        // exactly one input: reuse it as-is
        mergedDict = dicts.get(0);
    } else {
        throw new IllegalArgumentException("Dictionary missing for column " + col);
    }
    if (mergedDict == null) {
        throw new IllegalArgumentException("Merge dictionaries error for column " + col);
    }

    TableDesc tableDesc = tblColRef.getColumnDesc().getTable();
    IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
    signature.setLastModifiedTime(System.currentTimeMillis());
    signature.setPath(tableDesc.getResourcePath());

    //TODO: Table signature size?
    //        signature.setSize(mergedDict.getSize());

    DictionaryInfo dictionaryInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef
            .getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature);
    dictionaryInfo.setDictionaryObject(mergedDict);
    dictionaryInfo.setDictionaryClass(mergedDict.getClass().getName());
    dictionaryInfo.setCardinality(mergedDict.getSize());

    // write the full dictionary info into a byte buffer for the output value
    ByteArrayOutputStream fulBuf = new ByteArrayOutputStream();
    DataOutputStream fulDout = new DataOutputStream(fulBuf);
    DictionaryInfoSerializer.FULL_SERIALIZER.serialize(dictionaryInfo, fulDout);

    Text outValue = new Text(fulBuf.toByteArray());
    context.write(key, outValue);
    logger.debug("output dict info of column {} to path: {}", col,
            context.getConfiguration().get(FileOutputFormat.OUTDIR));
}

Example 5

Source File: DictionaryManager.java From Kylin with Apache License 2.0

4 votes

/**
 * Decides which table and column supply the values for a column's dictionary.
 *
 * Three cases are handled:
 * <ol>
 * <li>{@code model} is {@code null}: the column itself is the source, read
 *     from a file under {@code factColumnsPath}.</li>
 * <li>{@code dict} is "true", "string", "number" or "any": the column is the
 *     source, except that a column on the fact table is replaced by the
 *     primary key found via {@code findPKByFK(col, "inner")} when one exists.</li>
 * <li>any other {@code dict} value: it is treated as the name of a
 *     pre-defined data set (legacy path).</li>
 * </ol>
 *
 * @param model           the data model, or {@code null} for the full-table case
 * @param dict            dictionary setting of the column
 * @param col             the column the dictionary is for
 * @param factColumnsPath base path of the per-column fact table files
 * @return a four element array: 1. source table name, 2. source column name,
 *         3. zero-based column index in the source table (0 for a pre-defined
 *         data set), 4. the ReadableTable to scan
 * @throws IllegalArgumentException if {@code dict} names a data set that
 *         cannot be unpacked
 */
public Object[] decideSourceData(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException {
    MetadataManager metaMgr = MetadataManager.getInstance(config);

    // case of full table (dict on fact table)
    if (model == null) {
        String factTableName = col.getTable();
        String factColName = col.getName();
        int factColIdx = col.getColumn().getZeroBasedIndex();
        int columnCount = metaMgr.getTableDesc(col.getTable()).getColumnCount();
        ReadableTable factValues = new FileTable(factColumnsPath + "/" + col.getName(), columnCount);
        return new Object[] { factTableName, factColName, factColIdx, factValues };
    }

    boolean scanTable = "true".equals(dict) || "string".equals(dict) || "number".equals(dict) || "any".equals(dict);

    // anything else refers to a data set, e.g. common_indicators.txt
    // (LEGACY PATH, since distinct values are collected from fact table)
    if (!scanTable) {
        String dataSetPath = unpackDataSet(this.config.getTempHDFSDir(), dict);
        if (dataSetPath == null) {
            throw new IllegalArgumentException("Unknown dictionary data set '" + dict + "', referred from " + col);
        }
        ReadableTable dataSet = new FileTable(dataSetPath, -1);
        return new Object[] { "PREDEFINED", dict, 0, dataSet };
    }

    // normal case: a FK on the fact table with an inner join is served by
    // scanning the counterpart PK on the lookup table instead
    TblColRef sourceCol = col;
    if (model.isFactTable(sourceCol.getTable())) {
        TblColRef lookupPk = model.findPKByFK(sourceCol, "inner");
        if (lookupPk != null) {
            sourceCol = lookupPk;
        }
    }

    String sourceTableName = sourceCol.getTable();
    String sourceColName = sourceCol.getName();
    int sourceColIdx = sourceCol.getColumn().getZeroBasedIndex();

    ReadableTable sourceValues;
    if (model.isFactTable(sourceCol.getTable())) {
        sourceValues = new FileTable(factColumnsPath + "/" + sourceCol.getName(), -1);
    } else {
        sourceValues = new HiveTable(metaMgr, sourceCol.getTable());
    }

    return new Object[] { sourceTableName, sourceColName, sourceColIdx, sourceValues };
}

Example 6

Source File: CubeSegment.java From Kylin with Apache License 2.0

4 votes

/**
 * Builds the dictionary map key for a column, in the form "table/name".
 *
 * @param col the column to build the key for
 * @return the column's table and name joined by a slash
 */
private String dictKey(TblColRef col) {
    String table = col.getTable();
    String name = col.getName();
    return table + "/" + name;
}

Example 7

Source File: CubeSegment.java From Kylin with Apache License 2.0

4 votes

/**
 * Returns the key under which a column's dictionary is stored: the column's
 * table followed by "/" and the column's name.
 *
 * @param col the column to derive the key from
 * @return the "table/name" key
 */
private String dictKey(TblColRef col) {
    String tablePart = col.getTable();
    String columnPart = col.getName();
    return tablePart + "/" + columnPart;
}

Example 8

Source File: IISegment.java From Kylin with Apache License 2.0

4 votes

/**
 * Composes the dictionary lookup key for a column as "table/name".
 *
 * @param col the column whose key is wanted
 * @return the table and the column name separated by "/"
 */
private String dictKey(TblColRef col) {
    String owner = col.getTable();
    String column = col.getName();
    return owner + "/" + column;
}