Java Code Examples for org.apache.kylin.metadata.model.TblColRef#getDatatype()

The following examples show how to use org.apache.kylin.metadata.model.TblColRef#getDatatype(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: CubingUtils.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Saves every dictionary in {@code dictionaryMap} through the {@link DictionaryManager} and
 * records the resulting resource path on {@code cubeSegment}.
 *
 * @param cubeSegment   segment that receives the dictionary resource paths
 * @param dictionaryMap dictionaries to save, keyed by column
 * @param startOffset   start offset; used in the signature path and size
 * @param endOffset     end offset; used in the signature path and size
 * @return for each column, the dictionary object reported back by the manager after saving
 * @throws RuntimeException wrapping the {@link IOException} if a dictionary cannot be saved
 */
@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment,
        Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    // Path and size depend only on the offsets, so every signature shares the same values
    final String signaturePath = String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset);
    final long signatureSize = endOffset - startOffset;
    final Map<TblColRef, Dictionary<String>> savedDicts = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> pending : dictionaryMap.entrySet()) {
        final TblColRef column = pending.getKey();

        // A fresh signature per column; only the modification time differs between iterations
        IReadableTable.TableSignature sig = new IReadableTable.TableSignature();
        sig.setLastModifiedTime(System.currentTimeMillis());
        sig.setPath(signaturePath);
        sig.setSize(signatureSize);
        DictionaryInfo requestedInfo = new DictionaryInfo(column.getColumnDesc(), column.getDatatype(), sig);

        logger.info("writing dictionary for TblColRef:" + column.toString());
        DictionaryManager manager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
        try {
            DictionaryInfo storedInfo = manager.trySaveNewDict(pending.getValue(), requestedInfo);
            cubeSegment.putDictResPath(column, storedInfo.getResourcePath());
            savedDicts.put(column, (Dictionary<String>) storedInfo.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + column, e);
        }
    }

    return savedDicts;
}
 
Example 2
Source File: CubingUtils.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Saves every dictionary in {@code dictionaryMap} through the {@link DictionaryManager} and
 * records the resulting resource path on {@code cubeSegment}.
 *
 * @param cubeSegment   segment that receives the dictionary resource paths
 * @param dictionaryMap dictionaries to save, keyed by column
 * @param startOffset   start offset; used in the signature path and size
 * @param endOffset     end offset; used in the signature path and size
 * @return for each column, the dictionary object reported back by the manager after saving
 * @throws RuntimeException wrapping the {@link IOException} if a dictionary cannot be saved
 */
@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment,
        Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    // Path and size depend only on the offsets, so every signature shares the same values
    final String signaturePath = String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset);
    final long signatureSize = endOffset - startOffset;
    final Map<TblColRef, Dictionary<String>> savedDicts = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> pending : dictionaryMap.entrySet()) {
        final TblColRef column = pending.getKey();

        // A fresh signature per column; only the modification time differs between iterations
        IReadableTable.TableSignature sig = new IReadableTable.TableSignature();
        sig.setLastModifiedTime(System.currentTimeMillis());
        sig.setPath(signaturePath);
        sig.setSize(signatureSize);
        DictionaryInfo requestedInfo = new DictionaryInfo(column.getColumnDesc(), column.getDatatype(), sig);

        logger.info("writing dictionary for TblColRef:" + column.toString());
        DictionaryManager manager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
        try {
            DictionaryInfo storedInfo = manager.trySaveNewDict(pending.getValue(), requestedInfo);
            cubeSegment.putDictResPath(column, storedInfo.getResourcePath());
            savedDicts.put(column, (Dictionary<String>) storedInfo.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + column, e);
        }
    }

    return savedDicts;
}
 
Example 3
Source File: DictionaryManager.java    From Kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Builds and saves the dictionary for {@code col}, or returns an existing dictionary when
 * {@code checkDupByInfo} reports one for the same dictionary info.
 *
 * @param model           passed through to {@code decideSourceData}
 * @param dict            passed through to {@code decideSourceData}
 * @param col             column whose dictionary is built; its data type goes into the dictionary info
 * @param factColumnsPath passed through to {@code decideSourceData}
 * @return the reused dictionary info, or the result of {@code trySaveNewDict} for a new one
 * @throws IOException declared by this method; no checked exception is handled here
 */
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException {
    logger.info("building dictionary for " + col);

    // decideSourceData answers with a 4-slot array: table name, column name, column index, table
    final Object[] source = decideSourceData(model, dict, col, factColumnsPath);
    final String sourceTableName = (String) source[0];
    final String sourceColumnName = (String) source[1];
    final int sourceColumnIndex = (Integer) source[2];
    final ReadableTable sourceTable = (ReadableTable) source[3];

    final DictionaryInfo candidate = new DictionaryInfo(sourceTableName, sourceColumnName, sourceColumnIndex,
            col.getDatatype(), sourceTable.getSignature(), sourceTable.getColumnDelimeter());

    // Reuse an already-stored dictionary instead of building a new one
    final String existingPath = checkDupByInfo(candidate);
    if (existingPath != null) {
        logger.info("Identical dictionary input " + candidate.getInput() + ", reuse existing dictionary at " + existingPath);
        return getDictionaryInfo(existingPath);
    }

    final Dictionary<?> built = DictionaryGenerator.buildDictionary(candidate, sourceTable);
    return trySaveNewDict(built, candidate);
}
 
Example 4
Source File: DictionaryManager.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@link DictionaryInfo} for {@code col} using the signature of {@code inpTable}.
 *
 * @param col      column supplying the column descriptor and data type
 * @param inpTable table whose signature is attached to the dictionary info
 * @return a new dictionary info for the column
 * @throws IllegalStateException if the table reports a {@code null} signature
 * @throws IOException           declared by this method; no checked exception is handled here
 */
private DictionaryInfo createDictionaryInfo(TblColRef col, IReadableTable inpTable) throws IOException {
    final TableSignature signature = inpTable.getSignature();
    if (signature != null) {
        return new DictionaryInfo(col.getColumnDesc(), col.getDatatype(), signature);
    }
    // A null signature is treated as "the table does not exist"
    throw new IllegalStateException("Input table does not exist: " + inpTable);
}
 
Example 5
Source File: SparkUHCDictionary.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary for one column and returns it in serialized form.
 *
 * <p>The column is looked up in {@code uhcColumns} by {@code columnValues._1}; every string in
 * {@code columnValues._2} is added to the dictionary builder. The output bytes hold the
 * dictionary class name (written with {@code writeUTF}) followed by the dictionary's own
 * {@code write} output.
 *
 * @param columnValues pair of (index into {@code uhcColumns}, values of that column)
 * @return a pair keyed by {@code BatchConstants.CFG_OUTPUT_DICT} whose value holds a
 *         {@code NullWritable}, the serialized dictionary bytes and the dictionary file name
 * @throws Exception if initialization, dictionary building or serialization fails
 */
@Override
public Tuple2<String, Tuple3<Writable, Writable, String>> call(Tuple2<Integer, List<String>> columnValues) throws Exception {
    // Lazy one-time init(): checked once without the lock, then re-checked while holding it.
    // NOTE(review): the monitor is SparkFactDistinct.class rather than this class, and the
    // unsynchronized read is only safe if `initialized` is volatile -- confirm both.
    if (!initialized) {
        synchronized (SparkFactDistinct.class) {
            if (!initialized) {
                init();
            }
        }
    }

    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(config);
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
         DataOutputStream outputStream = new DataOutputStream(baos)) {
        TblColRef col = uhcColumns.get(columnValues._1);
        logger.info("Processing column " + col.getName());
        if (cube.getDescriptor().getShardByColumns().contains(col)) {
            //for ShardByColumns
            builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
            builder.init(null, 0, null);
        } else {
            //for GlobalDictionaryColumns
            DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype());
            String builderClass = cubeDesc.getDictionaryBuilderClass(col);
            builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
            builder.init(dictionaryInfo, 0, hdfsDir);
        }
        for (String value : columnValues._2) {
            builder.addValue(value);
        }
        Dictionary<String> dict = builder.build();
        String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX;
        logger.info("Dictionary file name is " + dictFileName);

        outputStream.writeUTF(dict.getClass().getName());
        dict.write(outputStream);
        // Explicit type arguments (instead of a raw Tuple3) keep the return statement free of
        // unchecked conversions
        Tuple3<Writable, Writable, String> tuple3 = new Tuple3<Writable, Writable, String>(NullWritable.get(),
                new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName);
        return new Tuple2<>(BatchConstants.CFG_OUTPUT_DICT, tuple3);
    }
}
 
Example 6
Source File: DictionaryManager.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@link DictionaryInfo} for {@code col} using the signature of {@code inpTable}.
 *
 * @param col      column supplying the column descriptor and data type
 * @param inpTable table whose signature is attached to the dictionary info
 * @return a new dictionary info for the column
 * @throws IllegalStateException if the table reports a {@code null} signature
 * @throws IOException           declared by this method; no checked exception is handled here
 */
private DictionaryInfo createDictionaryInfo(TblColRef col, IReadableTable inpTable) throws IOException {
    final TableSignature signature = inpTable.getSignature();
    if (signature != null) {
        return new DictionaryInfo(col.getColumnDesc(), col.getDatatype(), signature);
    }
    // A null signature is treated as "the table does not exist"
    throw new IllegalStateException("Input table does not exist: " + inpTable);
}
 
Example 7
Source File: SparkUHCDictionary.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary for one column and returns it in serialized form.
 *
 * <p>The column is looked up in {@code uhcColumns} by {@code columnValues._1}; every string in
 * {@code columnValues._2} is added to the dictionary builder. The output bytes hold the
 * dictionary class name (written with {@code writeUTF}) followed by the dictionary's own
 * {@code write} output.
 *
 * @param columnValues pair of (index into {@code uhcColumns}, values of that column)
 * @return a pair keyed by {@code BatchConstants.CFG_OUTPUT_DICT} whose value holds a
 *         {@code NullWritable}, the serialized dictionary bytes and the dictionary file name
 * @throws Exception if initialization, dictionary building or serialization fails
 */
@Override
public Tuple2<String, Tuple3<Writable, Writable, String>> call(Tuple2<Integer, List<String>> columnValues) throws Exception {
    // Lazy one-time init(): checked once without the lock, then re-checked while holding it.
    // NOTE(review): the monitor is SparkFactDistinct.class rather than this class, and the
    // unsynchronized read is only safe if `initialized` is volatile -- confirm both.
    if (!initialized) {
        synchronized (SparkFactDistinct.class) {
            if (!initialized) {
                init();
            }
        }
    }

    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(config);
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
         DataOutputStream outputStream = new DataOutputStream(baos)) {
        TblColRef col = uhcColumns.get(columnValues._1);
        logger.info("Processing column " + col.getName());
        if (cube.getDescriptor().getShardByColumns().contains(col)) {
            //for ShardByColumns
            builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
            builder.init(null, 0, null);
        } else {
            //for GlobalDictionaryColumns
            DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype());
            String builderClass = cubeDesc.getDictionaryBuilderClass(col);
            builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
            builder.init(dictionaryInfo, 0, hdfsDir);
        }
        for (String value : columnValues._2) {
            builder.addValue(value);
        }
        Dictionary<String> dict = builder.build();
        String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX;
        logger.info("Dictionary file name is " + dictFileName);

        outputStream.writeUTF(dict.getClass().getName());
        dict.write(outputStream);
        // Explicit type arguments (instead of a raw Tuple3) keep the return statement free of
        // unchecked conversions
        Tuple3<Writable, Writable, String> tuple3 = new Tuple3<Writable, Writable, String>(NullWritable.get(),
                new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName);
        return new Tuple2<>(BatchConstants.CFG_OUTPUT_DICT, tuple3);
    }
}
 
Example 8
Source File: MergeDictReducer.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Merges all serialized dictionaries received for one column and emits the resulting
 * {@link DictionaryInfo} in fully serialized form under the same key.
 *
 * <p>If the column is not present in {@code colNeedDictMap}, a warning is logged and nothing
 * is written.
 *
 * @param key     column name as text; also used as the output key
 * @param values  serialized dictionaries for that column
 * @param context reducer context the merged dictionary info is written to
 * @throws IllegalArgumentException if no dictionary was received or the merged dictionary is null
 * @throws IOException              declared by this method; no checked exception is handled here
 * @throws InterruptedException     declared by this method; no checked exception is handled here
 */
@Override
protected void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String col = key.toString();
    logger.info("merge dictionary for column:{}", col);
    TblColRef tblColRef = colNeedDictMap.get(col);

    if (tblColRef == null) {
        logger.warn("column:{} not found in the columns need dictionary map: {}", col, colNeedDictMap.keySet());
        return;
    }

    DataType dataType = tblColRef.getType();
    List<Dictionary<String>> dicts = Lists.newLinkedList();
    for (Text value : values) {
        // Text.getBytes() returns the whole backing buffer, of which only the first getLength()
        // bytes are valid; copyBytes() returns exactly the valid bytes.
        ByteArray byteArray = new ByteArray(value.copyBytes());
        Dictionary<String> dict = (Dictionary<String>) DictionarySerializer.deserialize(byteArray);
        dicts.add(dict);
    }
    Dictionary mergedDict;
    if (dicts.size() > 1) {
        MultipleDictionaryValueEnumerator multipleDictionaryValueEnumerator = new MultipleDictionaryValueEnumerator(
                dataType, dicts);
        mergedDict = DictionaryGenerator.buildDictionary(dataType, multipleDictionaryValueEnumerator);
    } else if (dicts.size() == 1) {
        // A single dictionary needs no merging
        mergedDict = dicts.get(0);
    } else {
        throw new IllegalArgumentException("Dictionary missing for column " + col);
    }
    if (mergedDict == null) {
        throw new IllegalArgumentException("Merge dictionaries error for column " + col);
    }

    TableDesc tableDesc = tblColRef.getColumnDesc().getTable();
    IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
    signature.setLastModifiedTime(System.currentTimeMillis());
    signature.setPath(tableDesc.getResourcePath());

    //TODO: Table signature size?
    //        signature.setSize(mergedDict.getSize());

    DictionaryInfo dictionaryInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef
            .getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature);
    dictionaryInfo.setDictionaryObject(mergedDict);
    dictionaryInfo.setDictionaryClass(mergedDict.getClass().getName());
    dictionaryInfo.setCardinality(mergedDict.getSize());

    // Serialize the dictionary info into memory so it can be emitted as a single Text value
    ByteArrayOutputStream fulBuf = new ByteArrayOutputStream();
    DataOutputStream fulDout = new DataOutputStream(fulBuf);
    DictionaryInfoSerializer.FULL_SERIALIZER.serialize(dictionaryInfo, fulDout);

    Text outValue = new Text(fulBuf.toByteArray());
    context.write(key, outValue);
    logger.debug("output dict info of column {} to path: {}", col,
            context.getConfiguration().get(FileOutputFormat.OUTDIR));
}
 
Example 9
Source File: MergeDictReducer.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Merges all serialized dictionaries received for one column and emits the resulting
 * {@link DictionaryInfo} in fully serialized form under the same key.
 *
 * <p>If the column is not present in {@code colNeedDictMap}, a warning is logged and nothing
 * is written.
 *
 * @param key     column name as text; also used as the output key
 * @param values  serialized dictionaries for that column
 * @param context reducer context the merged dictionary info is written to
 * @throws IllegalArgumentException if no dictionary was received or the merged dictionary is null
 * @throws IOException              declared by this method; no checked exception is handled here
 * @throws InterruptedException     declared by this method; no checked exception is handled here
 */
@Override
protected void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String col = key.toString();
    logger.info("merge dictionary for column:{}", col);
    TblColRef tblColRef = colNeedDictMap.get(col);

    if (tblColRef == null) {
        logger.warn("column:{} not found in the columns need dictionary map: {}", col, colNeedDictMap.keySet());
        return;
    }

    DataType dataType = tblColRef.getType();
    List<Dictionary<String>> dicts = Lists.newLinkedList();
    for (Text value : values) {
        // Text.getBytes() returns the whole backing buffer, of which only the first getLength()
        // bytes are valid; copyBytes() returns exactly the valid bytes.
        ByteArray byteArray = new ByteArray(value.copyBytes());
        Dictionary<String> dict = (Dictionary<String>) DictionarySerializer.deserialize(byteArray);
        dicts.add(dict);
    }
    Dictionary mergedDict;
    if (dicts.size() > 1) {
        MultipleDictionaryValueEnumerator multipleDictionaryValueEnumerator = new MultipleDictionaryValueEnumerator(
                dataType, dicts);
        mergedDict = DictionaryGenerator.buildDictionary(dataType, multipleDictionaryValueEnumerator);
    } else if (dicts.size() == 1) {
        // A single dictionary needs no merging
        mergedDict = dicts.get(0);
    } else {
        throw new IllegalArgumentException("Dictionary missing for column " + col);
    }
    if (mergedDict == null) {
        throw new IllegalArgumentException("Merge dictionaries error for column " + col);
    }

    TableDesc tableDesc = tblColRef.getColumnDesc().getTable();
    IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
    signature.setLastModifiedTime(System.currentTimeMillis());
    signature.setPath(tableDesc.getResourcePath());

    //TODO: Table signature size?
    //        signature.setSize(mergedDict.getSize());

    DictionaryInfo dictionaryInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef
            .getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature);
    dictionaryInfo.setDictionaryObject(mergedDict);
    dictionaryInfo.setDictionaryClass(mergedDict.getClass().getName());
    dictionaryInfo.setCardinality(mergedDict.getSize());

    // Serialize the dictionary info into memory so it can be emitted as a single Text value
    ByteArrayOutputStream fulBuf = new ByteArrayOutputStream();
    DataOutputStream fulDout = new DataOutputStream(fulBuf);
    DictionaryInfoSerializer.FULL_SERIALIZER.serialize(dictionaryInfo, fulDout);

    Text outValue = new Text(fulBuf.toByteArray());
    context.write(key, outValue);
    logger.debug("output dict info of column {} to path: {}", col,
            context.getConfiguration().get(FileOutputFormat.OUTDIR));
}