Java Code Examples for org.apache.kylin.cube.CubeManager#updateCube()

The following examples show how to use org.apache.kylin.cube.CubeManager#updateCube(). They are taken from open-source projects; the source file and project are noted above each example.
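Nearly every example below follows the same copy-on-write pattern: take a writable copy of the cached CubeInstance, mutate only the copy, describe the delta in a CubeUpdate, and pass it to updateCube(). A minimal sketch of the pattern, assuming hypothetical placeholders for the method name, cube name, and segment id (none of these come from the examples):

private static void touchSegment(KylinConfig config, String cubeName, String segmentId) throws IOException {
    CubeManager mgr = CubeManager.getInstance(config);

    // Work on a copy instead of the cached object, as the examples below do.
    CubeInstance cubeCopy = mgr.getCube(cubeName).latestCopyForWrite();
    CubeSegment segCopy = cubeCopy.getSegmentById(segmentId);
    segCopy.setLastBuildTime(System.currentTimeMillis());

    // A CubeUpdate describes the delta: segments to add, update, or remove,
    // plus instance-level changes such as the realization status.
    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    mgr.updateCube(update); // persists the change and refreshes the cached instance
}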
Example 1
Source File: CopyDictionaryStep.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams())).latestCopyForWrite();
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    // --- Copy dictionary
    optimizeSegment.getDictionaries().putAll(oldSegment.getDictionaries());
    optimizeSegment.getSnapshots().putAll(oldSegment.getSnapshots());
    optimizeSegment.getRowkeyStats().addAll(oldSegment.getRowkeyStats());

    try {
        CubeUpdate cubeBuilder = new CubeUpdate(cube);
        cubeBuilder.setToUpdateSegs(optimizeSegment);
        mgr.updateCube(cubeBuilder);
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return ExecuteResult.createError(e);
    }

    return new ExecuteResult();
}
 
Example 2
Source File: CuboidShardUtil.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
public static void saveCuboidShards(CubeSegment segment, Map<Long, Short> cuboidShards, int totalShards) throws IOException {
    CubeManager cubeManager = CubeManager.getInstance(segment.getConfig());

    Map<Long, Short> filtered = Maps.newHashMap();
    for (Map.Entry<Long, Short> entry : cuboidShards.entrySet()) {
        if (entry.getValue() > 1) {
            filtered.put(entry.getKey(), entry.getValue());
        }
    }
    
    // work on copy instead of cached objects
    CubeInstance cubeCopy = segment.getCubeInstance().latestCopyForWrite();
    CubeSegment segCopy = cubeCopy.getSegmentById(segment.getUuid());

    segCopy.setCuboidShardNums(filtered);
    segCopy.setTotalShards(totalShards);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    cubeManager.updateCube(update);
}
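For context, a hypothetical call site for this utility; the cuboid ids, shard counts, and total shard number below are made-up values, not taken from the project:

Map<Long, Short> cuboidShards = Maps.newHashMap();
cuboidShards.put(255L, (short) 4); // cuboid 255 spans 4 shards and will be persisted
cuboidShards.put(15L, (short) 1);  // single-shard entries are filtered out before saving
CuboidShardUtil.saveCuboidShards(segment, cuboidShards, 16);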
 
Example 3
Source File: MergeDictionaryStep.java    From Kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    KylinConfig conf = context.getConfig();
    final CubeManager mgr = CubeManager.getInstance(conf);
    final CubeInstance cube = mgr.getCube(getCubeName());
    final CubeSegment newSegment = cube.getSegmentById(getSegmentId());
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    
    Collections.sort(mergingSegments);
    
    try {
        checkLookupSnapshotsMustIncremental(mergingSegments);
        
        makeDictForNewSegment(conf, cube, newSegment, mergingSegments);
        makeSnapshotForNewSegment(cube, newSegment, mergingSegments);
        
        mgr.updateCube(cube);
        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
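Note the older call signature in this example: in this early Kylin codebase, updateCube() accepted the mutated CubeInstance directly, whereas the later Apache Kylin releases shown elsewhere on this page require a CubeUpdate built from a writable copy. A rough side-by-side sketch, reusing the names from the example above:

// Legacy style (early Kylin, as above): mutate the cached instance, then pass it in.
mgr.updateCube(cube);

// Current style (later releases): mutate a writable copy and describe the delta.
CubeInstance cubeCopy = cube.latestCopyForWrite();
CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());
CubeUpdate update = new CubeUpdate(cubeCopy);
update.setToUpdateSegs(newSegCopy);
mgr.updateCube(update);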
 
Example 4
Source File: SignatureCalculatorTest.java    From kylin and kylin-on-parquet-v2 with Apache License 2.0
private CubeInstance cloneCubeInstance(CubeManager cubeManager, CubeInstance cube, String name) throws IOException {
    CubeInstance cubeClone = cubeManager.createCube(name, projectName, cube.getDescriptor(), cube.getOwner());
    CubeUpdate cubeUpdate = new CubeUpdate(cubeClone.latestCopyForWrite());
    cubeUpdate.setToAddSegs(cube.getSegments().toArray(new CubeSegment[cube.getSegments().size()]));
    cubeUpdate.setStatus(RealizationStatusEnum.READY);
    return cubeManager.updateCube(cubeUpdate);
}
 
Example 5
Source File: MergeOffsetStep.java    From kylin and kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cubeCopy = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams())).latestCopyForWrite();
    final String segmentId = CubingExecutableUtil.getSegmentId(this.getParams());
    final CubeSegment segCopy = cubeCopy.getSegmentById(segmentId);

    Preconditions.checkNotNull(segCopy, "Cube segment '" + segmentId + "' not found.");
    Segments<CubeSegment> mergingSegs = cubeCopy.getMergingSegments(segCopy);

    Preconditions.checkArgument(mergingSegs.size() > 0, "Merging segment not exist.");

    Collections.sort(mergingSegs);
    final CubeSegment first = mergingSegs.get(0);
    final CubeSegment last = mergingSegs.get(mergingSegs.size() - 1);

    segCopy.setSegRange(new SegmentRange(first.getSegRange().start, last.getSegRange().end));
    segCopy.setSourcePartitionOffsetStart(first.getSourcePartitionOffsetStart());
    segCopy.setSourcePartitionOffsetEnd(last.getSourcePartitionOffsetEnd());

    segCopy.setTSRange(new TSRange(mergingSegs.getTSStart(), mergingSegs.getTSEnd()));

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    try {
        cubeManager.updateCube(update);
        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to update cube segment offset", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 6
Source File: CheckpointExecutable.java    From kylin and kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void onExecuteFinished(ExecuteResult result, ExecutableContext executableContext) {
    super.onExecuteFinished(result, executableContext);
    if (!isDiscarded() && result.succeed()) {
        List<? extends Executable> jobs = getTasks();
        boolean allSucceed = true;
        for (Executable task : jobs) {
            final ExecutableState status = task.getStatus();
            if (status != ExecutableState.SUCCEED) {
                allSucceed = false;
            }
        }
        if (allSucceed) {
            // Add last optimization time
            CubeManager cubeManager = CubeManager.getInstance(executableContext.getConfig());
            CubeInstance cube = cubeManager.getCube(getCubeName());
            CubeInstance copyForWrite = cube.latestCopyForWrite();
            try {
                copyForWrite.setCuboidLastOptimized(getEndTime());
                CubeUpdate cubeUpdate = new CubeUpdate(copyForWrite);
                cubeManager.updateCube(cubeUpdate);
            } catch (IOException e) {
                logger.error("Failed to update last optimized for " + getCubeName(), e);
            }
        }
    }
}
 
Example 7
Source File: MergeDictionaryStep.java    From kylin and kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    KylinConfig conf = cube.getConfig();

    Collections.sort(mergingSegments);

    try {
        checkLookupSnapshotsMustIncremental(mergingSegments);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());
        
        makeDictForNewSegment(conf, cubeCopy, newSegCopy, mergingSegments);
        makeSnapshotForNewSegment(cubeCopy, newSegCopy, mergingSegments);

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        mgr.updateCube(update);
        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 8
Source File: NSparkCubingStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void updateMetaAfterBuilding(KylinConfig config) throws IOException {
    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance currentInstanceCopy = cubeManager.getCube(getCubeName()).latestCopyForWrite();
    KylinConfig kylinDistConfig = MetaDumpUtil.loadKylinConfigFromHdfs(getDistMetaUrl());
    CubeInstance distCube = CubeManager.getInstance(kylinDistConfig).reloadCube(getCubeName());
    CubeUpdate update = new CubeUpdate(currentInstanceCopy);
    Set<String> segmentIds = Sets.newHashSet(org.apache.hadoop.util.StringUtils.split(getParam(MetadataConstants.P_SEGMENT_IDS)));
    CubeSegment toUpdateSegs = distCube.getSegmentById(segmentIds.iterator().next());

    List<CubeSegment> tobe = currentInstanceCopy.calculateToBeSegments(toUpdateSegs);

    if (tobe.contains(toUpdateSegs) == false)
        throw new IllegalStateException(
                String.format(Locale.ROOT, "For cube %s, segment %s is expected but not in the tobe %s",
                        currentInstanceCopy.toString(), toUpdateSegs.toString(), tobe.toString()));

    toUpdateSegs.setStatus(SegmentStatusEnum.READY);

    List<CubeSegment> toRemoveSegs = Lists.newArrayList();
    for (CubeSegment segment : currentInstanceCopy.getSegments()) {
        if (!tobe.contains(segment))
            toRemoveSegs.add(segment);
    }

    logger.info("Promoting cube {}, new segment {}, to remove segments {}", currentInstanceCopy, toUpdateSegs, toRemoveSegs);

    update.setToRemoveSegs(toRemoveSegs.toArray(new CubeSegment[toRemoveSegs.size()]))
            .setToUpdateSegs(toUpdateSegs);
    if (currentInstanceCopy.getConfig().isJobAutoReadyCubeEnabled()) {
        update.setStatus(RealizationStatusEnum.READY);
    }
    cubeManager.updateCube(update);
}
 
Example 9
Source File: UpdateCubeInfoAfterBuildStep.java    From Kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(getCubeName());
    final CubeSegment segment = cube.getSegmentById(getSegmentId());

    Output baseCuboidOutput = executableManager.getOutput(getBaseCuboidStepId());
    String sourceRecordsCount = baseCuboidOutput.getExtra().get(ExecutableConstants.SOURCE_RECORDS_COUNT);
    Preconditions.checkState(StringUtils.isNotEmpty(sourceRecordsCount), "Can't get cube source record count.");
    long sourceCount = Long.parseLong(sourceRecordsCount);

    String sourceRecordsSize = baseCuboidOutput.getExtra().get(ExecutableConstants.SOURCE_RECORDS_SIZE);
    Preconditions.checkState(StringUtils.isNotEmpty(sourceRecordsSize), "Can't get cube source record size.");
    long sourceSize = Long.parseLong(sourceRecordsSize);

    long size = 0;
    boolean segmentReady = true;
    if (!StringUtils.isBlank(getConvertToHfileStepId())) {
        String cubeSizeString = executableManager.getOutput(getConvertToHfileStepId()).getExtra().get(ExecutableConstants.HDFS_BYTES_WRITTEN);
        Preconditions.checkState(StringUtils.isNotEmpty(cubeSizeString), "Can't get cube segment size.");
        size = Long.parseLong(cubeSizeString) / 1024;
    } else {
        // for the increment & merge case, the increment segment is only built to be merged, won't serve query by itself
        segmentReady = false;
    }

    segment.setLastBuildJobID(getCubingJobId());
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(size);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSize);

    try {
        if (segmentReady) {
            cubeManager.promoteNewlyBuiltSegments(cube, segment);
        } else {
            cubeManager.updateCube(cube);
        }
        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
 
Example 10
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 11
Source File: CacheServiceTest.java    From Kylin with Apache License 2.0
@Test
public void testCubeCRUD() throws Exception {
    final Broadcaster broadcaster = Broadcaster.getInstance();
    broadcaster.getCounterAndClear();

    getStore().deleteResource("/cube/a_whole_new_cube.json");

    //create cube

    final String cubeName = "a_whole_new_cube";
    final CubeManager cubeManager = getCubeManager(configA);
    final CubeManager cubeManagerB = getCubeManager(configB);
    final ProjectManager projectManager = getProjectManager(configA);
    final ProjectManager projectManagerB = getProjectManager(configB);
    final CubeDescManager cubeDescManager = getCubeDescManager(configA);
    final CubeDescManager cubeDescManagerB = getCubeDescManager(configB);
    final CubeDesc cubeDesc = getCubeDescManager(configA).getCubeDesc("test_kylin_cube_with_slr_desc");

    assertTrue(cubeManager.getCube(cubeName) == null);
    assertTrue(cubeManagerB.getCube(cubeName) == null);
    assertTrue(!containsRealization(projectManager.listAllRealizations(ProjectInstance.DEFAULT_PROJECT_NAME), RealizationType.CUBE, cubeName));
    assertTrue(!containsRealization(projectManagerB.listAllRealizations(ProjectInstance.DEFAULT_PROJECT_NAME), RealizationType.CUBE, cubeName));
    cubeManager.createCube(cubeName, ProjectInstance.DEFAULT_PROJECT_NAME, cubeDesc, null);
    assertNotNull(cubeManager.getCube(cubeName));
    //one for cube update, one for project update
    assertEquals(2, broadcaster.getCounterAndClear());
    waitForCounterAndClear(2);
    assertNotNull(cubeManagerB.getCube(cubeName));
    assertTrue(containsRealization(projectManager.listAllRealizations(ProjectInstance.DEFAULT_PROJECT_NAME), RealizationType.CUBE, cubeName));
    assertTrue(containsRealization(projectManagerB.listAllRealizations(ProjectInstance.DEFAULT_PROJECT_NAME), RealizationType.CUBE, cubeName));

    //update cube
    CubeInstance cube = cubeManager.getCube(cubeName);
    assertEquals(0, cube.getSegments().size());
    assertEquals(0, cubeManagerB.getCube(cubeName).getSegments().size());
    CubeSegment segment = new CubeSegment();
    segment.setName("test_segment");
    cube.getSegments().add(segment);
    cubeManager.updateCube(cube);
    //only one for update cube
    assertEquals(1, broadcaster.getCounterAndClear());
    waitForCounterAndClear(1);
    assertEquals(1, cubeManagerB.getCube(cubeName).getSegments().size());
    assertEquals(segment.getName(), cubeManagerB.getCube(cubeName).getSegments().get(0).getName());

    //delete cube
    cubeManager.dropCube(cubeName, false);
    assertTrue(cubeManager.getCube(cubeName) == null);
    assertTrue(!containsRealization(projectManager.listAllRealizations(ProjectInstance.DEFAULT_PROJECT_NAME), RealizationType.CUBE, cubeName));
    //one for cube update, one for project update
    assertEquals(2, broadcaster.getCounterAndClear());
    waitForCounterAndClear(2);
    assertTrue(cubeManagerB.getCube(cubeName) == null);
    assertTrue(!containsRealization(projectManagerB.listAllRealizations(ProjectInstance.DEFAULT_PROJECT_NAME), RealizationType.CUBE, cubeName));


    final String cubeDescName = "test_cube_desc";
    cubeDesc.setName(cubeDescName);
    cubeDesc.setLastModified(0);
    assertTrue(cubeDescManager.getCubeDesc(cubeDescName) == null);
    assertTrue(cubeDescManagerB.getCubeDesc(cubeDescName) == null);
    cubeDescManager.createCubeDesc(cubeDesc);
    //one for add cube desc
    assertEquals(1, broadcaster.getCounterAndClear());
    waitForCounterAndClear(1);
    assertNotNull(cubeDescManager.getCubeDesc(cubeDescName));
    assertNotNull(cubeDescManagerB.getCubeDesc(cubeDescName));


    cubeDesc.setNotifyList(Arrays.asList("test@email", "test@email", "test@email"));
    cubeDescManager.updateCubeDesc(cubeDesc);
    assertEquals(1, broadcaster.getCounterAndClear());
    waitForCounterAndClear(1);
    assertEquals(cubeDesc.getNotifyList(), cubeDescManagerB.getCubeDesc(cubeDescName).getNotifyList());

    cubeDescManager.removeCubeDesc(cubeDesc);
    //one for add cube desc
    assertEquals(1, broadcaster.getCounterAndClear());
    waitForCounterAndClear(1);
    assertTrue(cubeDescManager.getCubeDesc(cubeDescName) == null);
    assertTrue(cubeDescManagerB.getCubeDesc(cubeDescName) == null);



    getStore().deleteResource("/cube/a_whole_new_cube.json");
}
 
Example 12
Source File: JobStepFactoryTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testAddStepInMerging() throws Exception {
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(CUBE_NAME);

    cleanupSegments(CUBE_NAME);
    /**
     * Round1. Add 2 segment
     */
    CubeSegment segment1 = cubeMgr.appendSegment(cube, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2013-01-01")));
    CubeSegment segment2 = cubeMgr.appendSegment(cube, new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2015-01-01")));
    segment1.setStatus(SegmentStatusEnum.READY);
    segment2.setStatus(SegmentStatusEnum.READY);

    CubeInstance reloadCube = cube.latestCopyForWrite();
    Segments segments = new Segments();
    segments.add(segment1);
    segments.add(segment2);
    reloadCube.setSegments(segments);
    CubeUpdate update = new CubeUpdate(reloadCube);
    cubeMgr.updateCube(update);

    /**
     * Round2. Merge two segments
     */

    reloadCube = cubeMgr.reloadCube(CUBE_NAME);
    CubeSegment mergedSegment = cubeMgr.mergeSegments(reloadCube, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2015-01-01"))
            , null, true);
    NSparkMergingJob job = NSparkMergingJob.merge(mergedSegment, "ADMIN");
    Assert.assertEquals(CUBE_NAME, job.getParam(MetadataConstants.P_CUBE_NAME));

    NSparkExecutable resourceDetectStep = job.getResourceDetectStep();
    Assert.assertEquals(ResourceDetectBeforeMergingJob.class.getName(),
            resourceDetectStep.getSparkSubmitClassName());
    Assert.assertEquals(ExecutableConstants.STEP_NAME_DETECT_RESOURCE, resourceDetectStep.getName());
    job.getParams().forEach((key, value) -> Assert.assertEquals(value, resourceDetectStep.getParam(key)));
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), resourceDetectStep.getId()).toString(),
            resourceDetectStep.getDistMetaUrl());

    NSparkExecutable mergeStep = job.getSparkMergingStep();
    Assert.assertEquals(config.getSparkMergeClassName(), mergeStep.getSparkSubmitClassName());
    Assert.assertEquals(ExecutableConstants.STEP_NAME_MERGER_SPARK_SEGMENT, mergeStep.getName());
    job.getParams().forEach((key, value) -> Assert.assertEquals(value, mergeStep.getParam(key)));
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), mergeStep.getId()).toString(),
            mergeStep.getDistMetaUrl());

    CubeInstance cubeInstance = cubeMgr.reloadCube(CUBE_NAME);
    NSparkUpdateMetaAndCleanupAfterMergeStep cleanStep = job.getCleanUpAfterMergeStep();
    job.getParams().forEach((key, value) -> {
        if (key.equalsIgnoreCase(MetadataConstants.P_SEGMENT_IDS)) {
            final List<String> needDeleteSegmentNames = cubeInstance.getMergingSegments(mergedSegment).stream()
                    .map(CubeSegment::getName).collect(Collectors.toList());
            Assert.assertEquals(needDeleteSegmentNames, Arrays.asList(cleanStep.getParam(MetadataConstants.P_SEGMENT_NAMES).split(",")));
        } else {
            Assert.assertEquals(value, mergeStep.getParam(key));
        }
    });
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), cleanStep.getId()).toString(),
            cleanStep.getDistMetaUrl());
}
 
Example 13
Source File: AfterMergeOrRefreshResourceMerger.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void merge(String cubeId, String segmentId, ResourceStore remoteResourceStore, String jobType) {

    CubeManager cubeManager = CubeManager.getInstance(getConfig());
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeUpdate update = new CubeUpdate(cubeInstance.latestCopyForWrite());

    CubeManager distManager = CubeManager.getInstance(remoteResourceStore.getConfig());
    CubeInstance distCube = distManager.getCubeByUuid(cubeId).latestCopyForWrite();

    List<CubeSegment> toUpdateSegments = Lists.newArrayList();

    CubeSegment mergedSegment = distCube.getSegmentById(segmentId);
    mergedSegment.setStatus(SegmentStatusEnum.READY);
    Map<String, String> additionalInfo = mergedSegment.getAdditionalInfo();
    additionalInfo.put("storageType", "" + IStorageAware.ID_PARQUET);
    mergedSegment.setAdditionalInfo(additionalInfo);
    toUpdateSegments.add(mergedSegment);

    List<CubeSegment> toRemoveSegments = getToRemoveSegs(distCube, mergedSegment);
    Collections.sort(toRemoveSegments);
    makeSnapshotForNewSegment(mergedSegment, toRemoveSegments);

    if (String.valueOf(JobTypeEnum.INDEX_MERGE).equals(jobType)) {
        Optional<Long> reduce = toRemoveSegments.stream().map(CubeSegment::getSizeKB).filter(size -> size != -1)
                .reduce(Long::sum);
        Optional<Long> inputRecords = toRemoveSegments.stream().map(CubeSegment::getInputRecords).filter(records -> records != -1)
                .reduce(Long::sum);
        if (reduce.isPresent()) {
            long totalSourceSize = reduce.get();
            mergedSegment.setSizeKB(totalSourceSize);
            mergedSegment.setInputRecords(inputRecords.get());
            mergedSegment.setLastBuildTime(System.currentTimeMillis());
        }
    }

    update.setToRemoveSegs(toRemoveSegments.toArray(new CubeSegment[0]));
    update.setToUpdateSegs(toUpdateSegments.toArray(new CubeSegment[0]));

    try {
        cubeManager.updateCube(update);
    } catch (IOException e) {
        e.printStackTrace();
    }

}