Java Code Examples for com.clearspring.analytics.util.Lists#newArrayList()

The following examples show how to use com.clearspring.analytics.util.Lists#newArrayList(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ReplicationSystemProcedure.java    From spliceengine with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Looks up the names of all tables registered under the given schema.
 *
 * @param schemaName value bound to the {@code s.schemaname=?} placeholder of the query
 * @return the table names, in the order the result set delivered them
 * @throws SQLException if the statement cannot be prepared, executed or read
 */
private static List<String> getTablesFromSchema(String schemaName) throws SQLException{
    final String query = "select tablename from sys.systables t, sys.sysschemas s " +
            "where t.schemaid=s.schemaid and s.schemaname=?";
    final List<String> tableNames = Lists.newArrayList();
    final Connection connection = SpliceAdmin.getDefaultConn();

    try (PreparedStatement statement = connection.prepareStatement(query)) {
        statement.setString(1, schemaName);
        try (ResultSet cursor = statement.executeQuery()) {
            for (boolean hasRow = cursor.next(); hasRow; hasRow = cursor.next()) {
                tableNames.add(cursor.getString(1));
            }
        }
    }
    return tableNames;
}
 
Example 2
Source File: CsvSourceTest.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the dataset the CSV source produces for the fact table exposes, position by
 * position, the column names and data types declared by the fact table's column descriptors.
 */
@Test
public void testGetSourceDataFromFactTable() {
    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(CUBE_NAME);
    TableDesc fact = MetadataConverter.extractFactTable(cube);

    // Copy the declared columns into an indexable list so they can be matched by position.
    List<ColumnDesc> colDescs = Lists.newArrayList();
    Iterator<ColumnDesc> iterator = fact.columns().iterator();
    while (iterator.hasNext()) {
        colDescs.add(iterator.next());
    }

    NSparkCubingEngine.NSparkCubingSource cubingSource = new CsvSource().adaptToBuildEngine(NSparkCubingEngine.NSparkCubingSource.class);
    Dataset<Row> cubeDS = cubingSource.getSourceData(fact, ss, Maps.newHashMap());
    // Result is intentionally unused; the call only forces rows to be read from the source.
    cubeDS.take(10);

    // Fetch the field array once instead of on every iteration.
    StructField[] fields = cubeDS.schema().fields();
    for (int i = 0; i < colDescs.size(); i++) {
        ColumnDesc expected = colDescs.get(i);
        StructField actual = fields[i];
        // assertEquals takes (expected, actual): the table metadata is the reference value,
        // the dataset schema is what is under test.
        Assert.assertEquals(expected.columnName(), actual.name());
        Assert.assertEquals(expected.dataType(), actual.dataType());
    }
}
 
Example 3
Source File: CoreClusterManagerIT.java    From heroic with Apache License 2.0 6 votes vote down vote up
/**
 * Repeatedly asks the cluster manager for a node of the given shard and pings it, until the
 * manager reports that no further node is available.
 *
 * <p>The same list is handed to the manager both as the exclusion predicate
 * ({@code contains}) and as the callback ({@code add}); presumably the manager records each
 * node it picked there so it is not picked again - confirm against
 * {@code withNodeInShardButNotWithId}.
 *
 * @param clusterManager manager used to select nodes and run the ping
 * @param shard shard whose nodes are pinged
 * @return the ping futures, one per node result returned by the manager
 */
private List<AsyncFuture<Void>> pingAllNodesInShard(
    ClusterManager clusterManager, ClusterShard shard
) {
    final List<AsyncFuture<Void>> pings = Lists.newArrayList();
    final List<ClusterNode> alreadyTried = Lists.newArrayList();

    for (;;) {
        final Optional<ClusterManager.NodeResult<AsyncFuture<Void>>> candidate =
            clusterManager.withNodeInShardButNotWithId(shard.getShard(), alreadyTried::contains,
                alreadyTried::add, ClusterNode.Group::ping);

        if (candidate.isPresent()) {
            pings.add(candidate.get().getReturnValue());
            continue;
        }

        // The manager has no further node to offer for this shard.
        return pings;
    }
}
 
Example 4
Source File: ReplicationSystemProcedure.java    From spliceengine with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Stores in {@code resultSets[0]} a result set with a single VARCHAR column named
 * {@code walPosition}, holding one row per WAL position that the replication manager reports
 * for the given peer.
 *
 * @param peerId peer whose replicated WAL positions are requested
 * @param resultSets output array; element 0 is overwritten with the result set
 * @throws StandardException propagated from the row/result-set machinery
 * @throws SQLException propagated from the connection handling
 */
public static void GET_REPLICATED_WAL_POSITIONS(short peerId, ResultSet[] resultSets) throws StandardException, SQLException {
    List<String> positions = EngineDriver.driver().manager().getReplicationManager()
            .getReplicatedWalPositions(peerId);

    Connection connection = SpliceAdmin.getDefaultConn();
    LanguageConnectionContext languageContext =
            connection.unwrap(EmbedConnection.class).getLanguageConnection();

    ResultColumnDescriptor[] columns = new ResultColumnDescriptor[]{
            new GenericColumnDescriptor("walPosition", DataTypeDescriptor.getBuiltInDataTypeDescriptor(Types.VARCHAR))
    };

    // One single-column row per reported position.
    List<ExecRow> resultRows = Lists.newArrayList();
    for (String position : positions) {
        ExecRow row = new ValueRow(1);
        row.setRowArray(new DataValueDescriptor[]{new SQLVarchar()});
        row.getColumn(1).setValue(position);
        resultRows.add(row);
    }

    IteratorNoPutResultSet rowSource =
            new IteratorNoPutResultSet(resultRows, columns, languageContext.getLastActivation());
    rowSource.openCore();
    resultSets[0] = new EmbedResultSet40(connection.unwrap(EmbedConnection.class), rowSource, false, null, true);
}
 
Example 5
Source File: DataStoreUtils.java    From geowave with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves a comma-separated list of index names against the indices of a data store.
 *
 * @param dataStore store whose indices are searched
 * @param indexNames comma-separated index names; each piece is compared verbatim, without
 *        trimming
 * @return an unmodifiable list with the first matching index for every requested name, in the
 *         order the names were given
 * @throws ParameterException if a requested name matches none of the store's indices
 */
public static List<Index> loadIndices(final DataStore dataStore, final String indexNames) {
  final List<Index> resolved = Lists.newArrayList();
  final String[] requestedNames = indexNames.split(",");
  final Index[] available = dataStore.getIndices();

  for (final String requested : requestedNames) {
    Index match = null;
    // Stop scanning as soon as the first index with this name is seen.
    for (int i = 0; i < available.length && match == null; i++) {
      if (available[i].getName().equals(requested)) {
        match = available[i];
      }
    }
    if (match == null) {
      throw new ParameterException("Unable to find index with name: " + requested);
    }
    resolved.add(match);
  }
  return Collections.unmodifiableList(resolved);
}
 
Example 6
Source File: ReplicationSystemProcedure.java    From spliceengine with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Stores in {@code resultSets[0]} a one-row result set with a single VARCHAR column named
 * {@code walPosition}, holding the position the replication manager reports for the given WAL.
 *
 * @param wal WAL identifier passed to the replication manager
 * @param resultSets output array; element 0 is overwritten with the result set
 * @throws StandardException propagated from the row/result-set machinery
 * @throws SQLException propagated from the connection handling
 */
public static void GET_REPLICATED_WAL_POSITION(String wal, ResultSet[] resultSets) throws StandardException, SQLException {
    String position = EngineDriver.driver().manager().getReplicationManager()
            .getReplicatedWalPosition(wal);

    Connection connection = SpliceAdmin.getDefaultConn();
    LanguageConnectionContext languageContext =
            connection.unwrap(EmbedConnection.class).getLanguageConnection();

    ResultColumnDescriptor[] columns = new ResultColumnDescriptor[]{
            new GenericColumnDescriptor("walPosition", DataTypeDescriptor.getBuiltInDataTypeDescriptor(Types.VARCHAR))
    };

    // The result consists of exactly one single-column row.
    ExecRow row = new ValueRow(1);
    row.setRowArray(new DataValueDescriptor[]{new SQLVarchar()});
    row.getColumn(1).setValue(position);
    List<ExecRow> resultRows = Lists.newArrayList();
    resultRows.add(row);

    IteratorNoPutResultSet rowSource =
            new IteratorNoPutResultSet(resultRows, columns, languageContext.getLastActivation());
    rowSource.openCore();
    resultSets[0] = new EmbedResultSet40(connection.unwrap(EmbedConnection.class), rowSource, false, null, true);
}
 
Example 7
Source File: BackupRegionStatus.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Restores this object's state from the stream: three byte arrays (start key, end key,
 * status), then an int count followed by that many UTF strings naming the backup files.
 *
 * @param in stream to read from
 * @throws IOException if reading from the stream fails
 * @throws ClassNotFoundException declared by the overridden method
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    startKey = ArrayUtil.readByteArray(in);
    endKey = ArrayUtil.readByteArray(in);
    status = ArrayUtil.readByteArray(in);

    // The count precedes the file names; a non-positive count leaves the list empty.
    int remaining = in.readInt();
    backupFiles = Lists.newArrayList();
    while (remaining > 0) {
        backupFiles.add(in.readUTF());
        remaining--;
    }
}
 
Example 8
Source File: BulkImportFunction.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Rebuilds {@code partitionMap} by grouping the given partitions under their conglomerate id.
 *
 * @param importPartitions partitions to group; consumed completely
 * @throws Exception declared for callers; nothing in this body throws a checked exception
 */
private void init(Iterator<BulkImportPartition> importPartitions) throws Exception {
    partitionMap = new HashMap<>();
    while (importPartitions.hasNext()) {
        BulkImportPartition current = importPartitions.next();
        Long conglomerateId = current.getConglomerateId();
        // Create the bucket for this conglomerate the first time it is seen, then append.
        partitionMap.computeIfAbsent(conglomerateId, id -> Lists.newArrayList()).add(current);
    }
}
 
Example 9
Source File: ReplicationSystemProcedure.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Reads every schema name from {@code sys.sysschemas}.
 *
 * @return the schema names, in the order the result set delivered them
 * @throws SQLException if the statement cannot be prepared, executed or read
 */
private static List<String> getSchemas() throws SQLException {
    final String query = "select schemaname from sys.sysschemas";
    final List<String> schemaNames = Lists.newArrayList();
    final Connection connection = SpliceAdmin.getDefaultConn();

    try (PreparedStatement statement = connection.prepareStatement(query);
         ResultSet cursor = statement.executeQuery()) {
        for (boolean hasRow = cursor.next(); hasRow; hasRow = cursor.next()) {
            schemaNames.add(cursor.getString(1));
        }
    }
    return schemaNames;
}
 
Example 10
Source File: ReplicationSystemProcedure.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Fetches the table descriptor of every table found in the given schema.
 *
 * @param schemaName schema whose tables are looked up
 * @return one descriptor per table name, in the order the names were returned
 * @throws Exception if listing the tables or fetching a descriptor fails
 */
private static List<TableDescriptor> getTableDescriptorsFromSchema(String schemaName) throws Exception{
    final List<String> tableNames = getTablesFromSchema(schemaName);
    final List<TableDescriptor> descriptors = Lists.newArrayList();
    for (int i = 0; i < tableNames.size(); i++) {
        descriptors.add(SpliceRegionAdmin.getTableDescriptor(schemaName, tableNames.get(i)));
    }
    return descriptors;
}
 
Example 11
Source File: BulkLoadIndexDataSetWriter.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the bulk-load pipeline for an index: optionally samples and pre-splits, transforms the
 * source rows into index key/value pairs, generates HFiles from them and bulk-loads those.
 *
 * @return a data set holding one row whose only column is the operation context's
 *         records-written count
 * @throws StandardException if any stage of the pipeline fails
 */
@Override
public DataSet<ExecRow> write() throws StandardException {
    final List<Long> congloms = Lists.newArrayList();
    congloms.add(heapConglom);

    // Sampling (and the split it performs) has to happen before partitions are computed.
    if (sampling) {
        sampleAndSplitIndex();
    }

    final List<BulkImportPartition> partitions =
            getBulkImportPartitions(congloms, bulkLoadDirectory);
    final String compression = HConfiguration.getConfiguration().getCompressionAlgorithm();

    // Function that writes the key/value pairs out as HFiles.
    final HFileGenerationFunction hfileGenerator = new BulkLoadIndexHFileGenerationFunction(
            operationContext, txn.getTxnId(), heapConglom, compression, partitions,
            tableVersion, tentativeIndex);

    // Both transformation stages are reported under the same label.
    final String stageLabel = String.format("Create Index %s: Generate HFiles", indexName);
    DataSet indexKeyValues = dataSet
            .map(new IndexTransformFunction(tentativeIndex), null, false, true, stageLabel)
            .mapPartitions(new BulkLoadKVPairFunction(heapConglom), false, true, stageLabel);

    partitionUsingRDDSortUsingDataFrame(partitions, indexKeyValues, hfileGenerator);
    bulkLoad(partitions, bulkLoadDirectory, String.format("Create Index %s:", indexName));

    final ValueRow countRow = new ValueRow(1);
    countRow.setColumn(1, new SQLLongint(operationContext.getRecordsWritten()));
    return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(countRow), 1));
}
 
Example 12
Source File: SpliceTableAdmin.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Looks up the names of all tables registered under the given schema.
 *
 * @param schemaName value bound to the {@code s.schemaname=?} placeholder of the query
 * @return the table names, in the order the result set delivered them
 * @throws SQLException if the statement cannot be prepared, executed or read
 */
private static List<String> getTables(String schemaName) throws SQLException {
    final String query = "select tablename from sys.systables t, sys.sysschemas s where s.schemaname=?" +
            " and s.schemaid=t.schemaid";
    final Connection defaultConnection = SpliceAdmin.getDefaultConn();
    final List<String> tableNames = Lists.newArrayList();

    try (PreparedStatement statement = defaultConnection.prepareStatement(query)) {
        statement.setString(1, schemaName);
        try (ResultSet cursor = statement.executeQuery()) {
            for (boolean hasRow = cursor.next(); hasRow; hasRow = cursor.next()) {
                tableNames.add(cursor.getString(1));
            }
        }
    }
    return tableNames;
}
 
Example 13
Source File: SparkUHCDictionary.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the distinct values stored at the given path and pairs them with the position of the
 * matching UHC column.
 *
 * <p>The column is identified by the name of the path's parent directory, compared
 * case-insensitively with each UHC column's identity.
 *
 * @param sequenceFilePath path of the file holding the column's distinct values
 * @return {@code (index, values)} for the matching column, or {@code (-1, null)} when the path
 *         does not exist or no UHC column matches
 * @throws Exception if the file system lookup or the read fails
 */
@Override
public Tuple2<Integer, List<String>> call(String sequenceFilePath) throws Exception {
    final Path columnPath = new Path(sequenceFilePath);
    logger.info("Column absolute path is " + columnPath.toString());
    if (!HadoopUtil.getFileSystem(columnPath).exists(columnPath)) {
        return new Tuple2<>(-1, null);
    }

    final String columnName = columnPath.getParent().getName();

    // Advance to the first UHC column whose identity matches; run off the end if none does.
    int position = 0;
    while (position < uhcColumns.size()
            && !uhcColumns.get(position).getIdentity().equalsIgnoreCase(columnName)) {
        position++;
    }
    if (position >= uhcColumns.size()) {
        return new Tuple2<>(-1, null);
    }

    // The config is installed for this thread only while the values are being read.
    try (KylinConfig.SetAndUnsetThreadLocalConfig ignored = KylinConfig
            .setAndUnsetThreadLocalConfig(config)) {
        final List<String> distinctValues = Lists.newArrayList();
        distinctValues.addAll(HadoopUtil.readDistinctColumnValues(sequenceFilePath));

        logger.info("UHC column " + columnName + " contains distinct values " + distinctValues);

        return new Tuple2<>(position, distinctValues);
    }
}
 
Example 14
Source File: MergeNonDistinctAggregatesFunctionForMixedRows.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds {@code nonDistinctAggregates} from the operation's non-distinct aggregators, with
 * their column positions recomputed, and sets {@code distinctColumnId}.
 */
private void setup() {
    /*
     * Once the rows have been split, the column positions stored in the original aggregates
     * no longer apply. For the multiple-distinct-aggregate case a fresh aggregates array is
     * therefore composed whose column ids point at the positions inside the split row.
     */
    GenericAggregateOperation operation = (GenericAggregateOperation) operationContext.getOperation();
    int groupKeyCount = (groupingKeys != null) ? groupingKeys.length : 0;
    ClassFactory classFactory = operation.getActivation().getLanguageConnectionContext().getLanguageConnectionFactory().getClassFactory();

    List<SpliceGenericAggregator> rebuilt = Lists.newArrayList();
    for (SpliceGenericAggregator original : operation.aggregates) {
        AggregatorInfo info = original.getAggregatorInfo();
        if (!original.isDistinct()) {
            // Every non-distinct aggregate owns three consecutive columns after the group
            // keys; the number rebuilt so far is this aggregate's ordinal.
            int base = groupKeyCount + rebuilt.size() * 3;
            AggregatorInfo remapped = new AggregatorInfo(info.getAggregateName()
                    , info.getAggregatorClassName()
                    , base + 2
                    , base + 1
                    , base + 3
                    , false
                    , info.getResultDescription());
            rebuilt.add(new SpliceGenericAggregator(remapped, classFactory));
        }
    }

    // The field is left untouched when there is no non-distinct aggregate at all.
    if (!rebuilt.isEmpty()) {
        nonDistinctAggregates = rebuilt.toArray(new SpliceGenericAggregator[rebuilt.size()]);
    }

    distinctColumnId = groupKeyCount + 1;
}
 
Example 15
Source File: ReplicationSystemProcedure.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Fetches the table descriptor of every table returned by {@code getTablesFromDatabase()}.
 *
 * @return one descriptor per pair, in the order the pairs were returned; the pair's first and
 *         second elements are passed as the two arguments of the descriptor lookup
 * @throws Exception if listing the tables or fetching a descriptor fails
 */
private static List<TableDescriptor> getTableDescriptorsFromDatabase() throws Exception{
    final List<Pair<String, String>> qualifiedTables = getTablesFromDatabase();
    final List<TableDescriptor> descriptors = Lists.newArrayList();
    for (int i = 0; i < qualifiedTables.size(); i++) {
        final Pair<String, String> entry = qualifiedTables.get(i);
        descriptors.add(SpliceRegionAdmin.getTableDescriptor(entry.getFirst(), entry.getSecond()));
    }
    return descriptors;
}
 
Example 16
Source File: ResourceDetectBeforeMergingJob.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Detects the resources a segment merge will read and writes the findings to the job's
 * temporary share directory.
 *
 * <p>Steps, in order: resolve the cube and merged segment from the job parameters; convert the
 * segments being merged into {@code SegmentInfo}s and record them in {@code infos}; write the
 * count-distinct measure information of the merged segment's layouts; then, for every merge
 * assist, build the merged dataset, record its Spark plan and collect the paths that plan
 * reads; finally write the collected paths.
 *
 * @throws Exception if metadata lookup, plan generation or writing fails
 */
@Override
protected void doExecute() throws Exception {
    logger.info("Start detect resource before merge.");
    String cubeId = getParam(MetadataConstants.P_CUBE_ID);

    final CubeManager cubeManager = CubeManager.getInstance(config);
    final CubeInstance cube = cubeManager.getCubeByUuid(cubeId);
    // The segment-ids parameter is used as a single segment id here.
    final CubeSegment mergedSeg = cube.getSegmentById(getParam(MetadataConstants.P_SEGMENT_IDS));
    final SegmentInfo mergedSegInfo = MetadataConverter.getSegmentInfo(cube, mergedSeg.getUuid(),
            mergedSeg.getName(), mergedSeg.getStorageLocationIdentifier());
    final List<CubeSegment> mergingSegments = cube.getMergingSegments(mergedSeg);
    final List<SegmentInfo> segmentInfos = Lists.newArrayList();
    // Sorts the returned list in place, using the segments' natural ordering.
    Collections.sort(mergingSegments);
    for (CubeSegment cubeSegment : mergingSegments) {
        segmentInfos.add(MetadataConverter.getSegmentInfo(cube, cubeSegment.getUuid(), cubeSegment.getName(),
                cubeSegment.getStorageLocationIdentifier()));
    }
    // Replace whatever merging segments were recorded before with the current set.
    infos.clearMergingSegments();
    infos.recordMergingSegments(segmentInfos);
    Map<Long, DFLayoutMergeAssist> mergeCuboidsAssist = CubeMergeJob.generateMergeAssist(segmentInfos, ss);
    // Write the count-distinct detection result for the merged segment's layouts.
    ResourceDetectUtils.write(
            new Path(config.getJobTmpShareDir(project, jobId), ResourceDetectUtils.countDistinctSuffix()),
            ResourceDetectUtils
                    .findCountDistinctMeasure(JavaConversions.asJavaCollection(mergedSegInfo.toBuildLayouts())));
    // Maps each merge-assist key (as a string) to the paths its merged plan reads.
    // NOTE(review): the Long key is presumably a layout id - confirm against generateMergeAssist.
    Map<String, List<String>> resourcePaths = Maps.newHashMap();
    infos.clearSparkPlans();
    for (Map.Entry<Long, DFLayoutMergeAssist> entry : mergeCuboidsAssist.entrySet()) {
        Dataset<Row> afterMerge = entry.getValue().merge(config, getParam(MetadataConstants.P_CUBE_NAME));
        infos.recordSparkPlan(afterMerge.queryExecution().sparkPlan());
        List<Path> paths = JavaConversions
                .seqAsJavaList(ResourceDetectUtils.getPaths(afterMerge.queryExecution().sparkPlan()));
        List<String> pathStrs = paths.stream().map(Path::toString).collect(Collectors.toList());
        resourcePaths.put(String.valueOf(entry.getKey()), pathStrs);
    }
    // The output file name is prefixed with the merged segment's uuid.
    ResourceDetectUtils.write(new Path(config.getJobTmpShareDir(project, jobId),
            mergedSeg.getUuid() + "_" + ResourceDetectUtils.fileName()), resourcePaths);
}
 
Example 17
Source File: BulkLoadIndexDataSetWriter.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Samples the source data, gathers row-key statistics over the sampled index key/value pairs,
 * derives cut points from them, dumps those to the bulk load directory and, when there are
 * any, splits the tables at the cut points.
 *
 * @throws StandardException if sampling, statistics collection or splitting fails
 */
private void sampleAndSplitIndex() throws StandardException {
    LanguageConnectionContext languageContext =
            operationContext.getActivation().getLanguageConnectionContext();
    double fraction = BulkLoadUtils.getSampleFraction(languageContext);

    // Both transformation stages are reported under the same label.
    String stageLabel = String.format("Create Index %s: Sample Data", indexName);
    DataSet sample = dataSet.sampleWithoutReplacement(fraction);
    DataSet sampledKeyValues = sample
            .map(new IndexTransformFunction(tentativeIndex), null, false, true, stageLabel)
            .mapPartitions(new BulkLoadKVPairFunction(heapConglom), false, true, stageLabel);

    // Collect statistics for the encoded keys/values, including size and histogram.
    RowKeyStatisticsFunction statistics =
            new RowKeyStatisticsFunction(heapConglom, Lists.newArrayList());
    DataSet keyStatistics = sampledKeyValues.mapPartitions(statistics);
    List<Tuple2<Long, Tuple2<Double, ColumnStatisticsImpl>>> collected = keyStatistics.collect();

    // Work out the cut points for the main table and the index tables.
    List<Tuple2<Long, byte[][]>> cutPoints = BulkLoadUtils.getCutPoints(fraction, collected);

    // Keep a copy of the cut points on the file system for reference.
    ImportUtils.dumpCutPoints(cutPoints, bulkLoadDirectory);

    if (cutPoints != null && !cutPoints.isEmpty()) {
        BulkLoadUtils.splitTables(cutPoints);
    }
}
 
Example 18
Source File: MemstoreAwareObserverTest.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates the stub by passing a newly created, empty list to the superclass constructor.
 */
StubCompactionRequest () {
    super(Lists.newArrayList());
}
 
Example 19
Source File: RowKeyGenerator.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Resets {@code keys} and {@code fileNames} to new, empty lists.
 */
private void init() {
    keys = Lists.newArrayList();
    fileNames = Lists.newArrayList();
}
 
Example 20
Source File: AfterMergeOrRefreshResourceMerger.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the outcome of a merge or refresh job to the local cube metadata.
 *
 * <p>The merged segment is read from the remote resource store, marked {@code READY} and
 * tagged with the parquet storage type. The segments it replaces are sorted, used to make the
 * snapshot for the new segment and scheduled for removal. For index-merge jobs the merged
 * segment's size and input-record count are set to the sums over the replaced segments,
 * ignoring values of -1.
 *
 * @param cubeId uuid of the cube being updated
 * @param segmentId id of the merged segment in the remote store
 * @param remoteResourceStore store holding the job's view of the metadata
 * @param jobType job type name; compared with {@code JobTypeEnum.INDEX_MERGE}
 */
@Override
public void merge(String cubeId, String segmentId, ResourceStore remoteResourceStore, String jobType) {

    CubeManager cubeManager = CubeManager.getInstance(getConfig());
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeUpdate update = new CubeUpdate(cubeInstance.latestCopyForWrite());

    CubeManager distManager = CubeManager.getInstance(remoteResourceStore.getConfig());
    CubeInstance distCube = distManager.getCubeByUuid(cubeId).latestCopyForWrite();

    List<CubeSegment> toUpdateSegments = Lists.newArrayList();

    CubeSegment mergedSegment = distCube.getSegmentById(segmentId);
    mergedSegment.setStatus(SegmentStatusEnum.READY);
    Map<String, String> additionalInfo = mergedSegment.getAdditionalInfo();
    additionalInfo.put("storageType", "" + IStorageAware.ID_PARQUET);
    mergedSegment.setAdditionalInfo(additionalInfo);
    toUpdateSegments.add(mergedSegment);

    List<CubeSegment> toRemoveSegments = getToRemoveSegs(distCube, mergedSegment);
    Collections.sort(toRemoveSegments);
    makeSnapshotForNewSegment(mergedSegment, toRemoveSegments);

    if (String.valueOf(JobTypeEnum.INDEX_MERGE).equals(jobType)) {
        // -1 is filtered out of both sums; either sum is empty when no segment contributes.
        Optional<Long> totalSizeKB = toRemoveSegments.stream().map(CubeSegment::getSizeKB).filter(size -> size != -1)
                .reduce(Long::sum);
        Optional<Long> totalInputRecords = toRemoveSegments.stream().map(CubeSegment::getInputRecords).filter(records -> records != -1)
                .reduce(Long::sum);
        if (totalSizeKB.isPresent()) {
            mergedSegment.setSizeKB(totalSizeKB.get());
            // The record total is computed independently of the size total, so it can be
            // empty even when sizes are known; only set it when there is a value.
            if (totalInputRecords.isPresent()) {
                mergedSegment.setInputRecords(totalInputRecords.get());
            }
            mergedSegment.setLastBuildTime(System.currentTimeMillis());
        }
    }

    update.setToRemoveSegs(toRemoveSegments.toArray(new CubeSegment[0]));
    update.setToUpdateSegs(toUpdateSegments.toArray(new CubeSegment[0]));

    try {
        cubeManager.updateCube(update);
    } catch (IOException e) {
        // NOTE(review): a failed metadata update is only printed, so callers cannot tell that
        // the merge result was not persisted. Consider logging through the class logger and
        // rethrowing as an unchecked exception once callers are confirmed to cope with it.
        e.printStackTrace();
    }

}