Java Code Examples for com.clearspring.analytics.util.Lists#newArrayList()

The following examples show how to use com.clearspring.analytics.util.Lists#newArrayList(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.

Example 1

Source File: ReplicationSystemProcedure.java From spliceengine with GNU Affero General Public License v3.0

6 votes

/**
 * Collects the names of the tables that belong to the given schema.
 *
 * @param schemaName value bound to the {@code s.schemaname=?} placeholder of the lookup query
 * @return the first column of every row returned by the query, in result order
 * @throws SQLException if preparing, executing or reading the query fails
 */
private static List<String> getTablesFromSchema(String schemaName) throws SQLException {
    final List<String> names = Lists.newArrayList();
    final String query = "select tablename from sys.systables t, sys.sysschemas s " +
            "where t.schemaid=s.schemaid and s.schemaname=?";
    final Connection defaultConn = SpliceAdmin.getDefaultConn();

    try (PreparedStatement stmt = defaultConn.prepareStatement(query)) {
        stmt.setString(1, schemaName);
        try (ResultSet results = stmt.executeQuery()) {
            while (results.next()) {
                names.add(results.getString(1));
            }
        }
    }
    return names;
}

Example 2

Source File: CsvSourceTest.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Checks that the dataset returned by the CSV cubing source for the cube's fact table has,
 * position by position, the same field names and data types as the fact table's column
 * descriptors.
 */
@Test
public void testGetSourceDataFromFactTable() {
    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(CUBE_NAME);
    TableDesc fact = MetadataConverter.extractFactTable(cube);

    // Copy the column descriptors into an indexable list so they can be paired with fields.
    List<ColumnDesc> colDescs = Lists.newArrayList();
    Iterator<ColumnDesc> iterator = fact.columns().iterator();
    while (iterator.hasNext()) {
        colDescs.add(iterator.next());
    }

    NSparkCubingEngine.NSparkCubingSource cubingSource = new CsvSource().adaptToBuildEngine(NSparkCubingEngine.NSparkCubingSource.class);
    Dataset<Row> cubeDS = cubingSource.getSourceData(fact, ss, Maps.newHashMap());
    // The rows themselves are discarded; presumably this only forces the read to run — TODO confirm.
    cubeDS.take(10);

    StructField[] fields = cubeDS.schema().fields();
    for (int i = 0; i < colDescs.size(); i++) {
        ColumnDesc expected = colDescs.get(i);
        StructField actual = fields[i];
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes
        // failure messages report the column descriptor as the expected value.
        Assert.assertEquals(expected.columnName(), actual.name());
        Assert.assertEquals(expected.dataType(), actual.dataType());
    }
}

Example 3

Source File: CoreClusterManagerIT.java From heroic with Apache License 2.0

6 votes

/**
 * Repeatedly asks the cluster manager for a node in the given shard and pings it, until the
 * manager reports that no further node is available.
 *
 * <p>The same list is handed to the manager both as the exclusion test ({@code contains}) and
 * as the registration callback ({@code add}); presumably the manager records each node it
 * selects so that it is skipped on the next round — verify against
 * {@code withNodeInShardButNotWithId}.
 *
 * @param clusterManager manager used to select nodes and run the ping
 * @param shard shard whose nodes are pinged
 * @return the ping futures, in the order the nodes were selected
 */
private List<AsyncFuture<Void>> pingAllNodesInShard(
    ClusterManager clusterManager, ClusterShard shard
) {
    final List<AsyncFuture<Void>> pings = Lists.newArrayList();
    final List<ClusterNode> alreadyPinged = Lists.newArrayList();

    for (;;) {
        final Optional<ClusterManager.NodeResult<AsyncFuture<Void>>> attempt =
            clusterManager.withNodeInShardButNotWithId(shard.getShard(),
                alreadyPinged::contains, alreadyPinged::add, ClusterNode.Group::ping);

        if (attempt.isPresent()) {
            pings.add(attempt.get().getReturnValue());
            continue;
        }

        // No more nodes available in shard, we're done
        return pings;
    }
}

Example 4

Source File: ReplicationSystemProcedure.java From spliceengine with GNU Affero General Public License v3.0

6 votes

/**
 * Publishes the replicated WAL positions known for a peer as a one-column result set.
 *
 * @param peerId peer whose positions are requested from the replication manager
 * @param resultSets output array; slot 0 receives a result set with one {@code walPosition}
 *                   VARCHAR row per position
 * @throws StandardException if a row value cannot be set or the result set cannot be opened
 * @throws SQLException if the default connection cannot be unwrapped
 */
public static void GET_REPLICATED_WAL_POSITIONS(short peerId, ResultSet[] resultSets) throws StandardException, SQLException {
    final ReplicationManager manager = EngineDriver.driver().manager().getReplicationManager();
    final List<String> positions = manager.getReplicatedWalPositions(peerId);

    final Connection conn = SpliceAdmin.getDefaultConn();
    final LanguageConnectionContext lcc = conn.unwrap(EmbedConnection.class).getLanguageConnection();

    // Single VARCHAR column describing the shape of every row.
    final ResultColumnDescriptor[] columns = new ResultColumnDescriptor[]{
            new GenericColumnDescriptor("walPosition", DataTypeDescriptor.getBuiltInDataTypeDescriptor(Types.VARCHAR))
    };

    final List<ExecRow> resultRows = Lists.newArrayList();
    for (String position : positions) {
        final ExecRow row = new ValueRow(1);
        final DataValueDescriptor[] cells = {new SQLVarchar()};
        row.setRowArray(cells);
        row.getColumn(1).setValue(position);
        resultRows.add(row);
    }

    final IteratorNoPutResultSet rowSource =
            new IteratorNoPutResultSet(resultRows, columns, lcc.getLastActivation());
    rowSource.openCore();
    resultSets[0] = new EmbedResultSet40(conn.unwrap(EmbedConnection.class), rowSource, false, null, true);
}

Example 5

Source File: DataStoreUtils.java From geowave with Apache License 2.0

6 votes

/**
 * Resolves a comma-separated list of index names against the indices of a data store.
 *
 * @param dataStore store whose indices are searched
 * @param indexNames index names separated by {@code ","}; each piece is compared verbatim
 *                   (no trimming) with {@code Index.getName()}
 * @return an unmodifiable list holding the first matching index for every requested name,
 *         in request order
 * @throws ParameterException if a requested name matches no index of the data store
 */
public static List<Index> loadIndices(final DataStore dataStore, final String indexNames) {
  final List<Index> matched = Lists.newArrayList();
  final String[] requestedNames = indexNames.split(",");
  final Index[] available = dataStore.getIndices();

  nextName:
  for (final String wanted : requestedNames) {
    for (final Index candidate : available) {
      if (candidate.getName().equals(wanted)) {
        matched.add(candidate);
        continue nextName;
      }
    }
    // The inner loop finished without a match for this name.
    throw new ParameterException("Unable to find index with name: " + wanted);
  }
  return Collections.unmodifiableList(matched);
}

Example 6

Source File: ReplicationSystemProcedure.java From spliceengine with GNU Affero General Public License v3.0

6 votes

/**
 * Publishes the replicated position of a single WAL as a one-row, one-column result set.
 *
 * @param wal WAL identifier passed to the replication manager
 * @param resultSets output array; slot 0 receives a result set with a single
 *                   {@code walPosition} VARCHAR row
 * @throws StandardException if the row value cannot be set or the result set cannot be opened
 * @throws SQLException if the default connection cannot be unwrapped
 */
public static void GET_REPLICATED_WAL_POSITION(String wal, ResultSet[] resultSets) throws StandardException, SQLException {
    final ReplicationManager manager = EngineDriver.driver().manager().getReplicationManager();
    final String position = manager.getReplicatedWalPosition(wal);

    final Connection conn = SpliceAdmin.getDefaultConn();
    final LanguageConnectionContext lcc = conn.unwrap(EmbedConnection.class).getLanguageConnection();

    // Single VARCHAR column describing the shape of the row.
    final ResultColumnDescriptor[] columns = new ResultColumnDescriptor[]{
            new GenericColumnDescriptor("walPosition", DataTypeDescriptor.getBuiltInDataTypeDescriptor(Types.VARCHAR))
    };

    final List<ExecRow> resultRows = Lists.newArrayList();
    final ExecRow row = new ValueRow(1);
    final DataValueDescriptor[] cells = {new SQLVarchar()};
    row.setRowArray(cells);
    row.getColumn(1).setValue(position);
    resultRows.add(row);

    final IteratorNoPutResultSet rowSource =
            new IteratorNoPutResultSet(resultRows, columns, lcc.getLastActivation());
    rowSource.openCore();
    resultSets[0] = new EmbedResultSet40(conn.unwrap(EmbedConnection.class), rowSource, false, null, true);
}

Example 7

Source File: BackupRegionStatus.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Restores this object's state from the stream: three byte arrays (start key, end key,
 * status), then an int count followed by that many UTF strings naming the backup files.
 *
 * @param in stream to read from
 * @throws IOException if reading from the stream fails
 * @throws ClassNotFoundException declared by the overridden method
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    startKey = ArrayUtil.readByteArray(in);
    endKey = ArrayUtil.readByteArray(in);
    status = ArrayUtil.readByteArray(in);

    int remaining = in.readInt();
    backupFiles = Lists.newArrayList();
    // A non-positive count simply leaves the list empty.
    while (remaining-- > 0) {
        backupFiles.add(in.readUTF());
    }
}

Example 8

Source File: BulkImportFunction.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Rebuilds {@code partitionMap} by grouping the supplied partitions by conglomerate id,
 * keeping the iteration order within each group.
 *
 * @param importPartitions partitions to group; consumed completely
 * @throws Exception declared for callers; nothing in this body throws it explicitly
 */
private void init(Iterator<BulkImportPartition> importPartitions) throws Exception {
    partitionMap = new HashMap<>();
    while (importPartitions.hasNext()) {
        final BulkImportPartition current = importPartitions.next();
        final Long conglomerateId = current.getConglomerateId();
        // Create the bucket for this conglomerate on first sight, then append to it.
        partitionMap
                .computeIfAbsent(conglomerateId, unused -> Lists.newArrayList())
                .add(current);
    }
}

Example 9

Source File: ReplicationSystemProcedure.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Lists every schema name stored in {@code sys.sysschemas}.
 *
 * @return the first column of every row returned by the query, in result order
 * @throws SQLException if preparing, executing or reading the query fails
 */
private static List<String> getSchemas() throws SQLException {
    final List<String> names = Lists.newArrayList();
    final String query = "select schemaname from sys.sysschemas";
    final Connection defaultConn = SpliceAdmin.getDefaultConn();

    try (PreparedStatement stmt = defaultConn.prepareStatement(query);
         ResultSet results = stmt.executeQuery()) {
        while (results.next()) {
            names.add(results.getString(1));
        }
    }
    return names;
}

Example 10

Source File: ReplicationSystemProcedure.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Fetches the table descriptor of every table found in the given schema.
 *
 * @param schemaName schema whose tables are looked up
 * @return one descriptor per table name, in the order the names were returned
 * @throws Exception if listing the tables or fetching a descriptor fails
 */
private static List<TableDescriptor> getTableDescriptorsFromSchema(String schemaName) throws Exception {
    final List<String> tableNames = getTablesFromSchema(schemaName);
    final List<TableDescriptor> descriptors = Lists.newArrayList();
    for (String tableName : tableNames) {
        descriptors.add(SpliceRegionAdmin.getTableDescriptor(schemaName, tableName));
    }
    return descriptors;
}

Example 11

Source File: BulkLoadIndexDataSetWriter.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Bulk-loads the index: optionally samples and splits first, then transforms the data set
 * into key/value pairs, generates HFiles for the computed partitions and bulk-loads them.
 *
 * @return a single-row data set whose only column is the number of records written,
 *         as reported by the operation context
 * @throws StandardException if any step of the bulk load fails
 */
@Override
public DataSet<ExecRow> write() throws StandardException {

    final List<Long> congloms = Lists.newArrayList();
    congloms.add(heapConglom);

    // Sampling (and the resulting split) has to happen before the partitions are computed.
    if (sampling) {
        sampleAndSplitIndex();
    }
    final List<BulkImportPartition> partitions =
            getBulkImportPartitions(congloms, bulkLoadDirectory);
    final String compression = HConfiguration.getConfiguration().getCompressionAlgorithm();

    // Write to HFile
    final HFileGenerationFunction hfileGenerator =
            new BulkLoadIndexHFileGenerationFunction(operationContext, txn.getTxnId(),
                    heapConglom, compression, partitions, tableVersion, tentativeIndex);

    // Both pipeline stages are reported under the same description.
    final String stageName = String.format("Create Index %s: Generate HFiles", indexName);
    DataSet keyValuePairs = dataSet
            .map(new IndexTransformFunction(tentativeIndex), null, false, true, stageName)
            .mapPartitions(new BulkLoadKVPairFunction(heapConglom), false, true, stageName);

    partitionUsingRDDSortUsingDataFrame(partitions, keyValuePairs, hfileGenerator);
    bulkLoad(partitions, bulkLoadDirectory, String.format("Create Index %s:", indexName));

    final ValueRow summary = new ValueRow(1);
    summary.setColumn(1, new SQLLongint(operationContext.getRecordsWritten()));
    return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(summary), 1));
}

Example 12

Source File: SpliceTableAdmin.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Collects the names of the tables that belong to the given schema.
 *
 * @param schemaName value bound to the {@code s.schemaname=?} placeholder of the lookup query
 * @return the first column of every row returned by the query, in result order
 * @throws SQLException if preparing, executing or reading the query fails
 */
private static List<String> getTables(String schemaName) throws SQLException {
    final String query = "select tablename from sys.systables t, sys.sysschemas s where s.schemaname=?" +
            " and s.schemaid=t.schemaid";
    final Connection defaultConn = SpliceAdmin.getDefaultConn();
    final List<String> names = Lists.newArrayList();

    try (PreparedStatement stmt = defaultConn.prepareStatement(query)) {
        stmt.setString(1, schemaName);
        try (ResultSet results = stmt.executeQuery()) {
            while (results.next()) {
                names.add(results.getString(1));
            }
        }
    }
    return names;
}

Example 13

Source File: SparkUHCDictionary.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Reads the distinct values stored for one UHC column.
 *
 * <p>The column is identified by the name of the parent directory of
 * {@code sequenceFilePath}, matched case-insensitively against the identities in
 * {@code uhcColumns}.
 *
 * @param sequenceFilePath path of the file holding the column's distinct values
 * @return a pair of (position of the column in {@code uhcColumns}, its distinct values), or
 *         {@code (-1, null)} when the path does not exist or no column matches
 * @throws Exception if the file system lookup or reading the values fails
 */
@Override
public Tuple2<Integer, List<String>> call(String sequenceFilePath) throws Exception {
    final Path columnPath = new Path(sequenceFilePath);
    logger.info("Column absolute path is " + columnPath.toString());
    if (!HadoopUtil.getFileSystem(columnPath).exists(columnPath)) {
        return new Tuple2<>(-1, null);
    }

    final String columnName = columnPath.getParent().getName();

    // Advance until the matching column is found or the list is exhausted.
    int position = 0;
    while (position < uhcColumns.size()
            && !uhcColumns.get(position).getIdentity().equalsIgnoreCase(columnName)) {
        position++;
    }
    if (position >= uhcColumns.size()) {
        return new Tuple2<>(-1, null);
    }

    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(config)) {
        final List<String> distinctValues = Lists.newArrayList();
        distinctValues.addAll(HadoopUtil.readDistinctColumnValues(sequenceFilePath));

        logger.info("UHC column " + columnName + " contains distinct values " + distinctValues);

        return new Tuple2<>(position, distinctValues);
    }
}

Example 14

Source File: MergeNonDistinctAggregatesFunctionForMixedRows.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Builds {@code nonDistinctAggregates} and {@code distinctColumnId} for the split-row layout.
 *
 * <p>Every non-distinct aggregator of the operation is re-created with column ids that follow
 * the grouping keys: the k-th non-distinct aggregate (counting from 0) is given the ids
 * {@code keys + 3k + 2}, {@code keys + 3k + 1} and {@code keys + 3k + 3}, in that constructor
 * order. {@code nonDistinctAggregates} is left untouched when there is no such aggregate.
 */
private void setup() {
    /*
     * With the rows split, the column positions recorded in aggregates are no longer valid,
     * so for multiple distinct aggregate case, we need to compose a new aggregates array with
     * column ids pointing to the new position in the split row.
     */
    final GenericAggregateOperation operation = (GenericAggregateOperation) operationContext.getOperation();
    final SpliceGenericAggregator[] originals = operation.aggregates;
    final int groupKeyCount = (groupingKeys != null) ? groupingKeys.length : 0;

    final List<SpliceGenericAggregator> rebuilt = Lists.newArrayList();
    final ClassFactory classFactory = operation.getActivation().getLanguageConnectionContext().getLanguageConnectionFactory().getClassFactory();
    for (SpliceGenericAggregator original : originals) {
        final AggregatorInfo info = original.getAggregatorInfo();
        if (!original.isDistinct()) {
            // The number of aggregators rebuilt so far selects this aggregate's three-column slot.
            final int slotBase = groupKeyCount + rebuilt.size() * 3;
            final AggregatorInfo relocated = new AggregatorInfo(
                    info.getAggregateName(),
                    info.getAggregatorClassName(),
                    slotBase + 2,
                    slotBase + 1,
                    slotBase + 3,
                    false,
                    info.getResultDescription());
            rebuilt.add(new SpliceGenericAggregator(relocated, classFactory));
        }
    }

    if (!rebuilt.isEmpty()) {
        nonDistinctAggregates = rebuilt.toArray(new SpliceGenericAggregator[rebuilt.size()]);
    }

    distinctColumnId = groupKeyCount + 1;
}

Example 15

Source File: ReplicationSystemProcedure.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Fetches the table descriptor of every table returned by {@code getTablesFromDatabase()}.
 *
 * @return one descriptor per pair, in the order the pairs were returned; each pair's first
 *         and second elements are passed to {@code SpliceRegionAdmin.getTableDescriptor}
 * @throws Exception if listing the tables or fetching a descriptor fails
 */
private static List<TableDescriptor> getTableDescriptorsFromDatabase() throws Exception {
    final List<Pair<String, String>> qualifiedTables = getTablesFromDatabase();
    final List<TableDescriptor> descriptors = Lists.newArrayList();
    for (Pair<String, String> entry : qualifiedTables) {
        descriptors.add(SpliceRegionAdmin.getTableDescriptor(entry.getFirst(), entry.getSecond()));
    }
    return descriptors;
}

Example 16

Source File: ResourceDetectBeforeMergingJob.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Detects the resources a merge job will touch and writes them to the job's shared temp
 * directory before the merge itself runs.
 *
 * <p>Two files are written: the count-distinct information of the merged segment's layouts,
 * and a map from each merge-assist key to the string paths found in its Spark plan.
 *
 * @throws Exception if resolving metadata, merging or writing the result files fails
 */
@Override
protected void doExecute() throws Exception {
    logger.info("Start detect resource before merge.");
    String cubeId = getParam(MetadataConstants.P_CUBE_ID);

    // Resolve the cube and the merged (target) segment from the job parameters.
    final CubeManager cubeManager = CubeManager.getInstance(config);
    final CubeInstance cube = cubeManager.getCubeByUuid(cubeId);
    final CubeSegment mergedSeg = cube.getSegmentById(getParam(MetadataConstants.P_SEGMENT_IDS));
    final SegmentInfo mergedSegInfo = MetadataConverter.getSegmentInfo(cube, mergedSeg.getUuid(),
            mergedSeg.getName(), mergedSeg.getStorageLocationIdentifier());
    final List<CubeSegment> mergingSegments = cube.getMergingSegments(mergedSeg);
    final List<SegmentInfo> segmentInfos = Lists.newArrayList();
    // Sorts the returned list in place, so the segment infos below follow the segments' natural order.
    Collections.sort(mergingSegments);
    for (CubeSegment cubeSegment : mergingSegments) {
        segmentInfos.add(MetadataConverter.getSegmentInfo(cube, cubeSegment.getUuid(), cubeSegment.getName(),
                cubeSegment.getStorageLocationIdentifier()));
    }
    // Replace any previously recorded merging segments with the current set.
    infos.clearMergingSegments();
    infos.recordMergingSegments(segmentInfos);
    // NOTE(review): the Long keys look like layout/cuboid ids — confirm against generateMergeAssist.
    Map<Long, DFLayoutMergeAssist> mergeCuboidsAssist = CubeMergeJob.generateMergeAssist(segmentInfos, ss);
    // First output file: count-distinct information for the merged segment's layouts.
    ResourceDetectUtils.write(
            new Path(config.getJobTmpShareDir(project, jobId), ResourceDetectUtils.countDistinctSuffix()),
            ResourceDetectUtils
                    .findCountDistinctMeasure(JavaConversions.asJavaCollection(mergedSegInfo.toBuildLayouts())));
    Map<String, List<String>> resourcePaths = Maps.newHashMap();
    infos.clearSparkPlans();
    for (Map.Entry<Long, DFLayoutMergeAssist> entry : mergeCuboidsAssist.entrySet()) {
        // Build the merged dataset for this entry, record its Spark plan and collect the plan's paths.
        Dataset<Row> afterMerge = entry.getValue().merge(config, getParam(MetadataConstants.P_CUBE_NAME));
        infos.recordSparkPlan(afterMerge.queryExecution().sparkPlan());
        List<Path> paths = JavaConversions
                .seqAsJavaList(ResourceDetectUtils.getPaths(afterMerge.queryExecution().sparkPlan()));
        List<String> pathStrs = paths.stream().map(Path::toString).collect(Collectors.toList());
        resourcePaths.put(String.valueOf(entry.getKey()), pathStrs);
    }
    // Second output file: the per-entry path map, named after the merged segment's uuid.
    ResourceDetectUtils.write(new Path(config.getJobTmpShareDir(project, jobId),
            mergedSeg.getUuid() + "_" + ResourceDetectUtils.fileName()), resourcePaths);
}

Example 17

Source File: BulkLoadIndexDataSetWriter.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Samples the data set, gathers row-key statistics on the sample, derives cut points from
 * them, dumps the cut points to the bulk-load directory and, when there are any, splits the
 * tables at those points.
 *
 * @throws StandardException if sampling, statistics collection or splitting fails
 */
private void sampleAndSplitIndex() throws StandardException {
    final LanguageConnectionContext lcc =
            operationContext.getActivation().getLanguageConnectionContext();
    final double fraction = BulkLoadUtils.getSampleFraction(lcc);

    // Both pipeline stages are reported under the same description.
    final String stageName = String.format("Create Index %s: Sample Data", indexName);
    DataSet sample = dataSet.sampleWithoutReplacement(fraction);
    DataSet sampledKeyValues = sample
            .map(new IndexTransformFunction(tentativeIndex), null, false, true, stageName)
            .mapPartitions(new BulkLoadKVPairFunction(heapConglom), false, true, stageName);

    // collect statistics for encoded key/value, including size and histogram
    final RowKeyStatisticsFunction statsFunction =
            new RowKeyStatisticsFunction(heapConglom, Lists.newArrayList());
    DataSet stats = sampledKeyValues.mapPartitions(statsFunction);
    List<Tuple2<Long, Tuple2<Double, ColumnStatisticsImpl>>> collected = stats.collect();

    // Calculate cut points for main table and index tables
    List<Tuple2<Long, byte[][]>> cutPoints = BulkLoadUtils.getCutPoints(fraction, collected);

    // dump cut points to file system for reference
    ImportUtils.dumpCutPoints(cutPoints, bulkLoadDirectory);

    if (cutPoints != null && !cutPoints.isEmpty()) {
        BulkLoadUtils.splitTables(cutPoints);
    }
}

Example 18

Source File: MemstoreAwareObserverTest.java From spliceengine with GNU Affero General Public License v3.0

4 votes

/** Creates a stub request by handing the superclass constructor a new, empty list. */
StubCompactionRequest () {
    super(Lists.newArrayList());
}

Example 19

Source File: RowKeyGenerator.java From spliceengine with GNU Affero General Public License v3.0

4 votes

/** Points {@code keys} and {@code fileNames} at two new, separate lists. */
private void init() {
    keys = Lists.newArrayList();
    fileNames = Lists.newArrayList();
}

Example 20

Source File: AfterMergeOrRefreshResourceMerger.java From kylin-on-parquet-v2 with Apache License 2.0

4 votes

/**
 * Applies the outcome of a merge or refresh job to the local cube metadata.
 *
 * <p>The merged segment is read from the remote resource store's copy of the cube, marked
 * {@code READY}, tagged with the Parquet storage type and submitted as an updated segment.
 * The segments returned by {@code getToRemoveSegs} are submitted for removal. For
 * {@code INDEX_MERGE} jobs the merged segment's size and input-record count are set to the
 * sums over the removed segments, ignoring segments whose value is {@code -1}.
 *
 * @param cubeId uuid of the cube to update
 * @param segmentId id of the merged segment inside the remote copy of the cube
 * @param remoteResourceStore store holding the job's view of the metadata
 * @param jobType job type name; compared with {@code JobTypeEnum.INDEX_MERGE}
 */
@Override
public void merge(String cubeId, String segmentId, ResourceStore remoteResourceStore, String jobType) {

    CubeManager cubeManager = CubeManager.getInstance(getConfig());
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeUpdate update = new CubeUpdate(cubeInstance.latestCopyForWrite());

    CubeManager distManager = CubeManager.getInstance(remoteResourceStore.getConfig());
    CubeInstance distCube = distManager.getCubeByUuid(cubeId).latestCopyForWrite();

    List<CubeSegment> toUpdateSegments = Lists.newArrayList();

    CubeSegment mergedSegment = distCube.getSegmentById(segmentId);
    mergedSegment.setStatus(SegmentStatusEnum.READY);
    Map<String, String> additionalInfo = mergedSegment.getAdditionalInfo();
    additionalInfo.put("storageType", "" + IStorageAware.ID_PARQUET);
    mergedSegment.setAdditionalInfo(additionalInfo);
    toUpdateSegments.add(mergedSegment);

    List<CubeSegment> toRemoveSegments = getToRemoveSegs(distCube, mergedSegment);
    Collections.sort(toRemoveSegments);
    makeSnapshotForNewSegment(mergedSegment, toRemoveSegments);

    if (String.valueOf(JobTypeEnum.INDEX_MERGE).equals(jobType)) {
        // -1 marks an unknown value; such segments are left out of the sums.
        Optional<Long> totalSize = toRemoveSegments.stream().map(CubeSegment::getSizeKB)
                .filter(size -> size != -1).reduce(Long::sum);
        Optional<Long> totalInputRecords = toRemoveSegments.stream().map(CubeSegment::getInputRecords)
                .filter(records -> records != -1).reduce(Long::sum);
        if (totalSize.isPresent()) {
            mergedSegment.setSizeKB(totalSize.get());
            // The record sum can be absent even when the size sum is present (every segment
            // reporting -1 input records); calling get() unconditionally would throw
            // NoSuchElementException, so the count is only set when it exists.
            totalInputRecords.ifPresent(mergedSegment::setInputRecords);
            mergedSegment.setLastBuildTime(System.currentTimeMillis());
        }
    }

    update.setToRemoveSegs(toRemoveSegments.toArray(new CubeSegment[0]));
    update.setToUpdateSegs(toUpdateSegments.toArray(new CubeSegment[0]));

    try {
        cubeManager.updateCube(update);
    } catch (IOException e) {
        // NOTE(review): a failed metadata update is only printed here and the method returns
        // normally; consider logging through the class logger or propagating the failure.
        e.printStackTrace();
    }

}