Java Code Examples for org.apache.spark.api.java.JavaPairRDD#toLocalIterator()

The following examples show how to use org.apache.spark.api.java.JavaPairRDD#toLocalIterator().
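Before looking at the project code, here is a minimal, self-contained sketch of the call itself (the class and variable names are illustrative, not taken from the project below). JavaPairRDD#toLocalIterator() returns a java.util.Iterator over the RDD's Tuple2 elements and, unlike collect(), only brings one partition at a time to the driver:

import java.util.Arrays;
import java.util.Iterator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class ToLocalIteratorExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("toLocalIterator-demo").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
                    new Tuple2<>("a", 1),
                    new Tuple2<>("b", 2),
                    new Tuple2<>("c", 3)));

            // toLocalIterator() streams the pairs to the driver one partition
            // at a time, so only a single partition must fit in driver memory.
            Iterator<Tuple2<String, Integer>> itr = pairs.toLocalIterator();
            while (itr.hasNext()) {
                Tuple2<String, Integer> t = itr.next();
                System.out.println(t._1 + " -> " + t._2);
            }
        }
    }
}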
Example 1
Source File: SparkTableChecker.java    From spliceengine with GNU Affero General Public License v3.0
private List<String> reportInvalidIndexes(JavaPairRDD rdd) throws StandardException {
    List<String> messages = Lists.newLinkedList();

    int i = 0;
    messages.add(String.format("The following %d indexes are invalid:", invalidIndexCount));
    Iterator itr = rdd.toLocalIterator();
    while (itr.hasNext()) {
        Tuple2<String, Tuple2<byte[], ExecRow>> tuple = (Tuple2<String, Tuple2<byte[], ExecRow>>) itr.next();

        if (i >= maxCheckTableError) {
            messages.add("...");
            break;
        }
        byte[] key = tuple._2._1;
        messages.add(tuple._2._2 + "@" + Bytes.toHex(key) + "=>" + tuple._1);
        i++;
    }

    return messages;
}
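In this example, toLocalIterator() pulls the invalid-index entries back to the driver one partition at a time while the report is built; once maxCheckTableError entries have been formatted, the loop appends "..." and stops.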
 
Example 2
Source File: SparkTableChecker.java    From spliceengine with GNU Affero General Public License v3.0
private List<String> fixInvalidIndexes(JavaPairRDD rdd) throws StandardException {
    try {
        List<String> messages = Lists.newLinkedList();
        int i = 0;
        WriteCoordinator writeCoordinator = PipelineDriver.driver().writeCoordinator();
        WriteConfiguration writeConfiguration = writeCoordinator.defaultWriteConfiguration();
        Partition indexPartition = SIDriver.driver().getTableFactory().getTable(Long.toString(conglomerate));
        RecordingCallBuffer<KVPair> writeBuffer = writeCoordinator.writeBuffer(indexPartition, txn, null, writeConfiguration);

        messages.add(String.format("The following %d indexes are deleted:", invalidIndexCount));
        Iterator itr = rdd.toLocalIterator();
        while (itr.hasNext()) {
            Tuple2<String, Tuple2<byte[], ExecRow>> tuple = (Tuple2<String, Tuple2<byte[], ExecRow>>) itr.next();
            byte[] key = tuple._2._1;
            writeBuffer.add(new KVPair(key, new byte[0], KVPair.Type.DELETE));
            if (i == maxCheckTableError) {
                messages.add("...");
            }
            if (i > maxCheckTableError) {
                continue;
            }
            messages.add(tuple._2._2 + "=>" + tuple._1);
            i++;
        }
        writeBuffer.flushBuffer();
        return messages;
    }
    catch (Exception e) {
        throw StandardException.plainWrapException(e);
    }
}
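This variant repairs rather than just reports: every entry returned by the local iterator is turned into a DELETE KVPair and added to the write buffer, while only the first entries are echoed into the message list, with "..." marking the truncation point, before the buffer is flushed.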
 
Example 3
Source File: SparkTableChecker.java    From spliceengine with GNU Affero General Public License v3.0
private List<String> reportMissingIndexes(JavaPairRDD rdd, boolean fix) throws StandardException {

    if (fix) {
        PairDataSet dsToWrite = new SparkPairDataSet(rdd)
                .map(new AddKeyFunction())
                .map(new IndexTransformFunction(tentativeIndex), null, false, true, "Prepare Index")
                .index(new KVPairFunction(), false, true, "Add missing indexes");
        DataSetWriter writer = dsToWrite.directWriteData()
                .destConglomerate(tentativeIndex.getIndex().getConglomerate())
                .txn(txn)
                .build();
        writer.write();
    }

    List<String> messages = Lists.newLinkedList();
    if (fix) {
        messages.add(String.format("Created indexes for the following %d rows from base table %s.%s:",
                missingIndexCount, schemaName, tableName));
    }
    else {
        messages.add(String.format("The following %d rows from base table %s.%s are not indexed:",
                missingIndexCount, schemaName, tableName));
    }
    int i = 0;
    Iterator itr = rdd.toLocalIterator();
    while (itr.hasNext()) {

        if (i >= maxCheckTableError) {
            messages.add("...");
            break;
        }

        Tuple2<String, ExecRow> tuple = (Tuple2<String, ExecRow>) itr.next();
        if (tableKeyTemplate.nColumns() > 0) {
            byte[] key = Bytes.fromHex(tuple._1);
            tableKeyDecoder.set(key, 0, key.length);
            tableKeyDecoder.decode(tableKeyTemplate);
            messages.add(tableKeyTemplate.getClone().toString());
        }
        else {
            messages.add(tuple._1);
        }

        i++;
    }
    return messages;
}
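Here toLocalIterator() is used only for reporting: when fix is true, the missing rows are first re-indexed in bulk through a DataSetWriter, and the driver-side loop then lists up to maxCheckTableError rows, decoding the hex-encoded base-table key when the table has key columns.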