Java Code Examples for org.apache.spark.sql.sources.v2.DataSourceOptions#getInt()

The following examples show how to use org.apache.spark.sql.sources.v2.DataSourceOptions#getInt(). All examples are taken from the spark-data-sources project (MIT License).
Example 1
Source File: SimpleRowDataSource.java    From spark-data-sources with MIT License
/**
 * Spark calls this to create the reader. Notice how it pulls the host and port
 * on which ExampleDB is listening from the supplied options.
 * @param options the data source options supplied by Spark at the read call site
 * @return a reader for the ExampleDB at the given host and port
 */
@Override
public DataSourceReader createReader(DataSourceOptions options) {
    String host = options.get("host").orElse("localhost");
    int port = options.getInt("port", -1);
    return new Reader(host, port);
}
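The options read back with get() and getInt() are populated at the call site. Below is a minimal caller-side sketch, assuming an illustrative fully qualified data source class name and port value (neither appears in the original):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

SparkSession spark = SparkSession.builder()
        .appName("getIntExample")
        .master("local[*]")
        .getOrCreate();

// Each .option(...) key/value pair becomes an entry in the DataSourceOptions
// passed to createReader; keys are matched case-insensitively.
Dataset<Row> df = spark.read()
        .format("datasources.SimpleRowDataSource") // assumption: illustrative class name
        .option("host", "localhost")
        .option("port", "20000")                   // read back via options.getInt("port", -1)
        .load();

If the port option is omitted, getInt("port", -1) returns the default of -1, which the reader can treat as a sentinel for an unconfigured port.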
 
Example 2
Source File: ParallelRowReadWriteDataSource.java    From spark-data-sources with MIT License
/**
 * Spark calls this to create the reader. Notice how it pulls the host and port
 * on which ExampleDB is listening, as well as a table name, from the supplied options.
 * @param options the data source options supplied by Spark at the read call site
 * @return a reader for the given ExampleDB table, configured with the requested number of partitions
 */
@Override
public DataSourceReader createReader(DataSourceOptions options) {
    String host = options.get("host").orElse("localhost");
    int port = options.getInt("port", -1);
    String table = options.get("table").orElse("unknownTable"); // TODO: throw
    int partitions = Integer.parseInt(options.get("partitions").orElse("0"));
    return new Reader(host, port, table, partitions);
}
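Note that this example parses the partitions option with Integer.parseInt rather than the method this page documents. Since DataSourceOptions stores every value as a string, the lookup can be written equivalently as:

// Behaviorally equivalent: getInt parses the stored string, falling back to 0
int partitions = options.getInt("partitions", 0);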
 
Example 3
Source File: FlexibleRowDataSource.java    From spark-data-sources with MIT License
/**
 * Spark calls this to create the reader. Notice how it pulls the host and port
 * on which ExampleDB is listening, as well as a table name, from the supplied options.
 * @param options the data source options supplied by Spark at the read call site
 * @return a reader for the given ExampleDB table
 */
@Override
public DataSourceReader createReader(DataSourceOptions options) {
    String host = options.get("host").orElse("localhost");
    int port = options.getInt("port", -1);
    String table = options.get("table").orElse("unknownTable"); // TODO: throw
    return new Reader(host, port, table);
}
 
Example 4
Source File: ParallelRowDataSource.java    From spark-data-sources with MIT License
/**
 * Spark calls this to create the reader. Notice how it pulls the host and port
 * on which ExampleDB is listening, as well as a table name, from the supplied options.
 * @param options the data source options supplied by Spark at the read call site
 * @return a reader for the given ExampleDB table, configured with the requested number of partitions
 */
@Override
public DataSourceReader createReader(DataSourceOptions options) {
    String host = options.get("host").orElse("localhost");
    int port = options.getInt("port", -1);
    String table = options.get("table").orElse("unknownTable"); // TODO: throw
    int partitions = Integer.parseInt(options.get("partitions").orElse("0"));
    return new Reader(host, port, table, partitions);
}
 
Example 5
Source File: PartitioningRowDataSource.java    From spark-data-sources with MIT License
/**
 * Spark calls this to create the reader. Notice how it pulls the host and port
 * on which ExampleDB is listening, as well as a table name, from the supplied options.
 * @param options the data source options supplied by Spark at the read call site
 * @return a reader for the given ExampleDB table, configured with the requested number of partitions
 */
@Override
public DataSourceReader createReader(DataSourceOptions options) {
    String host = options.get("host").orElse("localhost");
    int port = options.getInt("port", -1);
    String table = options.get("table").orElse("unknownTable"); // TODO: throw
    int partitions = Integer.parseInt(options.get("partitions").orElse("0"));
    return new Reader(host, port, table, partitions);
}
 
Example 6
Source File: ParallelRowReadWriteDataSource.java    From spark-data-sources with MIT License
/**
 * Spark calls this to create the writer. The data source options are used
 * in the same way as above.
 * @param jobId unique identifier for the write job
 * @param schema Spark schema of the data to be written
 * @param mode save mode requested at the write call site
 * @param options the data source options supplied by Spark at the write call site
 * @return a writer, or Optional.empty() to skip the write (the SaveMode.Ignore case)
 */
@Override
public Optional<DataSourceWriter> createWriter(
        String jobId, StructType schema, SaveMode mode, DataSourceOptions options)
{
    // TODO: need to distinguish between creating the table for the first time
    // TODO: (just validate schema and create) vs appending (compare schema)

    // TODO: log jobId here and elsewhere when the partitionId etc. are logged

    String host = options.get("host").orElse("localhost");
    int port = options.getInt("port", -1);
    String table = options.get("table").orElse("unknownTable"); // TODO: throw
    int partitions = Integer.parseInt(options.get("partitions").orElse("0"));

    edb.common.Schema dbSchema = DBClientWrapper.sparkToDbSchema(schema);

    boolean truncateOnCommit = false;

    DBClientWrapper db = new DBClientWrapper(host, port);
    db.connect();
    if (db.tableExists(table)) {
        switch (mode) {
            case ErrorIfExists: {
                // check existence and throw if needed
                throw new RuntimeException("data already exists");
            }
            case Append: {
                // just check schema compatibility
                try {
                    Schema actualSchema = db.getDBSchema(table);
                    if (!dbSchema.isCompatible(actualSchema)) {
                        throw new RuntimeException("Appending to table with incompatible schema");
                    }
                } catch (UnknownTableException ute) {
                    throw new RuntimeException(ute);
                }
                break;
            }
            case Overwrite: {
                // two options if table exists: truncate it now or truncate it later
                truncateOnCommit = true;
                break;
            }
            case Ignore: {
                // check existence and declare victory
                return Optional.empty();
            }
            default:
        }
    } else {
        db.createTable(table, dbSchema);
    }

    return Optional.of(new Writer(host, port, table, partitions, dbSchema, truncateOnCommit));
}
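For context, the SaveMode driving the switch above comes from the write call site. Below is a minimal sketch, again using an assumed data source class name and option values:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

SparkSession spark = SparkSession.builder()
        .appName("writeExample")
        .master("local[*]")
        .getOrCreate();
Dataset<Row> df = spark.range(100).toDF(); // assumption: any DataFrame to write

// mode(SaveMode.Append) selects the Append branch of the switch in createWriter
df.write()
        .format("datasources.ParallelRowReadWriteDataSource") // assumption: illustrative class name
        .option("host", "localhost")
        .option("port", "20000")      // assumption: example ExampleDB port
        .option("table", "myTable")
        .option("partitions", "4")
        .mode(SaveMode.Append)
        .save();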