// Java source code of MongodbSourceTask

package org.apache.kafka.connect.mongodb;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.kafka.common.errors.InterruptException;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.mongodb.converter.StringStructConverter;
import org.apache.kafka.connect.mongodb.converter.StructConverter;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.source.SourceTask;
import org.bson.BsonTimestamp;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * MongodbSourceTask is a Task that reads mutations from a mongodb for storage in Kafka.
 *
 * <p>Every message taken from the {@link MongodbReader} is converted into a
 * {@link SourceRecord} whose source partition is {@code {"mongodb": <ns>}} and whose
 * source offset is {@code {<ns>: "<seconds>_<inc>"}}, where {@code ns} and {@code ts}
 * are fields of the message.
 *
 * @author Andrea Patelli
 */
public class MongodbSourceTask extends SourceTask {
    private static final Logger log = LoggerFactory.getLogger(MongodbSourceTask.class);

    /** Characters (whitespace and dots) that are replaced by "_" in schema keys and topic names. */
    private static final Pattern UNSAFE_NAME_CHARS = Pattern.compile("[\\s.]");

    private String uri;
    private String host;
    private Integer port;
    private String schemaName;
    private Integer batchSize;
    private String topicPrefix;
    private StructConverter structConverter;
    private List<String> databases;

    /**
     * Schemas keyed by sanitized database name. Held per task instance (not static) so that
     * tasks configured with a different schema name never pick up each other's schemas and
     * no unsynchronized map is shared between task instances.
     */
    private final Map<String, Schema> schemas = new HashMap<>();

    private MongodbReader reader;

    /** Stored offsets per source partition; filled by {@link #loadOffsets()} and passed to the reader. */
    Map<Map<String, String>, Map<String, Object>> offsets = new HashMap<>(0);

    @Override
    public String version() {
        return new MongodbSourceConnector().version();
    }

    /**
     * Start the Task. Handles configuration parsing and one-time setup of the Task:
     * parses the numeric settings, instantiates the configured {@link StructConverter},
     * builds one schema per configured database, loads the stored offsets and finally
     * creates and runs the {@link MongodbReader}.
     *
     * @param map initial configuration
     * @throws ConnectException if the port or batch size is not an integer, if the converter
     *                          class cannot be instantiated as a {@link StructConverter}, or if
     *                          the databases setting is missing
     */
    @Override
    public void start(Map<String, String> map) {
        // The port is optional: it is only parsed when the key is present.
        if (map.containsKey(MongodbSourceConfig.PORT)) {
            port = parseInteger(map, MongodbSourceConfig.PORT);
        }
        batchSize = parseInteger(map, MongodbSourceConfig.BATCH_SIZE);

        schemaName = map.get(MongodbSourceConfig.SCHEMA_NAME);
        topicPrefix = map.get(MongodbSourceConfig.TOPIC_PREFIX);
        uri = map.get(MongodbSourceConfig.URI);
        host = map.get(MongodbSourceConfig.HOST);

        structConverter = createStructConverter(map.get(MongodbSourceConfig.CONVERTER_CLASS));

        String databaseList = map.get(MongodbSourceConfig.DATABASES);
        if (databaseList == null) {
            // Fail with a descriptive error instead of a bare NullPointerException.
            throw new ConnectException(MongodbSourceConfig.DATABASES + " config is required");
        }
        databases = Arrays.asList(databaseList.split(","));

        log.trace("Creating schema");
        for (String db : databases) {
            String schemaKey = sanitize(db);
            schemas.put(schemaKey,
                    SchemaBuilder
                            .struct()
                            .name(schemaName.concat("_").concat(schemaKey))
                            .field("timestamp", Schema.OPTIONAL_INT32_SCHEMA)
                            .field("order", Schema.OPTIONAL_INT32_SCHEMA)
                            .field("operation", Schema.OPTIONAL_STRING_SCHEMA)
                            .field("database", Schema.OPTIONAL_STRING_SCHEMA)
                            .field("object", Schema.OPTIONAL_STRING_SCHEMA)
                            .build());
        }

        loadOffsets();

        // A connection URI, when configured, takes precedence over host/port.
        if (uri != null) {
            reader = new MongodbReader(uri, databases, batchSize, offsets);
        } else {
            reader = new MongodbReader(host, port, databases, batchSize, offsets);
        }
        reader.run();
    }

    /**
     * Poll this MongodbSourceTask for new records. Takes messages from the reader until it
     * reports that it is empty and converts each one into a {@link SourceRecord}.
     *
     * <p>NOTE(review): when the reader is empty this returns an empty list immediately
     * without waiting; confirm that the resulting polling cadence is acceptable.
     *
     * @return a list of source records, possibly empty
     * @throws InterruptException declared by the signature; not thrown explicitly by the
     *                            code in this method
     */
    @Override
    public List<SourceRecord> poll() throws InterruptException {
        List<SourceRecord> records = new ArrayList<>();
        while (!reader.isEmpty()) {
            Document message = reader.pool();
            Struct messageStruct = getStruct(message);
            String topic = getTopic(message);
            String db = getDB(message);
            String timestamp = getTimestamp(message);
            records.add(new SourceRecord(
                    Collections.singletonMap("mongodb", db),
                    Collections.singletonMap(db, timestamp),
                    topic,
                    messageStruct.schema(),
                    messageStruct));
            // Parameterized so the message is only rendered when trace logging is enabled.
            log.trace("{}", message);
        }
        return records;
    }

    /**
     * Signal this SourceTask to stop. Stops the reader if one has been created.
     */
    @Override
    public void stop() {
        if (reader != null) {
            reader.stop();
        }
    }

    /**
     * Parses a configuration value as an integer.
     *
     * @param map configuration to read from
     * @param key name of the setting to parse
     * @return the parsed value
     * @throws ConnectException if the value is missing or is not a valid integer
     */
    private static Integer parseInteger(Map<String, String> map, String key) {
        try {
            return Integer.valueOf(map.get(key));
        } catch (NumberFormatException e) {
            throw new ConnectException(key + " config should be an Integer", e);
        }
    }

    /**
     * Instantiates the configured {@link StructConverter} through its no-argument constructor.
     *
     * @param className converter class name; {@link StringStructConverter} is used when this
     *                  is {@code null} or empty
     * @return a new converter instance
     * @throws ConnectException if the class cannot be loaded, is not a {@link StructConverter},
     *                          or cannot be instantiated
     */
    private static StructConverter createStructConverter(String className) {
        String converterClass = className;
        if (converterClass == null || converterClass.isEmpty()) {
            converterClass = StringStructConverter.class.getName();
        }
        try {
            return Class.forName(converterClass)
                    .asSubclass(StructConverter.class)
                    .getDeclaredConstructor()
                    .newInstance();
        } catch (ReflectiveOperationException | ClassCastException e) {
            throw new ConnectException(
                    MongodbSourceConfig.CONVERTER_CLASS + " config should be a class of type StructConverter", e);
        }
    }

    /**
     * Replaces every whitespace character and dot in a name with an underscore.
     *
     * @param name raw name
     * @return the name with whitespace and dots replaced by "_"
     */
    private static String sanitize(String name) {
        return UNSAFE_NAME_CHARS.matcher(name).replaceAll("_");
    }

    /**
     * Retrieves a topic on which the message should be written: the sanitized {@code ns}
     * field of the message, prefixed with the topic prefix and "_" when a non-empty prefix
     * is configured.
     *
     * @param message from which retrieve the topic
     * @return parsed String representing the topic
     */
    private String getTopic(Document message) {
        String database = sanitize(getDB(message));
        if (topicPrefix != null && !topicPrefix.isEmpty()) {
            return topicPrefix + "_" + database;
        }
        return database;
    }

    /**
     * Retrieves the database from which the message has been read.
     *
     * @param message from which retrieve the database
     * @return the value of the message's {@code ns} field, as a String
     */
    private String getDB(Document message) {
        return (String) message.get("ns");
    }

    /**
     * Calculates the timestamp of the message.
     *
     * @param message from which retrieve the timestamp
     * @return the {@code ts} BsonTimestamp formatted as a String ({@code time_inc})
     */
    private String getTimestamp(Document message) {
        BsonTimestamp timestamp = (BsonTimestamp) message.get("ts");
        return timestamp.getTime() + "_" + timestamp.getInc();
    }

    /**
     * Creates a struct from a Mongodb message using configured {@link StructConverter}.
     * The schema handed to the converter is looked up by the sanitized {@code ns} of the
     * message and is {@code null} when no schema was built for it in {@code start}.
     *
     * @param message to parse
     * @return message formatted as a Struct
     */
    private Struct getStruct(Document message) {
        final Schema schema = schemas.get(sanitize(getDB(message)));
        return structConverter.toStruct(message, schema);
    }

    /**
     * Loads the current saved offsets. Builds one {@code {"mongodb": <db>}} partition per
     * configured database and copies whatever the offset storage reader returns for them
     * into {@link #offsets}.
     */
    private void loadOffsets() {
        List<Map<String, String>> partitions = new ArrayList<>(databases.size());
        for (String db : databases) {
            partitions.add(Collections.singletonMap("mongodb", db));
        }
        offsets.putAll(context.offsetStorageReader().offsets(partitions));
    }
}