/*
 * Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

package samples;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration;
import com.amazonaws.services.kinesis.model.PutRecordRequest;
import samples.KinesisMessageModel;
import samples.StreamSource;

/**
 * This class is a data source for supplying input to the Amazon Kinesis stream. It reads lines from the
 * input file specified in the constructor and batches up records before emitting them.
 */
public class BatchedStreamSource extends StreamSource {
    /** Class-wide logger; never reassigned, hence final. */
    private static final Log LOG = LogFactory.getLog(BatchedStreamSource.class);

    /**
     * Serialized-size threshold, in bytes, at which the pending batch is flushed.
     * Chosen to keep each put request's data blob under the 50 KB limit referred
     * to in {@code processInputStream}.
     */
    private static final int NUM_BYTES_PER_PUT_REQUEST = 50000;

    /**
     * Records accumulated for the next put request. Left package-private and
     * non-final so existing same-package access keeps working.
     */
    List<KinesisMessageModel> buffer;

    /**
     * Creates a batched stream source that delegates to the three-argument
     * constructor with {@code loopOverStreamSource} set to {@code false}.
     *
     * @param config    connector configuration, passed through unchanged
     * @param inputFile input file whose lines are read as records
     */
    public BatchedStreamSource(KinesisConnectorConfiguration config, String inputFile) {
        this(config, inputFile, false);
    }

    /**
     * Creates a batched stream source and initializes an empty record buffer.
     * All three arguments are forwarded unchanged to the {@code StreamSource}
     * constructor.
     *
     * @param config               connector configuration
     * @param inputFile            input file whose lines are read as records
     * @param loopOverStreamSource forwarded to the superclass; presumably controls
     *                             whether the input is replayed (confirm in StreamSource)
     */
    public BatchedStreamSource(KinesisConnectorConfiguration config, String inputFile, boolean loopOverStreamSource) {
        super(config, inputFile, loopOverStreamSource);
        buffer = new ArrayList<>();
    }

    protected void processInputStream(InputStream inputStream, int iteration) throws IOException {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) {
            String line;
            int lines = 0;

            while ((line = br.readLine()) != null) {
                KinesisMessageModel kinesisMessageModel = objectMapper.readValue(line, KinesisMessageModel.class);
                if (numBytesInBuffer() > NUM_BYTES_PER_PUT_REQUEST) {
                     * We need to remove the last record to ensure this data blob is accepted by the Amazon Kinesis
                     * client which restricts the data blob to be less than 50 KB.
                    KinesisMessageModel lastRecord = buffer.remove(buffer.size() - 1);
                     * We add it back so it will be part of the next batch.
            if (!buffer.isEmpty()) {

            LOG.info("Added " + lines + " records to stream source.");

    private byte[] bufferToBytes() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(bos);
        return bos.toByteArray();

    private int numBytesInBuffer() throws IOException {
        return bufferToBytes().length;

    private void flushBuffer() throws IOException {
        PutRecordRequest putRecordRequest = new PutRecordRequest();