/**
 * This file is part of Graylog.
 *
 * Graylog is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Graylog is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Graylog.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.graylog.collector.file.splitters;

import com.google.common.collect.AbstractIterator;
import io.netty.buffer.ByteBuf;

import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Splits buffered content into chunks, using a regular expression to mark where each chunk begins.
 *
 * <p>The pattern is compiled with {@link Pattern#MULTILINE}. Every match start is treated as a chunk
 * boundary: the text between the previous boundary and the next match start is emitted as one chunk.
 */
public class PatternChunkSplitter extends ContentSplitter {

    /** Compiled chunk-start pattern; set once in the constructor. */
    private final Pattern pattern;

    /**
     * Creates a splitter for the given chunk-start pattern.
     *
     * @param pattern regular expression whose matches mark the start of a chunk; compiled with
     *                {@link Pattern#MULTILINE}
     * @throws java.util.regex.PatternSyntaxException if the expression cannot be compiled
     */
    public PatternChunkSplitter(String pattern) {
        this.pattern = Pattern.compile(pattern, Pattern.MULTILINE);
    }

    /**
     * Returns a lazily evaluated sequence of chunks found in the readable bytes of {@code buffer}.
     *
     * <p>Each emitted chunk advances the buffer's reader index by the encoded length of that chunk and
     * read bytes are discarded after every step. Every call to {@code iterator()} on the returned
     * iterable decodes the buffer's readable bytes anew.
     *
     * @param buffer               buffer holding the raw content; its reader index is advanced as chunks are emitted
     * @param charset              charset used to decode the buffer and to compute the byte length of each chunk
     * @param includeRemainingData if {@code true}, content after the last chunk boundary is emitted as a final
     *                             chunk; if {@code false}, it is left in the buffer
     * @return an iterable over the chunks
     */
    @Override
    public Iterable<String> split(final ByteBuf buffer, final Charset charset, final boolean includeRemainingData) {
        return () -> new AbstractIterator<String>() {
            // TODO Might throw an exception if multibyte charset is used and buffer is not complete.
            //      Use CharsetDecoder to create a CharBuffer and match on that!
            // NOTE(review): this decode runs when the iterator is created, i.e. outside the try block in
            //      computeNext(), so a decoding failure here is not handled by the catch below - confirm intent.
            private final String inputAsString = buffer.toString(charset);
            private final Matcher matcher = pattern.matcher(inputAsString);
            // Index into inputAsString where the next chunk starts.
            private int positionInString = 0;

            @Override
            protected String computeNext() {
                try {
                    if (!buffer.isReadable()) {
                        return endOfData();
                    }
                    if (!matcher.find()) {
                        // No (further) boundary at all: either flush what is left or wait for more data.
                        return includeRemainingData ? getRemainingContent() : endOfData();
                    }

                    int chunkEnd = matcher.start();
                    if (chunkEnd == 0) {
                        // The buffer begins with our pattern, so this match opens the first chunk.
                        // The chunk ends where the next match starts.
                        if (!matcher.find()) {
                            // Couldn't find the end of the entry (i.e. there wasn't a next line yet).
                            // Only emit it if we are asked to finish up.
                            return includeRemainingData ? getRemainingContent() : endOfData();
                        }
                        chunkEnd = matcher.start();
                    }
                    if (chunkEnd == 0) {
                        // Defensive guard: a zero-length chunk would never advance the buffer,
                        // so keep waiting for more data instead.
                        return endOfData();
                    }

                    final String chunk = inputAsString.substring(positionInString, chunkEnd);
                    positionInString = chunkEnd;
                    // Advance the buffer by the encoded size of the chunk we are handing out.
                    buffer.skipBytes(chunk.getBytes(charset).length); // TODO performance
                    return chunk;
                } catch (IllegalStateException e) {
                    // Presumably raised by the buffer's toString() when decoding fails (the original author
                    // notes the cause is a CharacterCodingException), which usually means the buffer ended
                    // with an incomplete encoding of a unicode character.
                    // We need to wait until more data is available.
                    return endOfData();
                } finally {
                    buffer.discardReadBytes();
                }
            }

            /**
             * Decodes and consumes everything that is still readable in the buffer.
             *
             * <p>The bytes are decoded in place instead of being copied via {@code readBytes(int)}: that call
             * allocates a new buffer which would have to be released explicitly. The reader index is only
             * advanced after decoding succeeded, so a decoding failure does not drop the pending bytes.
             */
            private String getRemainingContent() {
                final String remaining = buffer.toString(charset);
                buffer.skipBytes(buffer.readableBytes());
                return remaining;
            }
        };
    }
}