package de.jungblut.glove.impl;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import com.google.common.base.Preconditions;

import de.jungblut.glove.GloveStreamReader;
import de.jungblut.glove.util.StringVectorPair;
import de.jungblut.math.DoubleVector;
import de.jungblut.math.dense.DenseDoubleVector;

/**
 * Reads word vectors from a text file, one entry per line. Each line is split
 * on single space characters: the first token is taken as the word and every
 * following token is parsed as a double component of the vector.
 */
public class GloveTextReader implements GloveStreamReader {

  // Note: despite the name, this splits on a single space character only, not
  // on arbitrary whitespace.
  private static final Pattern SPLIT_WHITESPACE = Pattern.compile(" ");

  /**
   * Lazily streams the word/vector pairs contained in the given file.
   * <p>
   * Parsing and validation happen while the returned stream is consumed, not
   * when this method returns. The dimension of the first vector seen becomes
   * the expected dimension for all following vectors. The dimension tracking
   * uses unsynchronized shared state, so the check assumes the stream is
   * consumed sequentially.
   * <p>
   * The returned stream holds an open file and must be closed by the caller,
   * for example with try-with-resources.
   *
   * @param input path of the text file to read.
   * @return a stream of pairs, one per line of the file.
   * @throws IOException if the file cannot be opened.
   * @throws NullPointerException during consumption, if a pair has a null
   *           word.
   * @throws IllegalArgumentException during consumption, if a vector has a
   *           different dimension than the first vector seen.
   * @throws NumberFormatException during consumption, if a vector component
   *           cannot be parsed as a double.
   */
  @Override
  public Stream<StringVectorPair> stream(Path input) throws IOException {

    final Stream<String> lines = Files.lines(input);
    // single-element array so that the lambda below can update the value
    final int[] expectedSize = new int[] { -1 };
    Stream<StringVectorPair> pairs = lines.map(this::process).map(
        (pair) -> {
          Preconditions.checkNotNull(pair.value, "word was null");
          final int dimension = pair.vector.getDimension();
          if (expectedSize[0] == -1) {
            // the first vector defines the dimension for the rest of the file
            expectedSize[0] = dimension;
          } else if (expectedSize[0] != dimension) {
            // the message is only built on failure instead of once per line
            throw new IllegalArgumentException(
                "found inconsistency. Expected size " + expectedSize[0]
                    + " but found " + dimension);
          }
          return pair;
        });

    // onClose returns the stream that carries the additional close handler,
    // so that result is what has to be handed to the caller.
    return pairs.onClose(lines::close);
  }

  /**
   * Parses a single line into a word and its vector.
   *
   * @param line the raw line: a word followed by space separated numbers.
   * @return the word together with a dense vector of the parsed numbers. A
   *         line without any space yields a vector of dimension zero.
   */
  private StringVectorPair process(String line) {
    String[] split = SPLIT_WHITESPACE.split(line);
    String name = split[0];

    // everything after the first token is a vector component
    DoubleVector vec = new DenseDoubleVector(split.length - 1);
    for (int i = 1; i < split.length; i++) {
      vec.set(i - 1, Double.parseDouble(split[i]));
    }

    return new StringVectorPair(name, vec);
  }
}