java source code of MoreStreams

/*****************************************************************************
 * ------------------------------------------------------------------------- *
 * Licensed under the Apache License, Version 2.0 (the "License");           *
 * you may not use this file except in compliance with the License.          *
 * You may obtain a copy of the License at                                   *
 *                                                                           *
 * http://www.apache.org/licenses/LICENSE-2.0                                *
 *                                                                           *
 * Unless required by applicable law or agreed to in writing, software       *
 * distributed under the License is distributed on an "AS IS" BASIS,         *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  *
 * See the License for the specific language governing permissions and       *
 * limitations under the License.                                            *
 *****************************************************************************/
package com.google.mu.util.stream;

import static java.util.Objects.requireNonNull;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Queue;
import java.util.Spliterator;
import java.util.Spliterators.AbstractSpliterator;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import com.google.mu.function.CheckedConsumer;

/**
 * Static utilities pertaining to {@link Stream} in addition to relevant utilities in Jdk and Guava.
 *
 * @since 1.1
 */
public final class MoreStreams {
  /**
   * Returns a Stream produced by iterative application of {@code step} to the initial
   * {@code seed}, producing a Stream consisting of seed, elements of step(seed),
   * elements of step(x) for each x in step(seed), etc.
   * (If the result stream returned by the {@code step} function is null an empty stream is used,
   * instead.)
   *
   * <p>While {@code Stream.generate(supplier)} can be used to generate infinite streams,
   * it's not as easy to generate a <em>finite</em> stream unless the size can be pre-determined.
   * This method can be used to generate finite streams: just return an empty stream when the
   * {@code step} determines that there's no more elements to be generated.
   *
   * <p>A typical group of use cases are BFS traversal algorithms.
   * For example, to stream the tree nodes in BFS order: <pre>{@code
   *   Stream<Node> bfs(Node root) {
   *     return generate(root, node -> node.children().stream());
   *   }
   * }</pre>
   *
   * It's functionally equivalent to the following common imperative code: <pre>{@code
   *   List<Node> bfs(Node root) {
   *     List<Node> result = new ArrayList<>();
   *     Queue<Node> queue = new ArrayDeque<>();
   *     queue.add(root);
   *     while (!queue.isEmpty()) {
   *       Node node = queue.remove();
   *       result.add(node);
   *       queue.addAll(node.children());
   *     }
   *     return result;
   *   }
   * }</pre>
   *
   * A BFS 2-D grid traversal algorithm: <pre>{@code
   *   Stream<Cell> bfs(Cell startingCell) {
   *     Set<Cell> visited = new HashSet<>();
   *     visited.add(startingCell);
   *     return generate(startingCell, c -> c.neighbors().filter(visited::add));
   *   }
   * }</pre>
   *
   * <p>At every step, 0, 1 or more elements can be generated into the resulting stream.
   * As discussed above, returning an empty stream leads to eventual termination of the stream;
   * returning 1-element stream is equivalent to {@code Stream.generate(supplier)};
   * while returning more than one elements allows a single element to fan out to multiple
   * elements.
   *
   * @since 1.9
   */
  public static <T> Stream<T> generate(
      T seed, Function<? super T, ? extends Stream<? extends T>> step) {
    requireNonNull(step);
    Queue<Stream<? extends T>> queue = new ArrayDeque<>();
    queue.add(Stream.of(seed));
    return whileNotNull(queue::poll)
        .flatMap(seeds -> seeds.peek(
            v -> {
              Stream<? extends T> fanout = step.apply(v);
              if (fanout != null) {
                queue.add(fanout);
              }
            }));
  }

  /**
   * Flattens {@code streamOfStream} and returns an unordered sequential stream of the nested
   * elements.
   *
   * <p>Logically, {@code stream.flatMap(fanOut)} is equivalent to
   * {@code MoreStreams.flatten(stream.map(fanOut))}.
   * Due to this <a href="https://bugs.openjdk.java.net/browse/JDK-8075939">JDK bug</a>,
   * {@code flatMap()} uses {@code forEach()} internally and doesn't support short-circuiting for
   * the passed-in stream. {@code flatten()} supports short-circuiting and can be used to
   * flatten infinite streams.
   *
   * @since 1.9
   */
  public static <T> Stream<T> flatten(Stream<? extends Stream<? extends T>> streamOfStream) {
    return mapBySpliterator(streamOfStream.sequential(), 0, FlattenedSpliterator<T>::new);
  }

  /**
   * Iterates through {@code stream} <em>only once</em>. It's strongly recommended
   * to avoid assigning the return value to a variable or passing it to any other method because
   * the returned {@code Iterable}'s {@link Iterable#iterator iterator()} method can only be called
   * once. Instead, always use it together with a for-each loop, as in:
   *
   * <pre>{@code
   *   for (Foo foo : iterateOnce(stream)) {
   *     ...
   *     if (...) continue;
   *     if (...) break;
   *     ...
   *   }
   * }</pre>
   *
   * The above is equivalent to manually doing:
   *
   * <pre>{@code
   *   Iterable<Foo> foos = stream::iterator;
   *   for (Foo foo : foos) {
   *     ...
   *   }
   * }</pre>
   * except using this API eliminates the need for a named variable that escapes the scope of the
   * for-each loop. And code is more readable too.
   *
   * <p>Note that {@link #iterateThrough iterateThrough()} should be preferred whenever possible
   * due to the caveats mentioned above. This method is still useful when the loop body needs to
   * use control flows such as {@code break} or {@code return}.
   */
  public static <T> Iterable<T> iterateOnce(Stream<T> stream) {
    return stream::iterator;
  }

  /**
   * Iterates through {@code stream} sequentially and passes each element to {@code consumer}
   * with exceptions propagated. For example:
   *
   * <pre>{@code
   *   void writeAll(Stream<?> stream, ObjectOutput out) throws IOException {
   *     iterateThrough(stream, out::writeObject);
   *   }
   * }</pre>
   */
  public static <T, E extends Throwable> void iterateThrough(
      Stream<? extends T> stream, CheckedConsumer<? super T, E> consumer) throws E {
    requireNonNull(consumer);
    for (T element : iterateOnce(stream)) {
      consumer.accept(element);
    }
  }

  /**
   * Dices {@code stream} into smaller chunks each with up to {@code maxSize} elements.
   *
   * <p>For a sequential stream, the first N-1 chunk's will contain exactly {@code maxSize}
   * elements and the last chunk may contain less (but never 0).
   * However for parallel streams, it's possible that the stream is split in roughly equal-sized
   * sub streams before being diced into smaller chunks, which then will result in more than one
   * chunks with less than {@code maxSize} elements.
   *
   * <p>This is an <a href="https://docs.oracle.com/javase/8/docs/api/java/util/stream/package-summary.html#StreamOps">
   * intermediary operation</a>.
   *
   * @param stream the source stream to be diced
   * @param maxSize the maximum size for each chunk
   * @return Stream of diced chunks each being a list of size up to {@code maxSize}
   * @throws IllegalStateException if {@code maxSize <= 0}
   */
  public static <T> Stream<List<T>> dice(Stream<? extends T> stream, int maxSize) {
    requireNonNull(stream);
    if (maxSize <= 0) throw new IllegalArgumentException();
    return mapBySpliterator(stream, Spliterator.NONNULL, it -> dice(it, maxSize));
  }

  /**
   * Dices {@code spliterator} into smaller chunks each with up to {@code maxSize} elements.
   *
   * @param spliterator the source spliterator to be diced
   * @param maxSize the maximum size for each chunk
   * @return Spliterator of diced chunks each being a list of size up to {@code maxSize}
   * @throws IllegalStateException if {@code maxSize <= 0}
   */
  public static <T> Spliterator<List<T>> dice(Spliterator<? extends T> spliterator, int maxSize) {
    requireNonNull(spliterator);
    if (maxSize <= 0) throw new IllegalArgumentException();
    return new DicedSpliterator<T>(spliterator, maxSize);
  }

  /**
   * Returns a collector that collects {@link Map} entries into a combined map. Duplicate keys cause {@link
   * IllegalStateException}. For example:
   *
   * <pre>{@code
   *   Map<FacultyId, Account> allFaculties = departments.stream()
   *       .map(Department::getFacultyMap)
   *       .collect(uniqueKeys());
   * }</pre>
   *
   * <p>Use {@link BiStream#groupingValuesFrom} if there are duplicate keys.
   *
   * @since 1.13
   */
  public static <K, V> Collector<Map<K, V>, ?, Map<K, V>> uniqueKeys() {
    return Collectors.collectingAndThen(
        BiStream.groupingValuesFrom(Map::entrySet,  (a, b) -> {
          throw new IllegalStateException("Duplicate keys not allowed: " + a);
        }),
        BiStream::toMap);
  }

  /**
   * Returns a collector that collects input elements into a list, which is then arranged by the
   * {@code arranger} function before being wrapped as <em>immutable</em> list result.
   * List elements are not allowed to be null.
   *
   * <p>Example usages: <ul>
   * <li>{@code stream.collect(toListAndThen(Collections::reverse))} to collect to reverse order.
   * <li>{@code stream.collect(toListAndThen(Collections::shuffle))} to collect and shuffle.
   * <li>{@code stream.collect(toListAndThen(Collections::sort))} to collect and sort.
   * </ul>
   *
   * @since 4.2
   */
  public static <T> Collector<T, ?, List<T>> toListAndThen(Consumer<? super List<T>> arranger) {
    requireNonNull(arranger);
    Collector<T, ?, List<T>> rejectingNulls =
        Collectors.mapping(Objects::requireNonNull, Collectors.toCollection(ArrayList::new));
    return Collectors.collectingAndThen(rejectingNulls, list -> {
      arranger.accept(list);
      return Collections.unmodifiableList(list);
    });
  }

  /**
   * Returns an infinite {@link Stream} starting from {@code firstIndex}.
   * Can be used together with {@link BiStream#zip} to iterate over a stream with index.
   * For example: {@code zip(indexesFrom(0), values)}.
   *
   * <p>To get a finite stream, use {@code indexesFrom(...).limit(size)}.
   *
   * <p>Note that while {@code indexesFrom(0)} will eventually incur boxing cost for every integer,
   * the JVM typically pre-caches small {@code Integer} instances (by default up to 127).
   *
   * @since 3.7
   */
  public static Stream<Integer> indexesFrom(int firstIndex) {
    return IntStream.iterate(firstIndex, i -> i + 1).boxed();
  }

  /**
   * Returns a (potentially infinite) stream of {@code collection} until {@code collection} becomes
   * empty.
   *
   * <p>The returned stream can be terminated by removing elements from the underlying collection
   * while the stream is being iterated.
   *
   * @since 3.8
   */
  public static <C extends Collection<?>> Stream<C> whileNotEmpty(C collection) {
    requireNonNull(collection);
    return whileNotNull(() -> collection.isEmpty() ? null : collection);
  }

  /**
   * Similar to {@link Stream#generate}, returns an infinite, sequential, unordered, and non-null
   * stream where each element is generated by the provided Supplier. The stream however will
   * terminate as soon as the Supplier returns null, in which case the null is treated as the
   * terminal condition and doesn't constitute a stream element.
   *
   * <p>For sequential iterations, {@code whileNotNll()} is usually more concise than implementing
   * {@link AbstractSpliterator} directly. The latter requires boilerplate that looks like this:
   *
   * <pre>{@code
   * return StreamSupport.stream(
   *     new AbstractSpliterator<T>(MAX_VALUE, NONNULL) {
   *       public boolean tryAdvance(Consumer<? super T> action) {
   *         if (hasData) {
   *           action.accept(data);
   *           return true;
   *         }
   *         return false;
   *       }
   *     }, false);
   * }</pre>
   *
   * Which is equivalent to the following one-liner using {@code whileNotNull()}:
   *
   * <pre>{@code
   * return whileNotNull(() -> hasData ? data : null);
   * }</pre>
   *
   * <p>Why null? Why not {@code Optional}? Wrapping every generated element of a stream in an
   * {@link Optional} carries considerable allocation cost. Also, while nulls are in general
   * discouraged, they are mainly a problem for users who have to remember to deal with them.
   * The stream returned by {@code whileNotNull()} on the other hand is guaranteed to never include
   * nulls that users have to worry about.
   *
   * <p>If you already have an {@code Optional} from a method return value, you can use {@code
   * whileNotNull(() -> optionalReturningMethod().orElse(null))}.
   *
   * <p>One may still need to implement {@code AbstractSpliterator} or {@link java.util.Iterator}
   * directly if null is a valid element (usually discouraged though).
   *
   * <p>If you have an imperative loop over a mutable queue or stack:
   *
   * <pre>{@code
   * while (!queue.isEmpty()) {
   *   int num = queue.poll();
   *   if (someCondition) {
   *     ...
   *   }
   * }
   * }</pre>
   *
   * it can be turned into a stream using {@code whileNotNull()}:
   *
   * <pre>{@code
   * whileNotNull(queue::poll).filter(someCondition)...
   * }</pre>
   *
   * @since 4.1
   */
  public static <T> Stream<T> whileNotNull(Supplier<? extends T> supplier) {
    requireNonNull(supplier);
    return StreamSupport.stream(
        new AbstractSpliterator<T>(Long.MAX_VALUE, Spliterator.NONNULL) {
          @Override public boolean tryAdvance(Consumer<? super T> action) {
            T element = supplier.get();
            if (element == null) return false;
            action.accept(element);
            return true;
          }
        }, false);
  }

  /**
   * Returns a collector that first copies all input elements into a new {@code Stream} and then
   * passes the stream to {@code toSink} function, which translates it to the final result.
   *
   * @since 3.6
   */
  static <T, R> Collector<T, ?, R> copying(Function<Stream<T>, R> toSink) {
    return Collectors.collectingAndThen(toStream(), toSink);
  }

  static <F, T> Stream<T> mapBySpliterator(
      Stream<F> stream, int characteristics,
      Function<? super Spliterator<F>, ? extends Spliterator<T>> mapper) {
    requireNonNull(mapper);
    Stream<T> mapped = StreamSupport.stream(
        () -> mapper.apply(stream.spliterator()), characteristics, stream.isParallel());
    mapped.onClose(stream::close);
    return mapped;
  }

  /** Copying input elements into another stream. */
  private static <T> Collector<T, ?, Stream<T>> toStream() {
    return Collector.of(
        Stream::<T>builder,
        Stream.Builder::add,
        (b1, b2) -> {
          b2.build().forEachOrdered(b1::add);
          return b1;
        },
        Stream.Builder::build);
  }

  private static <F, T> T splitThenWrap(
      Spliterator<F> from, Function<? super Spliterator<F>, ? extends T> wrapper) {
    Spliterator<F> it = from.trySplit();
    return it == null ? null : wrapper.apply(it);
  }

  private static final class DicedSpliterator<T> implements Spliterator<List<T>> {
    private final Spliterator<? extends T> underlying;
    private final int maxSize;

    DicedSpliterator(Spliterator<? extends T> underlying, int maxSize) {
      this.underlying = requireNonNull(underlying);
      this.maxSize = maxSize;
    }

    @Override public boolean tryAdvance(Consumer<? super List<T>> action) {
      requireNonNull(action);
      List<T> chunk = new ArrayList<>(chunkSize());
      for (int i = 0; i < maxSize && underlying.tryAdvance(chunk::add); i++) {}
      if (chunk.isEmpty()) return false;
      action.accept(chunk);
      return true;
    }

    @Override public Spliterator<List<T>> trySplit() {
      return splitThenWrap(underlying, it -> new DicedSpliterator<>(it, maxSize));
    }

    @Override public long estimateSize() {
      long size = underlying.estimateSize();
      return size == Long.MAX_VALUE ? Long.MAX_VALUE : estimateChunks(size);
    }

    @Override public long getExactSizeIfKnown() {
      return -1;
    }

    @Override public int characteristics() {
      return Spliterator.NONNULL;
    }

    private int chunkSize() {
      long estimate = underlying.estimateSize();
      if (estimate <= maxSize) return (int) estimate;
      // The user could set a large chunk size for an unknown-size stream, don't blow up memory.
      return estimate == Long.MAX_VALUE ? Math.min(maxSize, 8192) : maxSize;
    }

    private long estimateChunks(long size) {
      long lower = size / maxSize;
      return lower + ((size % maxSize == 0) ? 0 : 1);
    }
  }

  private static final class FlattenedSpliterator<T> implements Spliterator<T> {
    private final Spliterator<? extends Stream<? extends T>> blocks;
    private Spliterator<? extends T> currentBlock;
    private final Consumer<Stream<? extends T>> nextBlock = block -> {
      currentBlock = block.spliterator();
    };

    FlattenedSpliterator(Spliterator<? extends Stream<? extends T>> blocks) {
      this.blocks = requireNonNull(blocks);
    }

    private FlattenedSpliterator(
        Spliterator<? extends T> currentBlock, Spliterator<? extends Stream<? extends T>> blocks) {
      this.blocks = requireNonNull(blocks);
      this.currentBlock = currentBlock;
    }

    @Override public boolean tryAdvance(Consumer<? super T> action) {
      requireNonNull(action);
      if (currentBlock == null && !tryAdvanceBlock()) {
        return false;
      }
      boolean advanced = false;
      while ((!(advanced = currentBlock.tryAdvance(action))) && tryAdvanceBlock()) {}
      return advanced;
    }

    @Override public Spliterator<T> trySplit() {
      return splitThenWrap(blocks, it -> {
        Spliterator<T> result = new FlattenedSpliterator<>(currentBlock, it);
        currentBlock = null;
        return result;
      });
    }

    @Override public long estimateSize() {
      return Long.MAX_VALUE;
    }

    @Override public long getExactSizeIfKnown() {
      return -1;
    }

    @Override public int characteristics() {
      // While we maintain encounter order as long as 'blocks' does, returning an ordered stream
      // (which can be infinite) could surprise users when the user does things like
      // "parallel().limit(n)". It's sufficient for normal use cases to respect encounter order
      // without reporting order-ness.
      return 0;
    }

    private boolean tryAdvanceBlock() {
      return blocks.tryAdvance(nextBlock);
    }
  }

  private MoreStreams() {}
}