 * Copyright (C) 2016 Google Inc.
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.

package com.google.cloud.solutions.samples.timeseries.dataflow.utils.aggregation;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
import com.google.cloud.dataflow.sdk.coders.ListCoder;
import com.google.cloud.dataflow.sdk.coders.protobuf.ProtoCoder;
import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
import com.google.cloud.solutions.samples.timeseries.dataflow.application.proto.TimeSeriesProtos.TSAggValueProto;
import com.google.cloud.solutions.samples.timeseries.dataflow.application.proto.TimeSeriesProtos.TSProto;
import com.google.cloud.solutions.samples.timeseries.dataflow.utils.aggregation.TimeseriesUtils.TSTuple;
import com.google.protobuf.TextFormat;

 * This combiner is designed to emulate keyed state: it retains the closing value of the last
 * aggregation and propagates that value to the new aggregation for the window.
public class CompleteTimeSeriesAggCombiner
    KeyedCombineFn<String, TSAggValueProto, CompleteTimeSeriesAggCombiner.Accum, List<TSAggValueProto>>
    implements Serializable {

  // Class-level SLF4J logger; addInput writes to it at debug level.
  private static final Logger LOG = LoggerFactory.getLogger(CompleteTimeSeriesAggCombiner.class);

  /**
   * Mutable accumulator used by this combiner: one {@link TSProto} carried between aggregations
   * plus the list of {@link TSAggValueProto} values held so far.
   */
  public static class Accum {
    // Close state carried forward between aggregations. It is null until something assigns it;
    // mergeAccumulators and extractOutput both null-check it before use.
    public TSProto lastCandle;

    // In the aggregation step we may receive many Accum instances and the operation is not
    // associative, so the individual candles are held in a list.
    // NOTE(review): the original comment trailed off ("so to catch the issue") — confirm the
    // intended wording with the author.

    public List<TSAggValueProto> candles = new ArrayList<TSAggValueProto>();


  /**
   * Creates a fresh accumulator.
   *
   * @param key the combine key (not referenced here)
   * @return a new {@link Accum} whose {@code lastCandle} is null and whose {@code candles} list
   *     is empty
   */
  public Accum createAccumulator(String key) {
    return new Accum();

  /**
   * Handles one incoming aggregated value for the given key.
   *
   * <p>The code visible here logs the input at debug level and returns the same accumulator
   * instance that was passed in.
   *
   * <p>NOTE(review): the inline comment says every candle needs to be saved, but no statement
   * that stores {@code input} in {@code accum.candles} is visible in this block — confirm
   * against the complete source.
   *
   * @param key the combine key (not referenced in the visible code)
   * @param accum the accumulator for this key
   * @param input the value being added
   * @return {@code accum}, the same instance that was passed in
   */
  public Accum addInput(String key, Accum accum, TSAggValueProto input) {

    // Add all new candles to the accum, this is not associative, every candle needs to be saved

    // Debug trace of the incoming value in protobuf short text form.
    LOG.debug("Adding Accum " + TextFormat.shortDebugString(input));

    return accum;

  /**
   * Produces a single accumulator from several partial ones.
   *
   * <p>As visible here, a fresh {@link Accum} is created and, for every incoming accumulator whose
   * {@code lastCandle} is non-null, that value overwrites the aggregate's {@code lastCandle}; the
   * last non-null value in iteration order therefore wins.
   *
   * <p>NOTE(review): the body of the first loop, any copying of {@code candles} into the
   * aggregate, and the sort mentioned in the inline comment are not visible in this block —
   * confirm against the complete source.
   *
   * @param key the combine key (not referenced in the visible code)
   * @param accums the partial accumulators to merge
   * @return the newly created aggregate accumulator
   */
  public Accum mergeAccumulators(String key, Iterable<Accum> accums) {

    for (Accum a : accums) {

    Accum aggregatedAccum = new Accum();

    // We can get these in any order so need to sort
    for (Accum accum : accums) {

      // If LastCandle is present add it to the accum
      if (accum.lastCandle != null) {
        aggregatedAccum.lastCandle = accum.lastCandle;



    return aggregatedAccum;

  // We may have multiple accumulated candles, so each one needs its close value set to the
  // previous one in the list before being emitted forward.

  /**
   * Builds the list of output values from an accumulator.
   *
   * <p>Steps visible in this block:
   * <ol>
   *   <li>Each entry of {@code accum.candles} is wrapped in a {@link TSTuple}.</li>
   *   <li>The tuples are iterated; when {@code accum.lastCandle} is non-null the value is rebuilt
   *       through {@code TimeseriesUtils.addTSOpenValue(value, accum.lastCandle)} and the rebuilt
   *       value's close state becomes the new {@code accum.lastCandle}.</li>
   *   <li>Otherwise {@code accum.lastCandle} is set from the current value's own close
   *       state.</li>
   * </ol>
   *
   * <p>Note that {@code accum.lastCandle} is reassigned on the accumulator that was passed in.
   *
   * <p>NOTE(review): the sort into timestamp order, the statement that appends to
   * {@code completeCandles}, the right-hand side of the assignment in the else branch, and the
   * clear-down of the accumulator list are referenced by comments but not visible in this
   * block — confirm against the complete source.
   *
   * @param key the combine key (not referenced in the visible code)
   * @param accum the accumulator to read from
   * @return the {@code completeCandles} list created in this method
   */
  public List<TSAggValueProto> extractOutput(String key, Accum accum) {

    // Working copy of the candles, wrapped as TSTuple.
    List<TSTuple> candles = new ArrayList<TSTuple>();

    // Result list returned to the caller.
    List<TSAggValueProto> completeCandles = new ArrayList<TSAggValueProto>();

    for (TSAggValueProto c : accum.candles) {
      candles.add(new TSTuple(c));


    // Put the candles in ts order

    Iterator<TSTuple> it = candles.iterator();

    TSTuple currentValue = null;

    while (it.hasNext()) {
      currentValue = it.next();

      // Update the current candle with the last close if available

      TSAggValueProto aggValueProto = currentValue.tsAggValueProto;

      if (accum.lastCandle != null) {
        aggValueProto =
            TimeseriesUtils.addTSOpenValue(currentValue.tsAggValueProto, accum.lastCandle).build();

        // The close state can mutate as a result of the change in open position
        accum.lastCandle = aggValueProto.getCloseState();
      } else {
        // If there is no candle then this is the first window pane we are dealing with and we need
        // to add something so we will add the close
        // TODO In this situation we should be pulling from an outside source from Dataflow to get
        // the correct last value
        aggValueProto =

        accum.lastCandle = currentValue.tsAggValueProto.getCloseState();


    // Clear down the accum list

    return completeCandles;

  /**
   * Compacts an accumulator relative to its {@code lastCandle}.
   *
   * <p>When {@code accum.lastCandle} is non-null, each entry of {@code accum.candles} is tested
   * for a close-state time that is less than or equal to the time of {@code accum.lastCandle}.
   * When {@code lastCandle} is null the loop is skipped.
   *
   * <p>NOTE(review): what happens to a matching entry is not visible in this block; presumably
   * it is collected in {@code delList} and later removed from {@code accum.candles} — confirm
   * against the complete source.
   *
   * @param accum the accumulator to compact
   * @return {@code accum}, the same instance that was passed in
   */
  public static Accum compact(Accum accum) {

    // Scratch list declared up front; its use is not visible in this block.
    List<TSAggValueProto> delList = new ArrayList<TSAggValueProto>();

    // We need to do compaction of the accum here and not in extractOutput as it may not be the same
    // object as that stored locally
    if (accum.lastCandle != null) {
      for (TSAggValueProto ts : accum.candles) {
        // Entry closes at or before the carried-forward candle's time.
        if (ts.getCloseState().getTime() <= accum.lastCandle.getTime()) {


    return accum;


  /**
   * Coder for {@link Accum}. An accumulator is written as its {@code lastCandle} followed by its
   * {@code candles} list, and read back in the same order.
   */
  static public class CompleteCandleAccumCoder extends AtomicCoder<Accum> {

    // Private constructor: instances are obtained through of(...), which returns INSTANCE.
    private CompleteCandleAccumCoder() {};

    // The single shared instance handed out by of(...).
    private static final CompleteCandleAccumCoder INSTANCE = new CompleteCandleAccumCoder();

    /**
     * Returns the shared coder instance.
     *
     * @param c the accumulator class (not referenced in the visible code)
     * @return the shared {@link CompleteCandleAccumCoder} instance
     */
    public static Coder<Accum> of(Class<Accum> c) {
      return INSTANCE;

    // Delegate coder for the lastCandle field.
    private static final Coder<TSProto> TSPROTO_CODER = ProtoCoder.of(TSProto.class);

    // Delegate coder for the candles field.
    // NOTE(review): the initializer expression is cut off in this view — confirm the element coder.
    private static final Coder<List<TSAggValueProto>> LIST_CODER = ListCoder.of(ProtoCoder

    /**
     * Writes {@code value.lastCandle} and then {@code value.candles} to the stream, each with a
     * nested context.
     *
     * <p>NOTE(review): a freshly created {@link Accum} has a null {@code lastCandle}; confirm
     * that TSPROTO_CODER accepts null or that a non-null value is guaranteed before encoding.
     *
     * @param value the accumulator to encode
     * @param outStream the destination stream
     * @param context the coder context; {@code context.nested()} is passed to both delegates
     * @throws CoderException declared by this method; raised by the delegate coders
     * @throws IOException declared by this method; raised by the delegate coders or the stream
     */
    public void encode(Accum value, OutputStream outStream,
        com.google.cloud.dataflow.sdk.coders.Coder.Context context) throws CoderException,
        IOException {

      TSPROTO_CODER.encode(value.lastCandle, outStream, context.nested());
      LIST_CODER.encode(value.candles, outStream, context.nested());


    /**
     * Reads an accumulator from the stream in the same field order that encode writes:
     * {@code lastCandle} first, then {@code candles}.
     *
     * @param inStream the source stream
     * @param context the coder context; {@code context.nested()} is passed to both delegates
     * @return a new {@link Accum} populated from the stream
     * @throws CoderException declared by this method; raised by the delegate coders
     * @throws IOException declared by this method; raised by the delegate coders or the stream
     */
    public Accum decode(InputStream inStream,
        com.google.cloud.dataflow.sdk.coders.Coder.Context context) throws CoderException,
        IOException {
      Accum accum = new Accum();
      accum.lastCandle = TSPROTO_CODER.decode(inStream, context.nested());
      accum.candles = LIST_CODER.decode(inStream, context.nested());
      return accum;
