/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.sketch.bloom;

import java.io.IOException;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.bloom.DynamicBloomFilter;
import org.apache.hadoop.util.bloom.Filter;
import org.apache.hadoop.util.bloom.Key;

/**
 * Hive UDAF that aggregates string keys into a single Bloom filter and returns the filter as
 * the {@link Text} produced by {@code BloomFilterUtils.serialize}.
 */
//@formatter:off
@Description(name = "bloom",
        value = "_FUNC_(string key) - Constructs a BloomFilter by aggregating a set of keys",
        extended = "CREATE TABLE satisfied_movies AS \n" +
                "  SELECT bloom(movieid) as movies\n" +
                "  FROM (\n" +
                "    SELECT movieid\n" +
                "    FROM ratings\n" +
                "    GROUP BY movieid\n" +
                "    HAVING avg(rating) >= 4.0\n" +
                "  ) t;")
//@formatter:on
@SuppressWarnings("deprecation")
public final class BloomFilterUDAF extends UDAF {

    /**
     * Aggregation state and logic: keys are added through {@link #iterate(Text)}, partial
     * filters are combined through {@link #merge(Text)}, and the result is emitted in serialized
     * form by {@link #terminatePartial()} / {@link #terminate()}.
     */
    public static class Evaluator implements UDAFEvaluator {

        /** Filter accumulated so far; {@code null} until initialized or until a partial is merged. */
        private Filter filter;
        /** Reusable key holder that is overwritten on every {@link #iterate(Text)} call. */
        private Key key;

        /** Resets the aggregation state to a fresh filter and a fresh key holder. */
        @Override
        public void init() {
            this.filter = BloomFilterUtils.newDynamicBloomFilter();
            this.key = new Key();
        }

        /**
         * Adds one key to the filter.
         *
         * @param keyStr the key to add; {@code null} values are skipped
         * @return always {@code true}
         */
        public boolean iterate(@Nullable Text keyStr) {
            if (keyStr == null) {
                return true;
            }
            // The two fields are created independently: merge() can install a filter without
            // ever creating the key holder, so a filter-only check would leave `key` null here.
            if (filter == null) {
                this.filter = BloomFilterUtils.newDynamicBloomFilter();
            }
            if (key == null) {
                this.key = new Key();
            }

            // copyBytes() yields an array of exactly the valid length of the Text
            key.set(keyStr.copyBytes(), 1.0d);
            filter.add(key);

            return true;
        }

        /**
         * Emits the current partial aggregation.
         *
         * @return the serialized filter, or {@code null} if no filter has been created yet
         * @throws HiveException if serialization fails with an {@link IOException}
         */
        @Nullable
        public Text terminatePartial() throws HiveException {
            if (filter == null) {
                return null; // same convention as terminate(); merge() ignores null partials
            }
            return serializeFilter();
        }

        /**
         * Combines a partial aggregation into this evaluator.
         *
         * @param partial a serialized filter as emitted by {@link #terminatePartial()};
         *        {@code null} is treated as an empty partial and ignored
         * @return always {@code true}
         * @throws HiveException if deserialization fails with an {@link IOException}
         */
        public boolean merge(@Nullable Text partial) throws HiveException {
            if (partial == null) {
                return true;
            }

            final DynamicBloomFilter other;
            try {
                other = BloomFilterUtils.deserialize(partial, new DynamicBloomFilter());
            } catch (IOException e) {
                throw new HiveException(e);
            }

            if (filter == null) {
                // nothing accumulated yet: adopt the deserialized filter as-is
                this.filter = other;
            } else {
                filter.or(other);
            }
            return true;
        }

        /**
         * Emits the final aggregation result.
         *
         * @return the serialized filter, or {@code null} if no filter has been created yet
         * @throws HiveException if serialization fails with an {@link IOException}
         */
        @Nullable
        public Text terminate() throws HiveException {
            if (filter == null) {
                return null;
            }
            return serializeFilter();
        }

        /** Serializes the current (non-null) filter into a new Text, wrapping I/O failures. */
        @Nonnull
        private Text serializeFilter() throws HiveException {
            try {
                return BloomFilterUtils.serialize(filter, new Text());
            } catch (IOException e) {
                throw new HiveException(e);
            }
        }

    }
}