/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.cascading;

import cascading.flow.Flow;
import cascading.flow.FlowProcess;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.scheme.Scheme;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.thrift.protocol.TCompactProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.protocol.TProtocolFactory;
import org.apache.thrift.transport.TIOStreamTransport;
import org.junit.Test;
import static org.junit.Assert.*;

import org.apache.parquet.hadoop.thrift.ThriftToParquetFileWriter;
import org.apache.parquet.hadoop.util.ContextUtil;
import org.apache.parquet.thrift.test.Name;

import java.io.File;
import java.io.ByteArrayOutputStream;

@Deprecated // The parquet-cascading module depends on Cascading 2.x and is superseded by parquet-cascading3 for Cascading 3.x
public class TestParquetTBaseScheme {
  final String txtInputPath = "target/test-classes/names.txt";
  final String parquetInputPath = "target/test/ParquetTBaseScheme/names-parquet-in";
  final String parquetOutputPath = "target/test/ParquetTBaseScheme/names-parquet-out";
  final String txtOutputPath = "target/test/ParquetTBaseScheme/names-txt-out";

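  // Writes the text input through a flow that packs each incoming tuple into a Name
  // thrift object, then checks that the Parquet output and its summary files exist.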
  @Test
  public void testWrite() throws Exception {
    Path path = new Path(parquetOutputPath);
    JobConf jobConf = new JobConf();
    final FileSystem fs = path.getFileSystem(jobConf);
    if (fs.exists(path)) fs.delete(path, true);

    Scheme sourceScheme = new TextLine(new Fields("first", "last"));
    Tap source = new Hfs(sourceScheme, txtInputPath);

    Scheme sinkScheme = new ParquetTBaseScheme(Name.class);
    Tap sink = new Hfs(sinkScheme, parquetOutputPath);

    Pipe assembly = new Pipe("namecp");
    assembly = new Each(assembly, new PackThriftFunction());
    HadoopFlowConnector hadoopFlowConnector = new HadoopFlowConnector();
    Flow flow = hadoopFlowConnector.connect("namecp", source, sink, assembly);

    flow.complete();

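    // The Parquet output committer also writes the _metadata and _common_metadata summary files.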
    assertTrue(fs.exists(new Path(parquetOutputPath)));
    assertTrue(fs.exists(new Path(parquetOutputPath + "/_metadata")));
    assertTrue(fs.exists(new Path(parquetOutputPath + "/_common_metadata")));
  }

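  // Reads the Parquet file produced by createFileForRead() back into text, with the thrift class given explicitly.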
  @Test
  public void testRead() throws Exception {
    doRead(new ParquetTBaseScheme(Name.class));
  }

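  // Same read path, but the scheme is built without a thrift class and resolves Name from the Parquet file's thrift metadata.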
  @Test
  public void testReadWithoutClass() throws Exception {
    doRead(new ParquetTBaseScheme());
  }

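  // Shared read path: generate a Parquet input file, unpack each Name record back into
  // its first/last fields, write them as text, and compare against the expected lines.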
  private void doRead(Scheme sourceScheme) throws Exception {
    createFileForRead();

    Path path = new Path(txtOutputPath);
    final FileSystem fs = path.getFileSystem(new Configuration());
    if (fs.exists(path)) fs.delete(path, true);

    Tap source = new Hfs(sourceScheme, parquetInputPath);

    Scheme sinkScheme = new TextLine(new Fields("first", "last"));
    Tap sink = new Hfs(sinkScheme, txtOutputPath);

    Pipe assembly = new Pipe("namecp");
    assembly = new Each(assembly, new UnpackThriftFunction());
    Flow flow = new HadoopFlowConnector().connect("namecp", source, sink, assembly);

    flow.complete();
    String result = FileUtils.readFileToString(new File(txtOutputPath + "/part-00000"));
    assertEquals("Alice\tPractice\nBob\tHope\nCharlie\tHorse\n", result);
  }


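  // Writes three Name records to a Parquet file with ThriftToParquetFileWriter so the read tests have known input.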
  private void createFileForRead() throws Exception {
    final Path fileToCreate = new Path(parquetInputPath + "/names.parquet");

    final Configuration conf = new Configuration();
    final FileSystem fs = fileToCreate.getFileSystem(conf);
    if (fs.exists(fileToCreate)) fs.delete(fileToCreate, true);

    TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(fileToCreate, ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, Name.class);

    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    Name n1 = new Name();
    n1.setFirst_name("Alice");
    n1.setLast_name("Practice");
    Name n2 = new Name();
    n2.setFirst_name("Bob");
    n2.setLast_name("Hope");
    Name n3 = new Name();
    n3.setFirst_name("Charlie");
    n3.setLast_name("Horse");

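    // Serialize each record with the compact protocol and hand the bytes to the writer,
    // resetting the buffer so every BytesWritable holds exactly one Name.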
    n1.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    baos.reset();
    n2.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    baos.reset();
    n3.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();
  }

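  // Packs the first two fields of each incoming tuple into a Name thrift object.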
  private static class PackThriftFunction extends BaseOperation implements Function {
    @Override
    public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
      TupleEntry arguments = functionCall.getArguments();
      Tuple result = new Tuple();

      Name name = new Name();
      name.setFirst_name(arguments.getString(0));
      name.setLast_name(arguments.getString(1));

      result.add(name);
      functionCall.getOutputCollector().add(result);
    }
  }

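  // Unpacks a Name thrift object back into separate first and last name fields.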
  private static class UnpackThriftFunction extends BaseOperation implements Function {
    @Override
    public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
      TupleEntry arguments = functionCall.getArguments();
      Tuple result = new Tuple();

      Name name = (Name) arguments.get(0);
      result.add(name.getFirst_name());
      result.add(name.getLast_name());
      functionCall.getOutputCollector().add(result);
    }
  }
}