/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package datafu.hourglass.test;

import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Method;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.TimeZone;

import junit.framework.Assert;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import datafu.hourglass.avro.AvroDateRangeMetadata;
import datafu.hourglass.fs.DatePath;
import datafu.hourglass.fs.DateRange;
import datafu.hourglass.fs.PathUtils;
import datafu.hourglass.jobs.PartitionCollapsingExecutionPlanner;
import datafu.hourglass.test.util.DailyTrackingWriter;

public class PartitionCollapsingExecutionPlannerTests extends TestBase
{
  private Logger _log = Logger.getLogger(PartitionCollapsingTests.class);
  
  private Path _inputPath = new Path(getDataPath(), "input");
  private Path _outputPath = new Path(getDataPath(), "output");
  
  private Properties _props;
  
  private static final Schema EVENT_SCHEMA;
  
  private DailyTrackingWriter _eventWriter;
  
  private int _maxDaysToProcess;
  private boolean _reusePreviousOutput;
  private String _startDate;
  private String _endDate;
  private Integer _numDays;
  private PartitionCollapsingExecutionPlanner _planner;
  
  static
  {    
    EVENT_SCHEMA = Schemas.createRecordSchema(PartitionCollapsingTests.class, "Event",
                                              new Field("id", Schema.create(Type.LONG), "ID", null));
  }
  
  public PartitionCollapsingExecutionPlannerTests() throws IOException
  {
    super();
  }
  
  @BeforeClass
  public void beforeClass() throws Exception
  {
    super.beforeClass();
  }
  
  @AfterClass
  public void afterClass() throws Exception
  {
    super.afterClass();
  }
  
  @BeforeMethod
  public void beforeMethod(Method method) throws IOException
  {
    _log.info("*** Running " + method.getName());
    
    _log.info("*** Cleaning input and output paths");  
    getFileSystem().delete(_inputPath, true);
    getFileSystem().delete(_outputPath, true);
    getFileSystem().mkdirs(_inputPath);
    getFileSystem().mkdirs(_outputPath);
    
    _maxDaysToProcess = 365;
    _numDays = null;
    _startDate = null;
    _endDate = null;
    _reusePreviousOutput = false;

    _planner = null;
               
    _eventWriter = new DailyTrackingWriter(_inputPath,EVENT_SCHEMA,getFileSystem());
  }
  
  @Test
  public void exactlyThreeDays() throws IOException, InterruptedException, ClassNotFoundException
  {
    _numDays = 3;
    
    createInput(2012,10,1);
    createInput(2012,10,2);
    createInput(2012,10,3);
    
    createPlan();
    
    checkInputSize(3);
    checkForInput(2012,10,1);
    checkForInput(2012,10,2);
    checkForInput(2012,10,3);    

    checkNewInputSize(3);
    checkForNewInput(2012,10,1);
    checkForNewInput(2012,10,2);
    checkForNewInput(2012,10,3);
    
    checkOldInputSize(0);
    checkReusingOutput(false);
  }
  
  /**
   * Tests that the most recent data is used. 
   * 
   * @throws IOException
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  @Test
  public void latestThreeDays() throws IOException, InterruptedException, ClassNotFoundException
  {
    _numDays = 3;
    
    createInput(2012,10,1);
    createInput(2012,10,2);
    createInput(2012,10,3);
    createInput(2012,10,4);
    
    createPlan();
    
    checkInputSize(3);    
    checkForInput(2012,10,2);
    checkForInput(2012,10,3);
    checkForInput(2012,10,4);
    
    checkNewInputSize(3);
    checkForNewInput(2012,10,2);
    checkForNewInput(2012,10,3);
    checkForNewInput(2012,10,4);
    
    checkOldInputSize(0);
    checkReusingOutput(false);
  }
  
  /**
   * Tests that the previous output can be reused, even when there are two new days since the previous
   * result.
   * 
   * @throws IOException
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  @Test
  public void previousOutputReuse() throws IOException, InterruptedException, ClassNotFoundException
  {
    _numDays = 8;
    _reusePreviousOutput = true;
    
    createInput(2012,10,1);
    createInput(2012,10,2);
    createInput(2012,10,3);
    createInput(2012,10,4);
    createInput(2012,10,5);
    createInput(2012,10,6);
    createInput(2012,10,7);
    createInput(2012,10,8);
    createInput(2012,10,9);
    createInput(2012,10,10);
    
    createOutput(new DateRange(getDate(2012,10,1),getDate(2012,10,8)));
    
    createPlan();
    
    checkNewInputSize(2);
    checkForNewInput(2012,10,9);
    checkForNewInput(2012,10,10);
    
    checkOldInputSize(2);
    checkForOldInput(2012,10,1);
    checkForOldInput(2012,10,2);
    
    checkInputSize(4);    
    checkForInput(2012,10,1);
    checkForInput(2012,10,2);
    checkForInput(2012,10,9);
    checkForInput(2012,10,10);
    
    checkReusingOutput(true);
  }
  
  /**
   * Tests that the previous output will not be reused when the window size is small.
   * It is more work to reuse the previous output in this case because the old input has
   * to be subtracted from the previous result.
   * 
   * @throws IOException
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  @Test
  public void previousOutputNoReuseSmallWindow() throws IOException, InterruptedException, ClassNotFoundException
  {
    _numDays = 2;
    _reusePreviousOutput = true;
    
    createInput(2012,10,1);
    createInput(2012,10,2);
    createInput(2012,10,3);
    
    createOutput(new DateRange(getDate(2012,10,1),getDate(2012,10,2)));
    
    createPlan();
    
    checkNewInputSize(2);
    checkForNewInput(2012,10,2);
    checkForNewInput(2012,10,3);
    
    checkOldInputSize(0);
    
    checkInputSize(2);    
    checkForInput(2012,10,2);
    checkForInput(2012,10,3);
    
    checkReusingOutput(false);
  }
  
  /**
   * Tests that the previous output won't be reused when it is too old.  This would require subtracting off
   * all the old input data, then adding the new data.  It is better to just use the new data and not reuse
   * the previous output.
   * 
   * @throws IOException
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  @Test
  public void previousOutputNoReuseTooOld() throws IOException, InterruptedException, ClassNotFoundException
  {
    _numDays = 8;
    _reusePreviousOutput = true;
    
    for (int i=1; i<=20; i++)
    {
      createInput(2012,10,i);
    }
    
    // previous output too old to be useful
    createOutput(new DateRange(getDate(2012,10,1),getDate(2012,10,8)));
    
    createPlan();
    
    checkNewInputSize(8);
    for (int i=13; i<=20; i++)
    {
      checkForNewInput(2012,10,i);
    }
    
    checkOldInputSize(0);
    
    checkInputSize(8);    
    for (int i=13; i<=20; i++)
    {
      checkForInput(2012,10,i);
    }
    
    checkReusingOutput(false);
  }
  
  private void checkForInput(int year, int month, int day)
  {
    checkForPath(_planner.getInputsToProcess(),year,month,day);
  }
  
  private void checkForNewInput(int year, int month, int day)
  {
    checkForPath(_planner.getNewInputsToProcess(),year,month,day);
  }
  
  private void checkForOldInput(int year, int month, int day)
  {
    checkForPath(_planner.getOldInputsToProcess(),year,month,day);
  }
  
  private void checkForPath(List<DatePath> paths, int year, int month, int day)
  {
    Date date = getDate(year,month,day);
    DatePath datePath = DatePath.createNestedDatedPath(_inputPath.makeQualified(getFileSystem()),date);
    for (DatePath dp : paths)
    {
      if (dp.equals(datePath))
      {
        return;
      }
    }
    Assert.fail(String.format("Could not find %s",datePath));
  }
  
  private Date getDate(int year, int month, int day)
  {
    Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    cal.set(Calendar.YEAR, year);
    cal.set(Calendar.MONTH, month-1);
    cal.set(Calendar.DAY_OF_MONTH, day);
    cal.set(Calendar.HOUR_OF_DAY, 0);
    cal.set(Calendar.MINUTE, 0);
    cal.set(Calendar.SECOND, 0);
    cal.set(Calendar.MILLISECOND, 0);
    return cal.getTime();
  }
  
  private void checkInputSize(int size)
  {
    Assert.assertEquals(size,_planner.getInputsToProcess().size());
  }
  
  private void checkNewInputSize(int size)
  {
    Assert.assertEquals(size,_planner.getNewInputsToProcess().size());
  }
  
  private void checkOldInputSize(int size)
  {
    Assert.assertEquals(size,_planner.getOldInputsToProcess().size());
  }
  
  private void checkReusingOutput(boolean reuse)
  {
    Assert.assertEquals(reuse, _planner.getPreviousOutputToProcess() != null);
  }
  
  private void createInput(int year, int month, int day) throws IOException
  {
    _eventWriter.open(year, month, day);
    _eventWriter.close();
  }
  
  private void createOutput(DateRange dateRange) throws IOException
  {
    DataFileWriter<GenericRecord> dataWriter;
    OutputStream outputStream;
    
    Path path = new Path(_outputPath,PathUtils.datedPathFormat.format(dateRange.getEndDate()));
    
    Schema ouputSchema = Schemas.createRecordSchema(PartitionCollapsingTests.class, "Output",
                                              new Field("id", Schema.create(Type.LONG), "ID", null));
    
    outputStream = getFileSystem().create(new Path(path, "part-00000.avro"));
    
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
    dataWriter = new DataFileWriter<GenericRecord>(writer);      
    
    dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_START,
                       Long.toString(dateRange.getBeginDate().getTime()));
    
    dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_END,
                       Long.toString(dateRange.getEndDate().getTime()));
    
    dataWriter.create(ouputSchema, outputStream);
        
    // empty file
    
    dataWriter.close();
    outputStream.close();
    dataWriter = null;
    outputStream = null; 
  }
  
  private void createPlan() throws IOException, InterruptedException, ClassNotFoundException
  {
    _props = newTestProperties();
    
    _planner = new PartitionCollapsingExecutionPlanner(getFileSystem(),_props);
    
    _planner.setNumDays(_numDays);
    _planner.setMaxToProcess(_maxDaysToProcess);
    _planner.setInputPaths(Arrays.asList(_inputPath));
    _planner.setOutputPath(_outputPath);
    _planner.setReusePreviousOutput(_reusePreviousOutput);
    
    if (_startDate != null)
    {
      try
      {
        _planner.setStartDate(PathUtils.datedPathFormat.parse(_startDate));
      }
      catch (ParseException e)
      {
        Assert.fail(e.toString());
      }
    }
    
    if (_endDate != null)
    {
      try
      {
        _planner.setEndDate(PathUtils.datedPathFormat.parse(_endDate));
      }
      catch (ParseException e)
      {
        Assert.fail(e.toString());
      }
    }
    
    _planner.createPlan();
  }
}