/**
* Copyright 2016 ZuInnoTe (Jörn Franke) <[email protected]>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/

package org.zuinnote.hadoop.office.format.mapred;

import java.io.IOException;

import org.apache.hadoop.mapred.InputSplit;


import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RecordReader;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.ArrayWritable;

import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;

import java.security.GeneralSecurityException;

import org.zuinnote.hadoop.office.format.common.HadoopOfficeReadConfiguration;
import org.zuinnote.hadoop.office.format.common.parser.*;

/**
 * Input format for the Hadoop {@code mapred} API that reads Excel documents.
 * Each split is handed to an {@link ExcelRecordReader}; files are never split
 * (see {@link #isSplitable(FileSystem, Path)}).
 */
public class ExcelFileInputFormat extends AbstractSpreadSheetDocumentFileInputFormat<ArrayWritable> {

	private static final Log LOGIF = LogFactory.getLog(ExcelFileInputFormat.class.getName());

	/**
	 * Creates the record reader for the given split.
	 *
	 * <p>As a side effect, {@link HadoopOfficeReadConfiguration#CONF_MIMETYPE} is set to
	 * {@code "ms-excel"} on the supplied job configuration before the reader is created.
	 *
	 * @param split the split to read; it is cast to {@link FileSplit}, so any other
	 *        split type results in a {@link ClassCastException}
	 * @param job the job configuration; modified as described above
	 * @param reporter passed through unchanged to the record reader
	 * @return a new {@link ExcelRecordReader}, or {@code null} if creating the reader
	 *         failed with a {@link FormatNotUnderstoodException} or a
	 *         {@link GeneralSecurityException} (the failure is logged, not rethrown)
	 * @throws IOException if creating the record reader fails with an I/O error
	 */
	@Override
	public RecordReader<Text,ArrayWritable> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
		// Create reader
		try {
			// send configuration option to ms excel. The format of the Excel (old vs new) is detected automatically
			job.set(HadoopOfficeReadConfiguration.CONF_MIMETYPE,"ms-excel");
			return new ExcelRecordReader( (FileSplit) split,job,reporter);
		} catch (FormatNotUnderstoodException e) {
			// Pass the exception as the Throwable argument so the logging backend
			// records the stack trace and cause chain, not just its toString().
			LOGIF.error("Cannot create Excel record reader: format not understood", e);
		} catch (GeneralSecurityException gse) {
			LOGIF.error("Cannot create Excel record reader: security error", gse);
		}
		// Existing contract: a failure that was logged above is signalled by a null reader.
		return null;
	}

	/**
	 * Intentionally does nothing; this input format does not use the job configuration here.
	 *
	 * @param conf ignored
	 */
	@Override
	public void configure (JobConf conf) {
		// not used
	}

	/**
	 * Unfortunately, we cannot split Excel documents correctly. Apache POI/library requires full documents.
	 * Nevertheless, most of the time you have anyway small (smaller than default HDFS blocksize) Office documents that can be processed fast.
	 * Hence, you should put them in Hadoop Archives (HAR) either uncompressed or compressed to reduce load on namenode.
	 *
	 * @param fs ignored
	 * @param file ignored
	 * @return always {@code false}
	 */
	@Override
	protected boolean isSplitable(FileSystem fs, Path file) {
		return false;
	}

}