/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.orc;

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.SqlTimeTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.MapTypeInfo;
import org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.types.Row;

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.orc.TypeDescription;

import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import java.util.function.DoubleFunction;
import java.util.function.Function;
import java.util.function.LongFunction;

/**
 * A class that provides utility methods for orc file reading.
 */
class OrcBatchReader {

	private static final long MILLIS_PER_DAY = 86400000; // = 24 * 60 * 60 * 1000
	private static final TimeZone LOCAL_TZ = TimeZone.getDefault();

	/**
	 * Converts an ORC schema to a Flink TypeInformation.
	 *
	 * @param schema The ORC schema.
	 * @return The TypeInformation that corresponds to the ORC schema.
	 */
	static TypeInformation schemaToTypeInfo(TypeDescription schema) {
		switch (schema.getCategory()) {
			case BOOLEAN:
				return BasicTypeInfo.BOOLEAN_TYPE_INFO;
			case BYTE:
				return BasicTypeInfo.BYTE_TYPE_INFO;
			case SHORT:
				return BasicTypeInfo.SHORT_TYPE_INFO;
			case INT:
				return BasicTypeInfo.INT_TYPE_INFO;
			case LONG:
				return BasicTypeInfo.LONG_TYPE_INFO;
			case FLOAT:
				return BasicTypeInfo.FLOAT_TYPE_INFO;
			case DOUBLE:
				return BasicTypeInfo.DOUBLE_TYPE_INFO;
			case DECIMAL:
				return BasicTypeInfo.BIG_DEC_TYPE_INFO;
			case STRING:
			case CHAR:
			case VARCHAR:
				return BasicTypeInfo.STRING_TYPE_INFO;
			case DATE:
				return SqlTimeTypeInfo.DATE;
			case TIMESTAMP:
				return SqlTimeTypeInfo.TIMESTAMP;
			case BINARY:
				return PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO;
			case STRUCT:
				List<TypeDescription> fieldSchemas = schema.getChildren();
				TypeInformation[] fieldTypes = new TypeInformation[fieldSchemas.size()];
				for (int i = 0; i < fieldSchemas.size(); i++) {
					fieldTypes[i] = schemaToTypeInfo(fieldSchemas.get(i));
				}
				String[] fieldNames = schema.getFieldNames().toArray(new String[]{});
				return new RowTypeInfo(fieldTypes, fieldNames);
			case LIST:
				TypeDescription elementSchema = schema.getChildren().get(0);
				TypeInformation<?> elementType = schemaToTypeInfo(elementSchema);
				// arrays of primitive types are handled as object arrays to support null values
				return ObjectArrayTypeInfo.getInfoFor(elementType);
			case MAP:
				TypeDescription keySchema = schema.getChildren().get(0);
				TypeDescription valSchema = schema.getChildren().get(1);
				TypeInformation<?> keyType = schemaToTypeInfo(keySchema);
				TypeInformation<?> valType = schemaToTypeInfo(valSchema);
				return new MapTypeInfo<>(keyType, valType);
			case UNION:
				throw new UnsupportedOperationException("UNION type is not supported yet.");
			default:
				throw new IllegalArgumentException("Unknown type " + schema);
		}
	}

	/**
	 * Fills an ORC batch into an array of Row.
	 *
	 * @param rows The batch of rows need to be filled.
	 * @param schema The schema of the ORC data.
	 * @param batch The ORC data.
	 * @param selectedFields The list of selected ORC fields.
	 * @return The number of rows that were filled.
	 */
	static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {

		int rowsToRead = Math.min((int) batch.count(), rows.length);

		List<TypeDescription> fieldTypes = schema.getChildren();
		// read each selected field
		for (int fieldIdx = 0; fieldIdx < selectedFields.length; fieldIdx++) {
			int orcIdx = selectedFields[fieldIdx];
			readField(rows, fieldIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], rowsToRead);
		}
		return rowsToRead;
	}

	/**
	 * Reads a vector of data into an array of objects.
	 *
	 * @param vals The array that needs to be filled.
	 * @param fieldIdx If the vals array is an array of Row, the index of the field that needs to be filled.
	 *                 Otherwise a -1 must be passed and the data is directly filled into the array.
	 * @param schema The schema of the vector to read.
	 * @param vector The vector to read.
	 * @param childCount The number of vector entries to read.
	 */
	private static void readField(Object[] vals, int fieldIdx, TypeDescription schema, ColumnVector vector, int childCount) {

		// check the type of the vector to decide how to read it.
		switch (schema.getCategory()) {
			case BOOLEAN:
				if (vector.noNulls) {
					readNonNullLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readBoolean);
				} else {
					readLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readBoolean);
				}
				break;
			case BYTE:
				if (vector.noNulls) {
					readNonNullLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readByte);
				} else {
					readLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readByte);
				}
				break;
			case SHORT:
				if (vector.noNulls) {
					readNonNullLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readShort);
				} else {
					readLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readShort);
				}
				break;
			case INT:
				if (vector.noNulls) {
					readNonNullLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readInt);
				} else {
					readLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readInt);
				}
				break;
			case LONG:
				if (vector.noNulls) {
					readNonNullLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readLong);
				} else {
					readLongColumn(vals, fieldIdx, (LongColumnVector) vector, childCount, OrcBatchReader::readLong);
				}
				break;
			case FLOAT:
				if (vector.noNulls) {
					readNonNullDoubleColumn(vals, fieldIdx, (DoubleColumnVector) vector, childCount, OrcBatchReader::readFloat);
				} else {
					readDoubleColumn(vals, fieldIdx, (DoubleColumnVector) vector, childCount, OrcBatchReader::readFloat);
				}
				break;
			case DOUBLE:
				if (vector.noNulls) {
					readNonNullDoubleColumn(vals, fieldIdx, (DoubleColumnVector) vector, childCount, OrcBatchReader::readDouble);
				} else {
					readDoubleColumn(vals, fieldIdx, (DoubleColumnVector) vector, childCount, OrcBatchReader::readDouble);
				}
				break;
			case CHAR:
			case VARCHAR:
			case STRING:
				if (vector.noNulls) {
					readNonNullBytesColumnAsString(vals, fieldIdx, (BytesColumnVector) vector, childCount);
				} else {
					readBytesColumnAsString(vals, fieldIdx, (BytesColumnVector) vector, childCount);
				}
				break;
			case DATE:
				if (vector.noNulls) {
					readNonNullLongColumnAsDate(vals, fieldIdx, (LongColumnVector) vector, childCount);
				} else {
					readLongColumnAsDate(vals, fieldIdx, (LongColumnVector) vector, childCount);
				}
				break;
			case TIMESTAMP:
				if (vector.noNulls) {
					readNonNullTimestampColumn(vals, fieldIdx, (TimestampColumnVector) vector, childCount);
				} else {
					readTimestampColumn(vals, fieldIdx, (TimestampColumnVector) vector, childCount);
				}
				break;
			case BINARY:
				if (vector.noNulls) {
					readNonNullBytesColumnAsBinary(vals, fieldIdx, (BytesColumnVector) vector, childCount);
				} else {
					readBytesColumnAsBinary(vals, fieldIdx, (BytesColumnVector) vector, childCount);
				}
				break;
			case DECIMAL:
				if (vector.noNulls) {
					readNonNullDecimalColumn(vals, fieldIdx, (DecimalColumnVector) vector, childCount);
				} else {
					readDecimalColumn(vals, fieldIdx, (DecimalColumnVector) vector, childCount);
				}
				break;
			case STRUCT:
				if (vector.noNulls) {
					readNonNullStructColumn(vals, fieldIdx, (StructColumnVector) vector, schema, childCount);
				} else {
					readStructColumn(vals, fieldIdx, (StructColumnVector) vector, schema, childCount);
				}
				break;
			case LIST:
				if (vector.noNulls) {
					readNonNullListColumn(vals, fieldIdx, (ListColumnVector) vector, schema, childCount);
				} else {
					readListColumn(vals, fieldIdx, (ListColumnVector) vector, schema, childCount);
				}
				break;
			case MAP:
				if (vector.noNulls) {
					readNonNullMapColumn(vals, fieldIdx, (MapColumnVector) vector, schema, childCount);
				} else {
					readMapColumn(vals, fieldIdx, (MapColumnVector) vector, schema, childCount);
				}
				break;
			case UNION:
				throw new UnsupportedOperationException("UNION type not supported yet");
			default:
				throw new IllegalArgumentException("Unknown type " + schema);
		}
	}

	private static <T> void readNonNullLongColumn(Object[] vals, int fieldIdx, LongColumnVector vector,
													int childCount, LongFunction<T> reader) {

		if (vector.isRepeating) { // fill complete column with first value
			T repeatingValue = reader.apply(vector.vector[0]);
			fillColumnWithRepeatingValue(vals, fieldIdx, repeatingValue, childCount);
		} else {
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					vals[i] = reader.apply(vector.vector[i]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					rows[i].setField(fieldIdx, reader.apply(vector.vector[i]));
				}
			}
		}
	}

	private static <T> void readNonNullDoubleColumn(Object[] vals, int fieldIdx, DoubleColumnVector vector,
													int childCount, DoubleFunction<T> reader) {

		if (vector.isRepeating) { // fill complete column with first value
			T repeatingValue = reader.apply(vector.vector[0]);
			fillColumnWithRepeatingValue(vals, fieldIdx, repeatingValue, childCount);
		} else {
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					vals[i] = reader.apply(vector.vector[i]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					rows[i].setField(fieldIdx, reader.apply(vector.vector[i]));
				}
			}
		}
	}

	private static void readNonNullBytesColumnAsString(Object[] vals, int fieldIdx, BytesColumnVector bytes, int childCount) {
		if (bytes.isRepeating) { // fill complete column with first value
			String repeatingValue = readString(bytes.vector[0], bytes.start[0], bytes.length[0]);
			fillColumnWithRepeatingValue(vals, fieldIdx, repeatingValue, childCount);
		} else {
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					vals[i] = readString(bytes.vector[i], bytes.start[i], bytes.length[i]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					rows[i].setField(fieldIdx, readString(bytes.vector[i], bytes.start[i], bytes.length[i]));
				}
			}
		}
	}

	private static void readNonNullBytesColumnAsBinary(Object[] vals, int fieldIdx, BytesColumnVector bytes, int childCount) {
		if (bytes.isRepeating) { // fill complete column with first value
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					// don't reuse repeating val to avoid object mutation
					vals[i] = readBinary(bytes.vector[0], bytes.start[0], bytes.length[0]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					// don't reuse repeating val to avoid object mutation
					rows[i].setField(fieldIdx, readBinary(bytes.vector[0], bytes.start[0], bytes.length[0]));
				}
			}
		} else {
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					vals[i] = readBinary(bytes.vector[i], bytes.start[i], bytes.length[i]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					rows[i].setField(fieldIdx, readBinary(bytes.vector[i], bytes.start[i], bytes.length[i]));
				}
			}
		}
	}

	private static void readNonNullLongColumnAsDate(Object[] vals, int fieldIdx, LongColumnVector vector, int childCount) {

		if (vector.isRepeating) { // fill complete column with first value
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					// do not reuse repeated value due to mutability of Date
					vals[i] = readDate(vector.vector[0]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					// do not reuse repeated value due to mutability of Date
					rows[i].setField(fieldIdx, readDate(vector.vector[0]));
				}
			}
		} else {
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					vals[i] = readDate(vector.vector[i]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					rows[i].setField(fieldIdx, readDate(vector.vector[i]));
				}
			}
		}
	}

	private static void readNonNullTimestampColumn(Object[] vals, int fieldIdx, TimestampColumnVector vector, int childCount) {

		if (vector.isRepeating) { // fill complete column with first value
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					// do not reuse value to prevent object mutation
					vals[i] = readTimestamp(vector.time[0], vector.nanos[0]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					// do not reuse value to prevent object mutation
					rows[i].setField(fieldIdx, readTimestamp(vector.time[0], vector.nanos[0]));
				}
			}
		} else {
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					vals[i] = readTimestamp(vector.time[i], vector.nanos[i]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					rows[i].setField(fieldIdx, readTimestamp(vector.time[i], vector.nanos[i]));
				}
			}
		}
	}

	private static void readNonNullDecimalColumn(Object[] vals, int fieldIdx, DecimalColumnVector vector, int childCount) {

		if (vector.isRepeating) { // fill complete column with first value
			fillColumnWithRepeatingValue(vals, fieldIdx, readBigDecimal(vector.vector[0]), childCount);
		} else {
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					vals[i] = readBigDecimal(vector.vector[i]);
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					rows[i].setField(fieldIdx, readBigDecimal(vector.vector[i]));
				}
			}
		}
	}

	private static void readNonNullStructColumn(Object[] vals, int fieldIdx, StructColumnVector structVector, TypeDescription schema, int childCount) {

		List<TypeDescription> childrenTypes = schema.getChildren();

		int numFields = childrenTypes.size();
		// create a batch of Rows to read the structs
		Row[] structs = new Row[childCount];
		// TODO: possible improvement: reuse existing Row objects
		for (int i = 0; i < childCount; i++) {
			structs[i] = new Row(numFields);
		}

		// read struct fields
		// we don't have to handle isRepeating because ORC assumes that it is propagated into the children.
		for (int i = 0; i < numFields; i++) {
			readField(structs, i, childrenTypes.get(i), structVector.fields[i], childCount);
		}

		if (fieldIdx == -1) { // set struct as an object
			System.arraycopy(structs, 0, vals, 0, childCount);
		} else { // set struct as a field of Row
			Row[] rows = (Row[]) vals;
			for (int i = 0; i < childCount; i++) {
				rows[i].setField(fieldIdx, structs[i]);
			}
		}
	}

	private static void readNonNullListColumn(Object[] vals, int fieldIdx, ListColumnVector list, TypeDescription schema, int childCount) {

		TypeDescription fieldType = schema.getChildren().get(0);
		// get class of list elements
		Class<?> classType = getClassForType(fieldType);

		if (list.isRepeating) {

			int offset = (int) list.offsets[0];
			int length = (int) list.lengths[0];
			// we only need to read until offset + length.
			int entriesToRead = offset + length;

			// read children
			Object[] children = (Object[]) Array.newInstance(classType, entriesToRead);
			readField(children, -1, fieldType, list.child, entriesToRead);

			// get function to copy list
			Function<Object, Object> copyList = getCopyFunction(schema);

			// create first list that will be copied
			Object[] first;
			if (offset == 0) {
				first = children;
			} else {
				first = (Object[]) Array.newInstance(classType, length);
				System.arraycopy(children, offset, first, 0, length);
			}

			// create copies of first list and set copies as result
			for (int i = 0; i < childCount; i++) {
				Object[] copy = (Object[]) copyList.apply(first);
				if (fieldIdx == -1) {
					vals[i] = copy;
				} else {
					((Row) vals[i]).setField(fieldIdx, copy);
				}
			}
		} else {

			// read children
			Object[] children = (Object[]) Array.newInstance(classType, list.childCount);
			readField(children, -1, fieldType, list.child, list.childCount);

			// fill lists with children
			for (int i = 0; i < childCount; i++) {
				int offset = (int) list.offsets[i];
				int length = (int) list.lengths[i];

				Object[] temp = (Object[]) Array.newInstance(classType, length);
				System.arraycopy(children, offset, temp, 0, length);
				if (fieldIdx == -1) {
					vals[i] = temp;
				} else {
					((Row) vals[i]).setField(fieldIdx, temp);
				}
			}
		}
	}

	private static void readNonNullMapColumn(Object[] vals, int fieldIdx, MapColumnVector mapsVector, TypeDescription schema, int childCount) {

		List<TypeDescription> fieldType = schema.getChildren();
		TypeDescription keyType = fieldType.get(0);
		TypeDescription valueType = fieldType.get(1);

		ColumnVector keys = mapsVector.keys;
		ColumnVector values = mapsVector.values;

		if (mapsVector.isRepeating) {
			// first map is repeated

			// get map copy function
			Function<Object, Object> copyMap = getCopyFunction(schema);

			// set all key and value entries except those of the first map to null
			int offset = (int) mapsVector.offsets[0];
			int length = (int) mapsVector.lengths[0];
			// we only need to read until offset + length.
			int entriesToRead = offset + length;

			Object[] keyRows = new Object[entriesToRead];
			Object[] valueRows = new Object[entriesToRead];

			// read map keys and values
			readField(keyRows, -1, keyType, keys, entriesToRead);
			readField(valueRows, -1, valueType, values, entriesToRead);

			// create first map that will be copied
			HashMap map = readHashMap(keyRows, valueRows, offset, length);

			// copy first map and set copy as result
			for (int i = 0; i < childCount; i++) {
				if (fieldIdx == -1) {
					vals[i] = copyMap.apply(map);
				} else {
					((Row) vals[i]).setField(fieldIdx, copyMap.apply(map));
				}
			}

		} else {

			Object[] keyRows = new Object[mapsVector.childCount];
			Object[] valueRows = new Object[mapsVector.childCount];

			// read map keys and values
			readField(keyRows, -1, keyType, keys, keyRows.length);
			readField(valueRows, -1, valueType, values, valueRows.length);

			long[] lengthVectorMap = mapsVector.lengths;
			int offset = 0;

			for (int i = 0; i < childCount; i++) {
				long numMapEntries = lengthVectorMap[i];
				HashMap map = readHashMap(keyRows, valueRows, offset, numMapEntries);
				offset += numMapEntries;

				if (fieldIdx == -1) {
					vals[i] = map;
				} else {
					((Row) vals[i]).setField(fieldIdx, map);
				}
			}
		}

	}

	private static <T> void readLongColumn(Object[] vals, int fieldIdx, LongColumnVector vector,
											int childCount, LongFunction<T> reader) {

		if (vector.isRepeating) { // fill complete column with first value
			if (vector.isNull[0]) {
				// fill vals with null values
				fillColumnWithRepeatingValue(vals, fieldIdx, null, childCount);
			} else {
				// read repeating non-null value by forwarding call.
				readNonNullLongColumn(vals, fieldIdx, vector, childCount, reader);
			}
		} else {
			boolean[] isNullVector = vector.isNull;
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						vals[i] = reader.apply(vector.vector[i]);
					}
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						rows[i].setField(fieldIdx, reader.apply(vector.vector[i]));
					}
				}
			}
		}
	}

	private static <T> void readDoubleColumn(Object[] vals, int fieldIdx, DoubleColumnVector vector,
												int childCount, DoubleFunction<T> reader) {

		if (vector.isRepeating) { // fill complete column with first value
			if (vector.isNull[0]) {
				// fill vals with null values
				fillColumnWithRepeatingValue(vals, fieldIdx, null, childCount);
			} else {
				// read repeating non-null value by forwarding call
				readNonNullDoubleColumn(vals, fieldIdx, vector, childCount, reader);
			}
		} else {
			boolean[] isNullVector = vector.isNull;
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						vals[i] = reader.apply(vector.vector[i]);
					}
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						rows[i].setField(fieldIdx, reader.apply(vector.vector[i]));
					}
				}
			}
		}
	}

	private static void readBytesColumnAsString(Object[] vals, int fieldIdx, BytesColumnVector bytes, int childCount) {

		if (bytes.isRepeating) { // fill complete column with first value
			if (bytes.isNull[0]) {
				// fill vals with null values
				fillColumnWithRepeatingValue(vals, fieldIdx, null, childCount);
			} else {
				// read repeating non-null value by forwarding call
				readNonNullBytesColumnAsString(vals, fieldIdx, bytes, childCount);
			}
		} else {
			boolean[] isNullVector = bytes.isNull;
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						vals[i] = readString(bytes.vector[i], bytes.start[i], bytes.length[i]);
					}
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						rows[i].setField(fieldIdx, readString(bytes.vector[i], bytes.start[i], bytes.length[i]));
					}
				}
			}
		}
	}

	private static void readBytesColumnAsBinary(Object[] vals, int fieldIdx, BytesColumnVector bytes, int childCount) {

		if (bytes.isRepeating) { // fill complete column with first value
			if (bytes.isNull[0]) {
				// fill vals with null values
				fillColumnWithRepeatingValue(vals, fieldIdx, null, childCount);
			} else {
				// read repeating non-null value by forwarding call
				readNonNullBytesColumnAsBinary(vals, fieldIdx, bytes, childCount);
			}
		} else {
			boolean[] isNullVector = bytes.isNull;
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						vals[i] = readBinary(bytes.vector[i], bytes.start[i], bytes.length[i]);
					}
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						rows[i].setField(fieldIdx, readBinary(bytes.vector[i], bytes.start[i], bytes.length[i]));
					}
				}
			}
		}
	}

	private static void readLongColumnAsDate(Object[] vals, int fieldIdx, LongColumnVector vector, int childCount) {

		if (vector.isRepeating) { // fill complete column with first value
			if (vector.isNull[0]) {
				// fill vals with null values
				fillColumnWithRepeatingValue(vals, fieldIdx, null, childCount);
			} else {
				// read repeating non-null value by forwarding call
				readNonNullLongColumnAsDate(vals, fieldIdx, vector, childCount);
			}
		} else {
			boolean[] isNullVector = vector.isNull;
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						vals[i] = readDate(vector.vector[i]);
					}
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						rows[i].setField(fieldIdx, readDate(vector.vector[i]));
					}
				}
			}
		}
	}

	private static void readTimestampColumn(Object[] vals, int fieldIdx, TimestampColumnVector vector, int childCount) {

		if (vector.isRepeating) { // fill complete column with first value
			if (vector.isNull[0]) {
				// fill vals with null values
				fillColumnWithRepeatingValue(vals, fieldIdx, null, childCount);
			} else {
				// read repeating non-null value by forwarding call
				readNonNullTimestampColumn(vals, fieldIdx, vector, childCount);
			}
		} else {
			boolean[] isNullVector = vector.isNull;
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						Timestamp ts = readTimestamp(vector.time[i], vector.nanos[i]);
						vals[i] = ts;
					}
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						Timestamp ts = readTimestamp(vector.time[i], vector.nanos[i]);
						rows[i].setField(fieldIdx, ts);
					}
				}
			}
		}
	}

	private static void readDecimalColumn(Object[] vals, int fieldIdx, DecimalColumnVector vector, int childCount) {

		if (vector.isRepeating) { // fill complete column with first value
			if (vector.isNull[0]) {
				// fill vals with null values
				fillColumnWithRepeatingValue(vals, fieldIdx, null, childCount);
			} else {
				// read repeating non-null value by forwarding call
				readNonNullDecimalColumn(vals, fieldIdx, vector, childCount);
			}
		} else {
			boolean[] isNullVector = vector.isNull;
			if (fieldIdx == -1) { // set as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						vals[i] = readBigDecimal(vector.vector[i]);
					}
				}
			} else { // set as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						rows[i].setField(fieldIdx, readBigDecimal(vector.vector[i]));
					}
				}
			}
		}
	}

	private static void readStructColumn(Object[] vals, int fieldIdx, StructColumnVector structVector, TypeDescription schema, int childCount) {

		List<TypeDescription> childrenTypes = schema.getChildren();

		int numFields = childrenTypes.size();

		// Early out if struct column is repeating and always null.
		// This is the only repeating case we need to handle.
		// ORC assumes that repeating values have been pushed to the children.
		if (structVector.isRepeating && structVector.isNull[0]) {
			if (fieldIdx < 0) {
				for (int i = 0; i < childCount; i++) {
					vals[i] = null;
				}
			} else {
				for (int i = 0; i < childCount; i++) {
					((Row) vals[i]).setField(fieldIdx, null);
				}
			}
			return;
		}

		// create a batch of Rows to read the structs
		Row[] structs = new Row[childCount];
		// TODO: possible improvement: reuse existing Row objects
		for (int i = 0; i < childCount; i++) {
			structs[i] = new Row(numFields);
		}

		// read struct fields
		for (int i = 0; i < numFields; i++) {
			ColumnVector fieldVector = structVector.fields[i];
			if (!fieldVector.isRepeating) {
				// Reduce fieldVector reads by setting all entries null where struct is null.
				if (fieldVector.noNulls) {
					// fieldVector had no nulls. Just use struct null information.
					System.arraycopy(structVector.isNull, 0, fieldVector.isNull, 0, structVector.isNull.length);
					structVector.fields[i].noNulls = false;
				} else {
					// fieldVector had nulls. Merge field nulls with struct nulls.
					for (int j = 0; j < structVector.isNull.length; j++) {
						structVector.fields[i].isNull[j] = structVector.isNull[j] || structVector.fields[i].isNull[j];
					}
				}
			}
			readField(structs, i, childrenTypes.get(i), structVector.fields[i], childCount);
		}

		boolean[] isNullVector = structVector.isNull;

		if (fieldIdx == -1) { // set struct as an object
			for (int i = 0; i < childCount; i++) {
				if (isNullVector[i]) {
					vals[i] = null;
				} else {
					vals[i] = structs[i];
				}
			}
		} else { // set struct as a field of Row
			Row[] rows = (Row[]) vals;
			for (int i = 0; i < childCount; i++) {
				if (isNullVector[i]) {
					rows[i].setField(fieldIdx, null);
				} else {
					rows[i].setField(fieldIdx, structs[i]);
				}
			}
		}
	}

	private static void readListColumn(Object[] vals, int fieldIdx, ListColumnVector list, TypeDescription schema, int childCount) {

		TypeDescription fieldType = schema.getChildren().get(0);
		// get class of list elements
		Class<?> classType = getClassForType(fieldType);

		if (list.isRepeating) {
			// list values are repeating. we only need to read the first list and copy it.

			if (list.isNull[0]) {
				// Even better. The first list is null and so are all lists are null
				for (int i = 0; i < childCount; i++) {
					if (fieldIdx == -1) {
						vals[i] = null;
					} else {
						((Row) vals[i]).setField(fieldIdx, null);
					}
				}

			} else {
				// Get function to copy list
				Function<Object, Object> copyList = getCopyFunction(schema);

				int offset = (int) list.offsets[0];
				int length = (int) list.lengths[0];
				// we only need to read until offset + length.
				int entriesToRead = offset + length;

				// read entries
				Object[] children = (Object[]) Array.newInstance(classType, entriesToRead);
				readField(children, -1, fieldType, list.child, entriesToRead);

				// create first list which will be copied
				Object[] temp;
				if (offset == 0) {
					temp = children;
				} else {
					temp = (Object[]) Array.newInstance(classType, length);
					System.arraycopy(children, offset, temp, 0, length);
				}

				// copy repeated list and set copy as result
				for (int i = 0; i < childCount; i++) {
					Object[] copy = (Object[]) copyList.apply(temp);
					if (fieldIdx == -1) {
						vals[i] = copy;
					} else {
						((Row) vals[i]).setField(fieldIdx, copy);
					}
				}
			}

		} else {
			if (!list.child.isRepeating) {
				boolean[] childIsNull = new boolean[list.childCount];
				Arrays.fill(childIsNull, true);
				// forward info of null lists into child vector
				for (int i = 0; i < childCount; i++) {
					// preserve isNull info of entries of non-null lists
					if (!list.isNull[i]) {
						int offset = (int) list.offsets[i];
						int length = (int) list.lengths[i];
						System.arraycopy(list.child.isNull, offset, childIsNull, offset, length);
					}
				}
				// override isNull of children vector
				list.child.isNull = childIsNull;
				list.child.noNulls = false;
			}

			// read children
			Object[] children = (Object[]) Array.newInstance(classType, list.childCount);
			readField(children, -1, fieldType, list.child, list.childCount);

			Object[] temp;
			// fill lists with children
			for (int i = 0; i < childCount; i++) {

				if (list.isNull[i]) {
					temp = null;
				} else {
					int offset = (int) list.offsets[i];
					int length = (int) list.lengths[i];

					temp = (Object[]) Array.newInstance(classType, length);
					System.arraycopy(children, offset, temp, 0, length);
				}

				if (fieldIdx == -1) {
					vals[i] = temp;
				} else {
					((Row) vals[i]).setField(fieldIdx, temp);
				}
			}
		}
	}

	private static void readMapColumn(Object[] vals, int fieldIdx, MapColumnVector map, TypeDescription schema, int childCount) {

		List<TypeDescription> fieldType = schema.getChildren();
		TypeDescription keyType = fieldType.get(0);
		TypeDescription valueType = fieldType.get(1);

		ColumnVector keys = map.keys;
		ColumnVector values = map.values;

		if (map.isRepeating) {
			// map values are repeating. we only need to read the first map and copy it.

			if (map.isNull[0]) {
				// Even better. The first map is null and so are all maps are null
				for (int i = 0; i < childCount; i++) {
					if (fieldIdx == -1) {
						vals[i] = null;
					} else {
						((Row) vals[i]).setField(fieldIdx, null);
					}
				}

			} else {
				// Get function to copy map
				Function<Object, Object> copyMap = getCopyFunction(schema);

				int offset = (int) map.offsets[0];
				int length = (int) map.lengths[0];
				// we only need to read until offset + length.
				int entriesToRead = offset + length;

				Object[] keyRows = new Object[entriesToRead];
				Object[] valueRows = new Object[entriesToRead];

				// read map keys and values
				readField(keyRows, -1, keyType, keys, entriesToRead);
				readField(valueRows, -1, valueType, values, entriesToRead);

				// create first map which will be copied
				HashMap temp = readHashMap(keyRows, valueRows, offset, length);

				// copy repeated map and set copy as result
				for (int i = 0; i < childCount; i++) {
					if (fieldIdx == -1) {
						vals[i] = copyMap.apply(temp);
					} else {
						((Row) vals[i]).setField(fieldIdx, copyMap.apply(temp));
					}
				}
			}
		} else {
			// ensure only keys and values that are referenced by non-null maps are set to non-null

			if (!keys.isRepeating) {
				// propagate is null info of map into keys vector
				boolean[] keyIsNull = new boolean[map.childCount];
				Arrays.fill(keyIsNull, true);
				for (int i = 0; i < childCount; i++) {
					// preserve isNull info for keys of non-null maps
					if (!map.isNull[i]) {
						int offset = (int) map.offsets[i];
						int length = (int) map.lengths[i];
						System.arraycopy(keys.isNull, offset, keyIsNull, offset, length);
					}
				}
				// override isNull of keys vector
				keys.isNull = keyIsNull;
				keys.noNulls = false;
			}
			if (!values.isRepeating) {
				// propagate is null info of map into values vector
				boolean[] valIsNull = new boolean[map.childCount];
				Arrays.fill(valIsNull, true);
				for (int i = 0; i < childCount; i++) {
					// preserve isNull info for vals of non-null maps
					if (!map.isNull[i]) {
						int offset = (int) map.offsets[i];
						int length = (int) map.lengths[i];
						System.arraycopy(values.isNull, offset, valIsNull, offset, length);
					}
				}
				// override isNull of values vector
				values.isNull = valIsNull;
				values.noNulls = false;
			}

			Object[] keyRows = new Object[map.childCount];
			Object[] valueRows = new Object[map.childCount];

			// read map keys and values
			readField(keyRows, -1, keyType, keys, keyRows.length);
			readField(valueRows, -1, valueType, values, valueRows.length);

			boolean[] isNullVector = map.isNull;
			long[] lengths = map.lengths;
			long[] offsets = map.offsets;

			if (fieldIdx == -1) { // set map as an object
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						vals[i] = null;
					} else {
						vals[i] = readHashMap(keyRows, valueRows, (int) offsets[i], lengths[i]);
					}
				}
			} else { // set map as a field of Row
				Row[] rows = (Row[]) vals;
				for (int i = 0; i < childCount; i++) {
					if (isNullVector[i]) {
						rows[i].setField(fieldIdx, null);
					} else {
						rows[i].setField(fieldIdx, readHashMap(keyRows, valueRows, (int) offsets[i], lengths[i]));
					}
				}
			}
		}
	}

	/**
	 * Sets a repeating value to all objects or row fields of the passed vals array.
	 *
	 * @param vals The array of objects or Rows.
	 * @param fieldIdx If the objs array is an array of Row, the index of the field that needs to be filled.
	 *                 Otherwise a -1 must be passed and the data is directly filled into the array.
	 * @param repeatingValue The value that is set.
	 * @param childCount The number of times the value is set.
	 */
	private static void fillColumnWithRepeatingValue(Object[] vals, int fieldIdx, Object repeatingValue, int childCount) {

		if (fieldIdx == -1) {
			// set value as an object
			Arrays.fill(vals, 0, childCount, repeatingValue);
		} else {
			// set value as a field of Row
			Row[] rows = (Row[]) vals;
			for (int i = 0; i < childCount; i++) {
				rows[i].setField(fieldIdx, repeatingValue);
			}
		}
	}

	private static Class<?> getClassForType(TypeDescription schema) {

		// check the type of the vector to decide how to read it.
		switch (schema.getCategory()) {
			case BOOLEAN:
				return Boolean.class;
			case BYTE:
				return Byte.class;
			case SHORT:
				return Short.class;
			case INT:
				return Integer.class;
			case LONG:
				return Long.class;
			case FLOAT:
				return Float.class;
			case DOUBLE:
				return Double.class;
			case CHAR:
			case VARCHAR:
			case STRING:
				return String.class;
			case DATE:
				return Date.class;
			case TIMESTAMP:
				return Timestamp.class;
			case BINARY:
				return byte[].class;
			case DECIMAL:
				return BigDecimal.class;
			case STRUCT:
				return Row.class;
			case LIST:
				Class<?> childClass = getClassForType(schema.getChildren().get(0));
				return Array.newInstance(childClass, 0).getClass();
			case MAP:
				return HashMap.class;
			case UNION:
				throw new UnsupportedOperationException("UNION type not supported yet");
			default:
				throw new IllegalArgumentException("Unknown type " + schema);
		}
	}

	private static Boolean readBoolean(long l) {
		return l != 0;
	}

	private static Byte readByte(long l) {
		return (byte) l;
	}

	private static Short readShort(long l) {
		return (short) l;
	}

	private static Integer readInt(long l) {
		return (int) l;
	}

	private static Long readLong(long l) {
		return l;
	}

	private static Float readFloat(double d) {
		return (float) d;
	}

	private static Double readDouble(double d) {
		return d;
	}

	private static Date readDate(long l) {
		// day to milliseconds
		final long t = l * MILLIS_PER_DAY;
		// adjust by local timezone
		return new java.sql.Date(t - LOCAL_TZ.getOffset(t));
	}

	private static String readString(byte[] bytes, int start, int length) {
		return new String(bytes, start, length, StandardCharsets.UTF_8);
	}

	private static byte[] readBinary(byte[] src, int srcPos, int length) {
		byte[] result = new byte[length];
		System.arraycopy(src, srcPos, result, 0, length);
		return result;
	}

	private static BigDecimal readBigDecimal(HiveDecimalWritable hiveDecimalWritable) {
		HiveDecimal hiveDecimal = hiveDecimalWritable.getHiveDecimal();
		return hiveDecimal.bigDecimalValue();
	}

	private static Timestamp readTimestamp(long time, int nanos) {
		Timestamp ts = new Timestamp(time);
		ts.setNanos(nanos);
		return ts;
	}

	private static HashMap readHashMap(Object[] keyRows, Object[] valueRows, int offset, long length) {
		HashMap<Object, Object> resultMap = new HashMap<>();
		for (int j = 0; j < length; j++) {
			resultMap.put(keyRows[offset], valueRows[offset]);
			offset++;
		}
		return resultMap;
	}

	@SuppressWarnings("unchecked")
	private static Function<Object, Object> getCopyFunction(TypeDescription schema) {
		// check the type of the vector to decide how to read it.
		switch (schema.getCategory()) {
			case BOOLEAN:
			case BYTE:
			case SHORT:
			case INT:
			case LONG:
			case FLOAT:
			case DOUBLE:
			case CHAR:
			case VARCHAR:
			case STRING:
			case DECIMAL:
				return OrcBatchReader::returnImmutable;
			case DATE:
				return OrcBatchReader::copyDate;
			case TIMESTAMP:
				return OrcBatchReader::copyTimestamp;
			case BINARY:
				return OrcBatchReader::copyBinary;
			case STRUCT:
				List<TypeDescription> fieldTypes = schema.getChildren();
				Function<Object, Object>[] copyFields = new Function[fieldTypes.size()];
				for (int i = 0; i < fieldTypes.size(); i++) {
					copyFields[i] = getCopyFunction(fieldTypes.get(i));
				}
				return new CopyStruct(copyFields);
			case LIST:
				TypeDescription entryType = schema.getChildren().get(0);
				Function<Object, Object> copyEntry = getCopyFunction(entryType);
				Class entryClass = getClassForType(entryType);
				return new CopyList(copyEntry, entryClass);
			case MAP:
				TypeDescription keyType = schema.getChildren().get(0);
				TypeDescription valueType = schema.getChildren().get(1);
				Function<Object, Object> copyKey = getCopyFunction(keyType);
				Function<Object, Object> copyValue = getCopyFunction(valueType);
				return new CopyMap(copyKey, copyValue);
			case UNION:
				throw new UnsupportedOperationException("UNION type not supported yet");
			default:
				throw new IllegalArgumentException("Unknown type " + schema);
		}
	}

	private static Object returnImmutable(Object o) {
		return o;
	}

	private static Date copyDate(Object o) {
		if (o == null) {
			return null;
		} else {
			long date = ((Date) o).getTime();
			return new Date(date);
		}
	}

	private static Timestamp copyTimestamp(Object o) {
		if (o == null) {
			return null;
		} else {
			long millis = ((Timestamp) o).getTime();
			int nanos = ((Timestamp) o).getNanos();
			Timestamp copy = new Timestamp(millis);
			copy.setNanos(nanos);
			return copy;
		}
	}

	private static byte[] copyBinary(Object o) {
		if (o == null) {
			return null;
		} else {
			int length = ((byte[]) o).length;
			return Arrays.copyOf((byte[]) o, length);
		}
	}

	private static class CopyStruct implements Function<Object, Object> {

		private final Function<Object, Object>[] copyFields;

		CopyStruct(Function<Object, Object>[] copyFields) {
			this.copyFields = copyFields;
		}

		@Override
		public Object apply(Object o) {
			if (o == null) {
				return null;
			} else {
				Row r = (Row) o;
				Row copy = new Row(copyFields.length);
				for (int i = 0; i < copyFields.length; i++) {
					copy.setField(i, copyFields[i].apply(r.getField(i)));
				}
				return copy;
			}
		}
	}

	private static class CopyList implements Function<Object, Object> {

		private final Function<Object, Object> copyEntry;
		private final Class entryClass;

		CopyList(Function<Object, Object> copyEntry, Class entryClass) {
			this.copyEntry = copyEntry;
			this.entryClass = entryClass;
		}

		@Override
		public Object apply(Object o) {
			if (o == null) {
				return null;
			} else {
				Object[] l = (Object[]) o;
				Object[] copy = (Object[]) Array.newInstance(entryClass, l.length);
				for (int i = 0; i < l.length; i++) {
					copy[i] = copyEntry.apply(l[i]);
				}
				return copy;
			}
		}
	}

	@SuppressWarnings("unchecked")
	private static class CopyMap implements Function<Object, Object> {

		private final Function<Object, Object> copyKey;
		private final Function<Object, Object> copyValue;

		CopyMap(Function<Object, Object> copyKey, Function<Object, Object> copyValue) {
			this.copyKey = copyKey;
			this.copyValue = copyValue;
		}

		@Override
		public Object apply(Object o) {
			if (o == null) {
				return null;
			} else {
				Map<Object, Object> m = (Map<Object, Object>) o;
				HashMap<Object, Object> copy = new HashMap<>(m.size());

				for (Map.Entry<Object, Object> e : m.entrySet()) {
					Object keyCopy = copyKey.apply(e.getKey());
					Object valueCopy = copyValue.apply(e.getValue());
					copy.put(keyCopy, valueCopy);
				}
				return copy;
			}
		}
	}
}