 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * See the License for the specific language governing permissions and
 * limitations under the License.

package com.facebook.hive.orc;

import static com.facebook.hive.orc.OrcTestUtils.byteBuf;
import static com.facebook.hive.orc.OrcTestUtils.bytes;
import static com.facebook.hive.orc.OrcTestUtils.inner;
import static com.facebook.hive.orc.OrcTestUtils.list;
import static com.facebook.hive.orc.OrcTestUtils.map;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertNotNull;
import static junit.framework.Assert.assertNull;
import static junit.framework.Assert.assertTrue;

import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;

import com.facebook.hive.orc.compression.CompressionKind;
import com.facebook.hive.orc.statistics.BooleanColumnStatistics;
import com.facebook.hive.orc.statistics.ColumnStatistics;
import com.facebook.hive.orc.statistics.DoubleColumnStatistics;
import com.facebook.hive.orc.statistics.IntegerColumnStatistics;
import com.facebook.hive.orc.statistics.StringColumnStatistics;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;

import com.facebook.hive.orc.OrcTestUtils.BigRow;
import com.facebook.hive.orc.OrcTestUtils.DoubleRow;
import com.facebook.hive.orc.OrcTestUtils.InnerStruct;
import com.facebook.hive.orc.OrcTestUtils.IntStruct;
import com.facebook.hive.orc.OrcTestUtils.MiddleStruct;
import com.facebook.hive.orc.OrcTestUtils.ReallyBigRow;
import com.facebook.hive.orc.OrcTestUtils.StringListWithId;
import com.facebook.hive.orc.OrcTestUtils.StringStruct;
import com.facebook.hive.orc.lazy.LazyTreeReader;
import com.facebook.hive.orc.lazy.OrcLazyBinary;
import com.facebook.hive.orc.lazy.OrcLazyBoolean;
import com.facebook.hive.orc.lazy.OrcLazyByte;
import com.facebook.hive.orc.lazy.OrcLazyDouble;
import com.facebook.hive.orc.lazy.OrcLazyFloat;
import com.facebook.hive.orc.lazy.OrcLazyInt;
import com.facebook.hive.orc.lazy.OrcLazyList;
import com.facebook.hive.orc.lazy.OrcLazyLong;
import com.facebook.hive.orc.lazy.OrcLazyMap;
import com.facebook.hive.orc.lazy.OrcLazyObject;
import com.facebook.hive.orc.lazy.OrcLazyObjectInspectorUtils;
import com.facebook.hive.orc.lazy.OrcLazyRow;
import com.facebook.hive.orc.lazy.OrcLazyShort;
import com.facebook.hive.orc.lazy.OrcLazyString;
import com.facebook.hive.orc.lazy.OrcLazyStruct;
import com.facebook.hive.orc.lazy.OrcLazyTimestamp;
import com.facebook.hive.orc.lazy.OrcLazyUnion;

 * Tests for the top level reader/streamFactory of ORC files.
public class TestOrcFile {

  // Scratch directory for the ORC files the tests generate: taken from the "test.tmp.dir"
  // system property, falling back to target/test/tmp (joined with the platform separator).
  Path workDir = new Path(System.getProperty("test.tmp.dir",
      "target" + File.separator + "test" + File.separator + "tmp"));

  // Hadoop configuration and file system handle; both are assigned in openFileSystem().
  Configuration conf;
  FileSystem fs;
  // Locations of the ORC files used by a test; both are assigned in openFileSystem().
  Path testFilePath, testFilePath2;

  // Source of the current test method's name, which openFileSystem() embeds in the file names.
  public TestName testCaseName = new TestName();

  /**
   * Prepares the fixtures shared by the tests: creates a new {@link Configuration}, obtains the
   * local {@link FileSystem}, derives the two test file paths from {@code workDir} and the name
   * of the current test method, and deletes whatever already exists at those two paths.
   *
   * @throws Exception propagated from the Hadoop file system calls
   */
  public void openFileSystem () throws Exception {
    conf = new Configuration();
    fs = FileSystem.getLocal(conf);
    // The test method name is part of each file name, so every test works on its own files
    testFilePath = new Path(workDir, "TestOrcFile." +
        testCaseName.getMethodName() + ".orc");
    testFilePath2 = new Path(workDir, "TestOrcFile2." +
        testCaseName.getMethodName() + ".orc");
    // Non-recursive deletes (second argument is false) of anything left at these paths
    fs.delete(testFilePath, false);
    fs.delete(testFilePath2, false);

  /**
   * Writes two {@link BigRow}s, one with every field populated and one with every field null,
   * reads them back and checks the {@code hashCode()} of each of the twelve field values.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  public void testHash() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        100000, CompressionKind.ZLIB, 10000, 10000);
    // First row: every field carries a value
    writer.addRow(new BigRow(false, (byte) 1, (short) 1, 1,
        1L, (float) 1.0, 1.0, bytes(1), "1",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(3, "good"), inner(4, "bad")),
        map(inner(3, "good"), inner(4, "bad"))));
    // Second row: every field is null
    writer.addRow(new BigRow(null, null, null, null,
        null, null, null, null, null, null, null, null));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // Expected hash codes of the field values of the fully populated row
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(1, row.getFieldValue(0).hashCode());
    assertEquals(1, row.getFieldValue(1).hashCode());
    assertEquals(1, row.getFieldValue(2).hashCode());
    assertEquals(1, row.getFieldValue(3).hashCode());
    assertEquals(1, row.getFieldValue(4).hashCode());
    assertEquals(1065353216, row.getFieldValue(5).hashCode());
    assertEquals(1072693248, row.getFieldValue(6).hashCode());
    assertEquals(32, row.getFieldValue(7).hashCode());
    assertEquals(80, row.getFieldValue(8).hashCode());
    assertEquals(8417130, row.getFieldValue(9).hashCode());
    assertEquals(127296452, row.getFieldValue(10).hashCode());
    assertEquals(7, row.getFieldValue(11).hashCode());

    // Every field value of the all-null row is expected to hash to 0
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(0, row.getFieldValue(0).hashCode());
    assertEquals(0, row.getFieldValue(1).hashCode());
    assertEquals(0, row.getFieldValue(2).hashCode());
    assertEquals(0, row.getFieldValue(3).hashCode());
    assertEquals(0, row.getFieldValue(4).hashCode());
    assertEquals(0, row.getFieldValue(5).hashCode());
    assertEquals(0, row.getFieldValue(6).hashCode());
    assertEquals(0, row.getFieldValue(7).hashCode());
    assertEquals(0, row.getFieldValue(8).hashCode());
    assertEquals(0, row.getFieldValue(9).hashCode());
    assertEquals(0, row.getFieldValue(10).hashCode());
    assertEquals(0, row.getFieldValue(11).hashCode());

  /**
   * Writes a single {@link BigRow}, reads it back and, for the primitive fields 0 through 8,
   * checks the value materialized from the row's lazy object and then the value materialized
   * from a copy built by passing that lazy object to the constructor of its own type.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  public void testDeepCopy() throws Exception {
    // Create a table and write a row to it
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        100000, CompressionKind.ZLIB, 10000, 10000);
    writer.addRow(new BigRow(false, (byte) 1, (short) 1, 1,
        1L, (float) 1.0, 1.0, bytes(1), "1",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(3, "good"), inner(4, "bad")),
        map(inner(3, "good"), inner(4, "bad"))));


    // Prepare to read back the row
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();

    // For each field: check that the object read equals what was written, then copy the object
    // and make the same check on the copy
    OrcLazyObject obj;
        ((BooleanWritable) ((OrcLazyBoolean) row.getFieldValue(0)).materialize()).get());
    obj = new OrcLazyBoolean((OrcLazyBoolean) row.getFieldValue(0));
    assertEquals(false, ((BooleanWritable) obj.materialize()).get());

    // Field 1: byte
    assertEquals(1, ((ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize()).get());
    obj = new OrcLazyByte((OrcLazyByte) row.getFieldValue(1));
    assertEquals(1, ((ByteWritable) obj.materialize()).get());

    // Field 2: short
    assertEquals(1, ((ShortWritable) ((OrcLazyShort) row.getFieldValue(2)).materialize()).get());
    obj = new OrcLazyShort((OrcLazyShort) row.getFieldValue(2));
    assertEquals(1, ((ShortWritable) obj.materialize()).get());

    // Field 3: int
    assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(3)).materialize()).get());
    obj = new OrcLazyInt((OrcLazyInt) row.getFieldValue(3));
    assertEquals(1, ((IntWritable) obj.materialize()).get());

    // Field 4: long
    assertEquals(1, ((LongWritable) ((OrcLazyLong) row.getFieldValue(4)).materialize()).get());
    obj = new OrcLazyLong((OrcLazyLong) row.getFieldValue(4));
    assertEquals(1, ((LongWritable) obj.materialize()).get());

        ((FloatWritable) ((OrcLazyFloat) row.getFieldValue(5)).materialize()).get());
    obj = new OrcLazyFloat((OrcLazyFloat) row.getFieldValue(5));
    assertEquals(1.0f, ((FloatWritable) obj.materialize()).get());

        ((DoubleWritable) ((OrcLazyDouble) row.getFieldValue(6)).materialize()).get());
    obj = new OrcLazyDouble((OrcLazyDouble) row.getFieldValue(6));
    assertEquals(1.0, ((DoubleWritable) obj.materialize()).get());

    // Field 7: binary
    assertEquals(bytes(1), ((OrcLazyBinary) row.getFieldValue(7)).materialize());
    obj = new OrcLazyBinary((OrcLazyBinary) row.getFieldValue(7));
    assertEquals(bytes(1), obj.materialize());

    // Field 8: string
    assertEquals("1", ((Text) ((OrcLazyString) row.getFieldValue(8)).materialize()).toString());
    obj = new OrcLazyString((OrcLazyString) row.getFieldValue(8));
    assertEquals("1", ((Text) obj.materialize()).toString());

    // Currently copies are not supported for complex types

  /**
   * Writes a single column of doubles in four runs of 32768 values (zeros, random, zeros,
   * random), then reads the file with HIVE_ORC_READ_COMPRESSION_STRIDES set to 2 and
   * HIVE_ORC_EAGER_HDFS_READ set to false, checking the values found at row 40000 and
   * row 120000 against what was written.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  public void testSeekAcrossChunks() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (DoubleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);

    // Create a table consisting of a single column of doubles.
    // Add enough values to it to get 3 index strides (doubles are 8 bytes); more is ok.
    // Note that the compression buffer size and index stride length are very important.
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 2097152,
        CompressionKind.ZLIB, 262144, 10000);
    Random rand = new Random(42);
    // NOTE(review): only 4 * 32768 = 131072 values are written below; the array is declared
    // slightly larger (131702), which is harmless but may be a transposed digit - confirm
    double[] values = new double[131702];

    // The first compression block is all 0's
    for (int i = 0; i < 32768; i++) {
      values[i] = 0;
      writer.addRow(new DoubleRow(values[i]));

    // The second compression block is random doubles
    for (int i = 0; i < 32768; i++) {
      values[i + 32768] = rand.nextDouble();
      writer.addRow(new DoubleRow(values[i + 32768]));

    // The third compression block is all 0's
    // (important so it compresses to the same size as the first)
    for (int i = 0; i < 32768; i++) {
      values[i + 32768 + 32768] = 0;
      writer.addRow(new DoubleRow(values[i + 32768 + 32768]));

    // The fourth compression block is random
    for (int i = 0; i < 32768; i++) {
      values[i + 32768 + 32768 + 32768] = rand.nextDouble();
      writer.addRow(new DoubleRow(values[i + 32768 + 32768 + 32768]));

    // Reader settings for this scenario: two compression strides per read, no eager HDFS read
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES, 2);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, false);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);

    // The file has a single double column; fetch its inspector to extract primitive values
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    DoubleObjectInspector columnInspector =
        (DoubleObjectInspector) fields.get(0).getFieldObjectInspector();

    RecordReader rows = reader.rows(null);
    Object row = null;

    // Skip enough values to get to the 2nd index stride in the first chunk
    for (int i = 0; i < 40001; i++) {
      row = rows.next(row);

    // This will set previousOffset to be the size of the first compression block and the
    // compressionOffset to some other value (it doesn't matter which; the point is that it is
    // different from the start of the compression block)
    assertEquals(values[40000], columnInspector.get(readerInspector.getStructFieldData(row,

    // Skip enough values to get to the 2nd index stride of the second chunk
    for (int i = 0; i < 80000; i++) {

    // When seek is called, previousOffset will equal newCompressedOffset since the former is
    // the length of the first compression block and the latter is the length of the third
    // compression block (remember the chunks contain 2 index strides), so if we only check this
    // (or for some other reason) we will not adjust compressedIndex, we will read the wrong data
    assertEquals(values[120000], columnInspector.get(readerInspector.getStructFieldData(row, fields.get(0))));


  /**
   * End-to-end round trip of two {@link BigRow}s: writes them with ZLIB compression, then checks
   * the file-level column statistics, the object inspectors exposed by the reader, and finally
   * the contents of both rows field by field, including the nested struct, list and map.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  public void test1() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        100000, CompressionKind.ZLIB, 10000, 10000);
    // The two rows written here are the values every assertion below is compared against
    writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
        Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(3, "good"), inner(4, "bad")),
    writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
        Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
        map(inner(5,"chani"), inner(1,"mauddib"))));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);

    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    // stats[1]: boolean statistics - one false and one true were written
    assertEquals(2, stats[1].getNumberOfValues());
    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
    assertEquals("count: 2 true: 1", stats[1].toString());

    // stats[3]: integer statistics matching the short values 1024 and 2048 written above
    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
    assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
    assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",

        ((IntegerColumnStatistics) stats[5]).getMaximum());
        ((IntegerColumnStatistics) stats[5]).getMinimum());
    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
    assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",

    // stats[7]: double statistics matching the values -15.0 and -5.0 written above
    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
    assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
    assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",

    // stats[9]: string statistics matching the values "hi" and "bye" written above
    assertEquals("count: 2 min: bye max: hi", stats[9].toString());

    // check the inspectors
    StructObjectInspector readerInspector =
        (StructObjectInspector) reader.getObjectInspector();
        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
        + "map:map<string,struct<int1:int,string1:string>>>",
    List<? extends StructField> fields =
    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
    ByteObjectInspector by = (ByteObjectInspector) readerInspector.
    ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
    IntObjectInspector in = (IntObjectInspector) readerInspector.
    LongObjectInspector lo = (LongObjectInspector) readerInspector.
    FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
    StringObjectInspector st = (StringObjectInspector) readerInspector.
    StructObjectInspector mid = (StructObjectInspector) readerInspector.
    List<? extends StructField> midFields =
    ListObjectInspector midli =
        (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
    StructObjectInspector inner = (StructObjectInspector)
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector inner_in = (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    StringObjectInspector inner_st = (StringObjectInspector) inFields.get(1).getFieldObjectInspector();
    ListObjectInspector li = (ListObjectInspector) readerInspector.
    MapObjectInspector ma = (MapObjectInspector) readerInspector.
    StructObjectInspector lc = (StructObjectInspector)
    StringObjectInspector mk = (StringObjectInspector)
    StructObjectInspector mv = (StructObjectInspector)
    RecordReader rows = reader.rows(null);
    Object row = rows.next(null);
    // check the contents of the first row
        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(1, by.get(readerInspector.getStructFieldData(row,
    assertEquals(1024, sh.get(readerInspector.getStructFieldData(row,
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
        getStructFieldData(row, fields.get(4))));
    assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row,
        fields.get(5))), 0.00001);
    assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row,
        fields.get(6))), 0.00001);
    assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
        readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
        getStructFieldData(row, fields.get(8))));
    // Field 9 is the nested struct; its first field is a list of (int, string) structs
    List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.
        getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertEquals(2, midRow.size());
    assertEquals(1, inner_in.get(inner.getStructFieldData(midRow.get(0),
    assertEquals("bye", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(0), inFields.get(1))));
    assertEquals(2, inner_in.get(inner.getStructFieldData(midRow.get(1),
    assertEquals("sigh", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(1), inFields.get(1))));
    List<?> list = li.getList(readerInspector.getStructFieldData(row,
    assertEquals(2, list.size());
    assertEquals(3, inner_in.get(inner.getStructFieldData(list.get(0),
    assertEquals("good", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(0), inFields.get(1))));
    assertEquals(4, inner_in.get(inner.getStructFieldData(list.get(1),
    assertEquals("bad", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(1), inFields.get(1))));
    Map<?,?> map = ma.getMap(readerInspector.getStructFieldData(row,
    assertEquals(0, map.size());

    // check the contents of second row
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(100, by.get(readerInspector.getStructFieldData(row,
    assertEquals(2048, sh.get(readerInspector.getStructFieldData(row,
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
        getStructFieldData(row, fields.get(4))));
    assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row,
        fields.get(5))), 0.00001);
    assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row,
        fields.get(6))), 0.00001);
    assertEquals(bytes(), bi.getPrimitiveWritableObject(
        readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.
        getStructFieldData(row, fields.get(8))));
    midRow = midli.getList(mid.getStructFieldData(readerInspector.
        getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertEquals(2, midRow.size());
    assertEquals(1, inner_in.get(inner.getStructFieldData(midRow.get(0),
    assertEquals("bye", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(0), inFields.get(1))));
    assertEquals(2, inner_in.get(inner.getStructFieldData(midRow.get(1),
    assertEquals("sigh", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(1), inFields.get(1))));
    list = li.getList(readerInspector.getStructFieldData(row,
    assertEquals(3, list.size());
    assertEquals(100000000, inner_in.get(inner.getStructFieldData(list.get(0),
    assertEquals("cat", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(0), inFields.get(1))));
    assertEquals(-100000, inner_in.get(inner.getStructFieldData(list.get(1),
    assertEquals("in", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(1), inFields.get(1))));
    assertEquals(1234, inner_in.get(inner.getStructFieldData(list.get(2),
    assertEquals("hat", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(2), inFields.get(1))));
    map = ma.getMap(readerInspector.getStructFieldData(row,
    assertEquals(2, map.size());
    // Map iteration order is not relied upon: each expected key must be seen exactly once,
    // and any other key fails the test
    boolean[] found = new boolean[2];
    for(Object key: map.keySet()) {
      String str = mk.getPrimitiveJavaObject(key);
      if (str.equals("chani")) {
        assertEquals(false, found[0]);
        assertEquals(5, inner_in.get(inner.getStructFieldData(map.get(key),
        assertEquals(str, inner_st.getPrimitiveJavaObject(
            inner.getStructFieldData(map.get(key), inFields.get(1))));
        found[0] = true;
      } else if (str.equals("mauddib")) {
        assertEquals(false, found[1]);
        assertEquals(1, inner_in.get(inner.getStructFieldData(map.get(key),
        assertEquals(str, inner_st.getPrimitiveJavaObject(
            inner.getStructFieldData(map.get(key), inFields.get(1))));
        found[1] = true;
      } else {
        throw new IllegalArgumentException("Unknown key " + str);
    assertEquals(true, found[0]);
    assertEquals(true, found[1]);

    // handle the close up
    assertEquals(false, rows.hasNext());

  /**
   * Writes 21000 two-column (int, string) rows generated from two seeded {@link Random}s, checks
   * the file statistics and type list, then reads the file through two record readers that are
   * each given a different boolean array and verifies the int column via the first reader and
   * the string column via the second.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  public void testColumnProjection() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.NONE, 100, 1000);
    // Fixed seeds make the generated data reproducible, so it can be regenerated when reading
    Random r1 = new Random(1);
    Random r2 = new Random(2);
    int x;
    int minInt=0, maxInt=0;
    String y;
    String minStr = null, maxStr = null;
    // Track the extremes of both generated columns for comparison with the file statistics
    for(int i=0; i < 21000; ++i) {
      x = r1.nextInt();
      y = Long.toHexString(r2.nextLong());
      if (i == 0 || x < minInt) {
        minInt = x;
      if (i == 0 || x > maxInt) {
        maxInt = x;
      if (i == 0 || y.compareTo(minStr) < 0) {
        minStr = y;
      if (i == 0 || y.compareTo(maxStr) > 0) {
        maxStr = y;
      writer.addRow(inner(x, y));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);

    // check out the statistics
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(3, stats.length);
    for(ColumnStatistics s: stats) {
      assertEquals(21000, s.getNumberOfValues());
      if (s instanceof IntegerColumnStatistics) {
        assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
        assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
      } else if (s instanceof StringColumnStatistics) {
        assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
        assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());

    // check out the types
    List<OrcProto.Type> types = reader.getTypes();
    assertEquals(3, types.size());
    assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
    assertEquals(2, types.get(0).getSubtypesCount());
    assertEquals(1, types.get(0).getSubtypes(0));
    assertEquals(2, types.get(0).getSubtypes(1));
    assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
    assertEquals(0, types.get(1).getSubtypesCount());
    assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
    assertEquals(0, types.get(2).getSubtypesCount());

    // read the contents and make sure they match
    // NOTE(review): the boolean arrays presumably select which of the three types (struct, int,
    // string) each reader loads - confirm against Reader.rows
    RecordReader rows1 = reader.rows(new boolean[]{true, true, false});
    RecordReader rows2 = reader.rows(new boolean[]{true, false, true});
    // Restart both generators with the original seeds to replay the written sequence
    r1 = new Random(1);
    r2 = new Random(2);
    OrcLazyStruct row1 = null;
    OrcLazyStruct row2 = null;
    for(int i = 0; i < 21000; ++i) {
      assertEquals(true, rows1.hasNext());
      assertEquals(true, rows2.hasNext());
      row1 = (OrcLazyStruct) rows1.next(row1);
      row2 = (OrcLazyStruct) rows2.next(row2);
      assertEquals(r1.nextInt(), ((IntWritable) ((OrcLazyInt) ((OrcStruct) row1.materialize()).getFieldValue(0)).materialize()).get());
          ((OrcLazyString) ((OrcStruct) row2.materialize()).getFieldValue(1)).materialize().toString());
    assertEquals(false, rows1.hasNext());
    assertEquals(false, rows2.hasNext());

  /**
   * Creates an uncompressed ORC file to which no rows are added and checks what the reader
   * reports for it: no rows, no user metadata keys, no stripes, compression kind NONE, a
   * compression size of 0 and a content length of 3.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  public void testEmptyFile() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.NONE, 100, 10000);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(false, reader.rows(null).hasNext());
    assertEquals(CompressionKind.NONE, reader.getCompression());
    assertEquals(0, reader.getNumberOfRows());
    assertEquals(0, reader.getCompressionSize());
    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    // NOTE(review): 3 is presumably the length of the file's leading magic bytes - confirm
    assertEquals(3, reader.getContentLength());
    assertEquals(false, reader.getStripes().iterator().hasNext());

  /**
   * Exercises user metadata: stores values under the keys "my.meta", "clobber" (set three
   * times, the last time after a row has been added) and "big" (a 40000-byte buffer), then
   * checks that the reader returns the last "clobber" value and the "big" buffer, and that
   * exactly those three keys are listed.
   *
   * @throws Exception if writing or reading the ORC file fails
   */
  public void testMetaData() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.NONE, 100, 10000);
    writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
    // The same key is written repeatedly; the assertion below expects the last value to win
    writer.addUserMetadata("clobber", byteBuf(1,2,3));
    writer.addUserMetadata("clobber", byteBuf(4,3,2,1));
    ByteBuffer bigBuf = ByteBuffer.allocate(40000);
    Random random = new Random(0);
    writer.addUserMetadata("big", bigBuf);
    writer.addRow(new BigRow(true, (byte) 127, (short) 1024, 42,
        42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
        null, null, null, null));
    // Overwrite "clobber" once more, this time after a row has been added
    writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(byteBuf(5,7,11,13,17,19), reader.getMetadataValue("clobber"));
    assertEquals(bigBuf, reader.getMetadataValue("big"));
    try {
    } catch (IllegalArgumentException iae) {
      // PASS
    // Count the listed keys; any key other than the three written above fails the test
    int i = 0;
    for(String key: reader.getMetadataKeys()) {
      if ("my.meta".equals(key) ||
          "clobber".equals(key) ||
          "big".equals(key)) {
        i += 1;
      } else {
        throw new IllegalArgumentException("unknown key " + key);
    assertEquals(3, i);

   * Union and timestamp are tested separately because their object
   * inspector has to be built manually. (Hive's reflection-based object
   * inspector doesn't handle them properly.)
  // Round-trips rows made of a timestamp (field 0) and a union (field 1) through an
  // uncompressed file, then checks the row count, the stripe layout and the values read back.
  public void testUnionAndTimestamp() throws Exception {
    final List<OrcProto.Type> types = ImmutableList.of(

    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = OrcLazyObjectInspectorUtils.createWritableObjectInspector(0, types);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        15 * 1024, CompressionKind.NONE, 100, 10000);
    // A single OrcStruct and a single OrcUnion are mutated in place for every row value below.
    OrcStruct row = new OrcStruct(types.get(0).getFieldNamesList());
    OrcUnion union = new OrcUnion();
    row.setFieldValue(1, union);
    // Union tag 0 is used with IntWritable values and tag 1 with Text values.
    row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
    union.set((byte) 0, new IntWritable(42));
    row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
    union.set((byte)1, new Text("hello"));
    // Null cases: both fields null, then a non-null union holding a null under each tag.
    row.setFieldValue(0, null);
    row.setFieldValue(1, null);
    row.setFieldValue(1, union);
    union.set((byte) 0, null);
    union.set((byte) 1, null);
    union.set((byte) 0, new IntWritable(200000));
    row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
    // 300 values: i is both the year and the fractional-second digits of the timestamp;
    // even i stores i*i under tag 0 as an int, odd i stores it under tag 1 as text.
    for(int i=1900; i < 2200; ++i) {
      row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
      if ((i & 1) == 0) {
        union.set((byte) 0, new IntWritable(i*i));
      } else {
        union.set((byte) 1, new Text(new Integer(i*i).toString()));
    // let's add a lot of constant rows to test the rle
    row.setFieldValue(0, null);
    union.set((byte) 0, new IntWritable(1732050807));
    for(int i=0; i < 5000; ++i) {
    union.set((byte) 0, new IntWritable(0));
    union.set((byte) 0, new IntWritable(10));
    union.set((byte) 0, new IntWritable(138));
    // Read side. 5309 equals the 6 + 300 + 5000 + 3 rows fetched with rows.next below.
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    assertEquals(5309, reader.getNumberOfRows());
    int stripeCount = 0;
    int rowCount = 0;
    long currentOffset = -1;
    // Each stripe after the first must start at the offset where the previous one ended
    // (offset + index length + data length + footer length).
    for(StripeInformation stripe: reader.getStripes()) {
      stripeCount += 1;
      rowCount += stripe.getNumberOfRows();
      if (currentOffset < 0) {
        currentOffset = stripe.getOffset() + stripe.getIndexLength() +
            stripe.getDataLength() + stripe.getFooterLength();
      } else {
        assertEquals(currentOffset, stripe.getOffset());
        currentOffset += stripe.getIndexLength() +
            stripe.getDataLength() + stripe.getFooterLength();
    assertEquals(reader.getNumberOfRows(), rowCount);
    assertEquals(2, stripeCount);
    assertEquals(reader.getContentLength(), currentOffset);
    RecordReader rows = reader.rows(null);
    assertEquals(0, rows.getRowNumber());
    assertEquals(0.0, rows.getProgress(), 0.000001);
    assertEquals(true, rows.hasNext());
    // Row 1: timestamp plus union tag 0 holding 42.
    OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
    row = (OrcStruct) lazyRow.materialize();
    inspector = reader.getObjectInspector();
    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
        ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
    union = (OrcUnion) ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(0, union.getTag());
    assertEquals(new IntWritable(42), union.getObject());
    // Row 2: nanosecond-precision timestamp plus union tag 1 holding the text hello.
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
        ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
    // The result of materialize() is discarded here and in several places below; the
    // assertions keep using the union reference assigned earlier.
    // NOTE(review): presumably materialize() refreshes that same OrcUnion instance - confirm.
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(1, union.getTag());
    assertEquals(new Text("hello"), union.getObject());
    // Row 3: both fields are null.
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(1)).materialize());
    // Row 4: null timestamp, union tag 0 holding null.
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
    union = (OrcUnion) ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(0, union.getTag());
    assertEquals(null, union.getObject());
    // Row 5: null timestamp, union tag 1 holding null.
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(1, union.getTag());
    assertEquals(null, union.getObject());
    // Row 6: the 1900-01-01 timestamp with the int 200000.
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
        ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(200000), union.getObject());
    // The 300 per-year rows, checked with the same even/odd rule used when setting them.
    for(int i=1900; i < 2200; ++i) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
          ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
      ((OrcLazyUnion) row.getFieldValue(1)).materialize();
      if ((i & 1) == 0) {
        assertEquals(0, union.getTag());
        assertEquals(new IntWritable(i*i), union.getObject());
      } else {
        assertEquals(1, union.getTag());
        assertEquals(new Text(new Integer(i*i).toString()), union.getObject());
    // The 5000 constant rows.
    for(int i=0; i < 5000; ++i) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      ((OrcLazyUnion) row.getFieldValue(1)).materialize();
      assertEquals(new IntWritable(1732050807), union.getObject());
    // The three trailing rows holding 0, 10 and 138.
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(0), union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(10), union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(138), union.getObject());
    // After the last row the reader reports no more rows, full progress and the total row number.
    assertEquals(false, rows.hasNext());
    assertEquals(1.0, rows.getProgress(), 0.00001);
    assertEquals(reader.getNumberOfRows(), rows.getRowNumber());

   * Read and write a randomly generated snappy file.
   * @throws Exception
  // Writes 10000 InnerStruct rows driven by Random(12) with SNAPPY compression, then
  // re-seeds the generator and checks that the values read back follow the same sequence.
  public void testSnappy() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.SNAPPY, 100, 10000);
    Random rand = new Random(12);
    for(int i=0; i < 10000; ++i) {
      writer.addRow(new InnerStruct(rand.nextInt(),
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    // Same seed as on the write side, so nextInt() replays the values that were written.
    rand = new Random(12);
    OrcLazyStruct row = null;
    for(int i=0; i < 10000; ++i) {
      assertEquals(true, rows.hasNext());
      row = (OrcLazyStruct) rows.next(row);
      assertEquals(rand.nextInt(), ((IntWritable) ((OrcLazyInt) ((OrcStruct) row.materialize()).getFieldValue(0)).materialize()).get());
          ((OrcLazyString) ((OrcStruct) row.materialize()).getFieldValue(1)).materialize().toString());
    // Exactly 10000 rows: nothing may remain after the loop.
    assertEquals(false, rows.hasNext());

   * Read and write a randomly generated snappy file written without a row index.
   * @throws Exception
  // Writes a SNAPPY file with the last createWriter argument set to 0 and checks that the
  // reader reports a row index stride of 0, a zero index length on the first stripe, and
  // the expected 50000 rows.
  public void testWithoutIndex() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector

    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        5000, CompressionKind.SNAPPY, 1000, 0);
    Random rand = new Random(24);
    // 10000 distinct rows; the inner loop runs 5 times per row (10000 * 5 = 50000).
    for(int i=0; i < 10000; ++i) {
      InnerStruct row = new InnerStruct(rand.nextInt(),
      for(int j=0; j< 5; ++j) {
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(50000, reader.getNumberOfRows());
    assertEquals(0, reader.getRowIndexStride());
    // Only the first stripe is inspected: it must carry data but no index bytes.
    StripeInformation stripe = reader.getStripes().iterator().next();
    assertEquals(true, stripe.getDataLength() != 0);
    assertEquals(0, stripe.getIndexLength());
    RecordReader rows = reader.rows(null);
    // Re-seed so the expected int and string values can be regenerated in order.
    rand = new Random(24);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for(int i=0; i < 10000; ++i) {
      int intVal = rand.nextInt();
      String strVal = Integer.toBinaryString(rand.nextInt());
      // Each generated (int, string) pair is expected on 5 consecutive rows.
      for(int j=0; j < 5; ++j) {
        assertEquals(true, rows.hasNext());
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        assertEquals(intVal, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
        assertEquals(strVal, ((OrcLazyString) row.getFieldValue(1)).materialize().toString());
    assertEquals(false, rows.hasNext());

  // Holder for the random source values a test file is generated from, so the same values
  // can later be used to rebuild the expected rows.
  private static class RandomRowInputs {
    long[] intValues;
    double[] doubleValues;
    String[] stringValues;
    BytesWritable[] byteValues;
    // Fixed pool of 128 entries; the writer methods fill it and draw stringValues from it.
    String[] words = new String[128];

    // Allocates one slot per row in each of the per-row arrays.
    public RandomRowInputs(int count) {
      intValues= new long[count];
      doubleValues = new double[count];
      stringValues = new String[count];
      byteValues = new BytesWritable[count];

  // Generates count rows worth of random inputs from Random(42), builds a ReallyBigRow for
  // each index with createRandomRow, and returns the inputs so callers can rebuild the
  // expected rows. lowMemoryMode is stored in the HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE setting.
  private RandomRowInputs writeRandomRows(int count, boolean lowMemoryMode) throws IOException {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (ReallyBigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);

    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE, lowMemoryMode);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        8000000, CompressionKind.ZLIB, 65536, 1000,
        new MemoryManager(conf));
    // Fixed seed: the generated inputs are the same on every run.
    Random rand = new Random(42);
    RandomRowInputs inputs = new RandomRowInputs(count);
    long[] intValues = inputs.intValues;
    double[] doubleValues = inputs.doubleValues;
    String[] stringValues = inputs.stringValues;
    BytesWritable[] byteValues = inputs.byteValues;
    String[] words = inputs.words;
    // Word pool: hex renderings of random ints.
    for(int i=0; i < words.length; ++i) {
      words[i] = Integer.toHexString(rand.nextInt());
    // Long and string inputs are filled two slots per iteration, so only the first
    // 2 * (count / 2) slots are assigned by this loop.
    for(int i=0; i < count/2; ++i) {
      intValues[2*i] = rand.nextLong();
      intValues[2*i+1] = rand.nextLong();
      stringValues[2*i] = words[rand.nextInt(words.length)];
      stringValues[2*i+1] = words[rand.nextInt(words.length)];
    // One double and one 20-byte buffer per row.
    for(int i=0; i < count; ++i) {
      doubleValues[i] = rand.nextDouble();
      byte[] buf = new byte[20];
      byteValues[i] = new BytesWritable(buf);
    for(int i=0; i < count; ++i) {
      ReallyBigRow bigrow = createRandomRow(intValues, doubleValues, stringValues,
          byteValues, words, i);
    // Drop the local reference to the writer before handing the inputs back.
    writer = null;
    return inputs;

  // Selects how many null values the generated rows contain. The constants used elsewhere
  // in this file are NONE, SOME and MANY.
  private static enum NumberOfNulls {
    // No nulls
    // Every nth value is null
    // Every nth value is NOT null

  // Checks the primitive-typed fields of a row through the lazy objects' primitive
  // accessors (materializeBoolean, materializeInt, ...) against the expected row.
  // Every accessor call sits in a try block paired with a catch for IOException.
  // NOTE(review): the catch bodies are not visible in this view; presumably they verify
  // that the expected value is null - confirm.
  private void compareRowsUsingPrimitives(ReallyBigRow expected, OrcLazyBoolean boolean1,
      OrcLazyShort short1, OrcLazyInt int1, OrcLazyLong long1, OrcLazyShort short2,
      OrcLazyInt int2, OrcLazyLong long2, OrcLazyShort short3, OrcLazyInt int3, OrcLazyLong long3,
      OrcLazyFloat float1, OrcLazyDouble double1) throws IOException {
    try {
      boolean b1 = boolean1.materializeBoolean();
      assertEquals(expected.boolean1.booleanValue(), b1);
    catch(IOException e) {

    // short1 is the one field here that goes through nextIsNull() and the materialized
    // ShortWritable rather than through a primitive accessor.
    if (short1.nextIsNull()) {
    } else {
          ((ShortWritable) short1.materialize()).get());

    try {
      int i1 = int1.materializeInt();
      assertEquals(expected.int1.intValue(), i1);
    catch(IOException e) {

    try {
      long l1 =  long1.materializeLong();
      assertEquals(expected.long1.longValue(), l1);
    catch(IOException e) {

    try {
      short s2 = short2.materializeShort();
      assertEquals(expected.short2.shortValue(), s2);
    catch(IOException e) {

    try {
      int i2 = int2.materializeInt();
      assertEquals(expected.int2.intValue(), i2);
    catch(IOException e) {

    try {
      long l2 = long2.materializeLong();
      assertEquals(expected.long2.longValue(), l2);
    catch(IOException e) {

    try {
      short s3 = short3.materializeShort();
      assertEquals(expected.short3.shortValue(), s3);
    catch(IOException e) {

    try {
      int i3 = int3.materializeInt();
      assertEquals(expected.int3.intValue(), i3);
    catch(IOException e) {

    try {
      long l3 = long3.materializeLong();
      assertEquals(expected.long3.longValue(), l3);
    catch(IOException e) {

    // Floating point fields are compared with a tolerance of 0.0001.
    try {
      float f1 = float1.materializeFloat();
      assertEquals(expected.float1.floatValue(), f1, 0.0001);
    catch(IOException e) {

    try {
      double d1 = double1.materializeDouble();
      assertEquals(expected.double1.doubleValue(), d1, 0.0001);
    catch(IOException e) {

  // Rebuilds the expected ReallyBigRow for rowNumber from inputs and compares it field by
  // field (indices 0 through 19) with the row that was read. Most fields are guarded by
  // nextIsNull() before the materialized value is compared. When testPrimitives is set,
  // the primitive fields are checked again through compareRowsUsingPrimitives.
  private void compareRows(OrcStruct row, RandomRowInputs inputs, int rowNumber,
      NumberOfNulls numNulls, boolean testPrimitives) throws Exception {
    ReallyBigRow expected = null ;
    // The MANY/SOME arm assigns a row built by createRandomRowWithNulls; the NONE arm
    // assigns one built by createRandomRow.
    switch (numNulls) {
    case MANY:
    case SOME:
      expected = createRandomRowWithNulls(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber, numNulls);
    case NONE:
      expected = createRandomRow(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber);
    // Field 0: boolean.
    OrcLazyBoolean boolean1 = (OrcLazyBoolean) row.getFieldValue(0);
    if (boolean1.nextIsNull()) {
    } else {
          ((BooleanWritable) boolean1.materialize()).get());

    // Field 1: byte.
    if (((OrcLazyObject) row.getFieldValue(1)).nextIsNull()) {
    } else {
          ((ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize()).get());

    // Field 2: short, read through the primitive accessor instead of nextIsNull().
    OrcLazyShort short1 = (OrcLazyShort) row.getFieldValue(2);
      short s1 = short1.materializeShort();
      assertEquals(expected.short1.shortValue(), s1);
    catch(IOException e) {

    // Fields 3 through 12: int, long, short, int, long, short, int, long, float, double.
    OrcLazyInt int1 = (OrcLazyInt)row.getFieldValue(3);
    if (int1.nextIsNull()) {
    } else {
          ((IntWritable) int1.materialize()).get());

    OrcLazyLong long1 = (OrcLazyLong)row.getFieldValue(4);
    if (long1.nextIsNull()) {
    } else {
          ((LongWritable) long1.materialize()).get());

    OrcLazyShort short2 = (OrcLazyShort)row.getFieldValue(5);
    if (short2.nextIsNull()) {
    } else {
          ((ShortWritable) short2.materialize()).get());

    OrcLazyInt int2 = (OrcLazyInt) row.getFieldValue(6);
    if (int2.nextIsNull()) {
    } else {
          ((IntWritable) int2.materialize()).get());

    OrcLazyLong long2 = (OrcLazyLong) row.getFieldValue(7);
    if (long2.nextIsNull()) {
    } else {
          ((LongWritable) long2.materialize()).get());

    OrcLazyShort short3 = (OrcLazyShort)row.getFieldValue(8);
    if (short3.nextIsNull()) {
    } else {
          ((ShortWritable) short3.materialize()).get());

    OrcLazyInt int3 = (OrcLazyInt) row.getFieldValue(9);
    if (int3.nextIsNull()) {
    } else {
          ((IntWritable) int3.materialize()).get());

    OrcLazyLong long3 = (OrcLazyLong) row.getFieldValue(10);
    if (long3.nextIsNull()) {
    } else {
          ((LongWritable) long3.materialize()).get());

    // Float and double are compared with a tolerance of 0.0001.
    OrcLazyFloat float1 = (OrcLazyFloat) row.getFieldValue(11);
    if (float1.nextIsNull()) {
    } else {
          ((FloatWritable) float1.materialize()).get(), 0.0001);

    OrcLazyDouble double1 = (OrcLazyDouble) row.getFieldValue(12);
    if (double1.nextIsNull()) {
    } else {
          ((DoubleWritable) double1.materialize()).get(), 0.0001);

    // Field 13: binary.
    if (((OrcLazyObject) row.getFieldValue(13)).nextIsNull()) {
    } else {
      assertEquals(expected.bytes1, ((OrcLazyBinary) row.getFieldValue(13)).materialize());

    // Fields 14 through 16: strings.
    if (((OrcLazyObject) row.getFieldValue(14)).nextIsNull()) {
    } else {
      assertEquals(expected.string1, ((OrcLazyString) row.getFieldValue(14)).materialize());

    if (((OrcLazyString) row.getFieldValue(15)).nextIsNull()) {
    } else {
      assertEquals(expected.string2, ((OrcLazyString) row.getFieldValue(15)).materialize());

    if (((OrcLazyString) row.getFieldValue(16)).nextIsNull()) {
    } else {
      assertEquals(expected.string3, ((OrcLazyString) row.getFieldValue(16)).materialize());

    // Field 17: nested struct; its only expected field name is list, holding a list of structs.
    if (((OrcLazyObject) row.getFieldValue(17)).nextIsNull()) {
    } else {
      final List<InnerStruct> expectedList = expected.middle.list;
      final OrcStruct actualMiddle = (OrcStruct) ((OrcLazyStruct) row.getFieldValue(17)).materialize();
      final List<OrcStruct> actualList =
          (List) actualMiddle.getFieldValue(0);
      compareListOfStructs(expectedList, actualList);
      final List<String> actualFieldNames = actualMiddle.getFieldNames();
      final List<String> expectedFieldNames = ImmutableList.of("list");
      compareLists(expectedFieldNames, actualFieldNames);
    // Field 18: list of structs.
    if (((OrcLazyObject) row.getFieldValue(18)).nextIsNull()) {
    } else {
      compareListOfStructs(expected.list, (List) ((OrcLazyList) row.getFieldValue(18)).materialize());
    // Field 19: map.
    if (((OrcLazyObject) row.getFieldValue(19)).nextIsNull()) {
    } else {
      compareMap(expected.map, (Map) ((OrcLazyMap) row.getFieldValue(19)).materialize());

    // Optional second pass over the primitive fields using the primitive accessors.
    if (testPrimitives) {
      compareRowsUsingPrimitives(expected, boolean1, short1, int1, long1, short2, int2, long2,
          short3, int3, long3, float1, double1);

  // Variant of compareRows that never calls nextIsNull(): every field is materialized
  // first and the result is tested against null before its value is compared with the
  // expected row. When usingPrimitives is set, the lazy primitive objects are also passed
  // to compareRowsUsingPrimitives.
  private void compareRowsWithoutNextIsNull(OrcStruct row, RandomRowInputs inputs, int rowNumber,
      NumberOfNulls numNulls, boolean usingPrimitives) throws Exception {

    ReallyBigRow expected = null;
    // The MANY/SOME arm assigns a row built by createRandomRowWithNulls; the NONE arm
    // assigns one built by createRandomRow.
    switch (numNulls) {
    case MANY:
    case SOME:
      expected = createRandomRowWithNulls(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber, numNulls);
    case NONE:
      expected = createRandomRow(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber);

    // Field 0: boolean. The lazy object is kept in a local for the optional primitive pass.
    OrcLazyBoolean lazyBoolean1 = (OrcLazyBoolean) row.getFieldValue(0);
    BooleanWritable boolean1 = (BooleanWritable) lazyBoolean1.materialize();
    if (boolean1 == null) {
    } else {
      assertEquals(expected.boolean1.booleanValue(), boolean1.get());

    // Field 1: byte.
    ByteWritable byte1 = (ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize();
    if (byte1 == null) {
    } else {
      assertEquals(expected.byte1.byteValue(), byte1.get());

    // Fields 2 through 10: three (short, int, long) triples.
    OrcLazyShort lazyShort1 = (OrcLazyShort) row.getFieldValue(2);
    ShortWritable short1 = (ShortWritable) lazyShort1.materialize();
    if (short1 == null) {
    } else {
      assertEquals(expected.short1.shortValue(), short1.get());

    OrcLazyInt lazyInt1 = (OrcLazyInt) row.getFieldValue(3);
    IntWritable int1 = (IntWritable) lazyInt1.materialize();
    if (int1 == null) {
    } else {
      assertEquals(expected.int1.intValue(), int1.get());

    OrcLazyLong lazyLong1 = (OrcLazyLong) row.getFieldValue(4);
    LongWritable long1 = (LongWritable) lazyLong1.materialize();
    if (long1 == null) {
    } else {
      assertEquals(expected.long1.longValue(), long1.get());

    OrcLazyShort lazyShort2 = (OrcLazyShort) row.getFieldValue(5);
    ShortWritable short2 = (ShortWritable) lazyShort2.materialize();
    if (short2 == null) {
    } else {
      assertEquals(expected.short2.shortValue(), short2.get());

    OrcLazyInt lazyInt2 = (OrcLazyInt) row.getFieldValue(6);
    IntWritable int2 = (IntWritable) lazyInt2.materialize();
    if (int2 == null) {
    } else {
      assertEquals(expected.int2.intValue(), int2.get());

    OrcLazyLong lazyLong2 = (OrcLazyLong) row.getFieldValue(7);
    LongWritable long2 = (LongWritable) lazyLong2.materialize();
    if (long2 == null) {
    } else {
      assertEquals(expected.long2.longValue(), long2.get());

    OrcLazyShort lazyShort3 = (OrcLazyShort) row.getFieldValue(8);
    ShortWritable short3 = (ShortWritable) lazyShort3.materialize();
    if (short3 == null) {
    } else {
      assertEquals(expected.short3.shortValue(), short3.get());

    OrcLazyInt lazyInt3 = (OrcLazyInt) row.getFieldValue(9);
    IntWritable int3 = (IntWritable) lazyInt3.materialize();
    if (int3 == null) {
    } else {
      assertEquals(expected.int3.intValue(), int3.get());

    OrcLazyLong lazyLong3 = (OrcLazyLong) row.getFieldValue(10);
    LongWritable long3 = (LongWritable) lazyLong3.materialize();
    if (long3 == null) {
    } else {
      assertEquals(expected.long3.longValue(), long3.get());

    // Fields 11 and 12: float and double, compared with a tolerance of 0.0001.
    OrcLazyFloat lazyFloat1 = (OrcLazyFloat) row.getFieldValue(11);
    FloatWritable float1 = (FloatWritable) lazyFloat1.materialize();
    if (float1 == null) {
    } else {
      assertEquals(expected.float1.floatValue(), float1.get(), 0.0001);

    OrcLazyDouble lazyDouble1 = (OrcLazyDouble) row.getFieldValue(12);
    DoubleWritable double1 = (DoubleWritable) lazyDouble1.materialize();
    if (double1 == null) {
    } else {
      assertEquals(expected.double1.doubleValue(), double1.get(), 0.0001);

    // Field 13: binary.
    BytesWritable bytes1 = (BytesWritable) ((OrcLazyBinary) row.getFieldValue(13)).materialize();
    if (bytes1 == null) {
    } else {
      assertEquals(expected.bytes1, bytes1);

    // Fields 14 through 16: strings, materialized as Text.
    Text string1 = (Text) ((OrcLazyString) row.getFieldValue(14)).materialize();
    if (string1 == null) {
    } else {
      assertEquals(expected.string1, string1);

    Text string2 = (Text) ((OrcLazyString) row.getFieldValue(15)).materialize();
    if (string2 == null) {
    } else {
      assertEquals(expected.string2, string2);

    Text string3 = (Text) ((OrcLazyString) row.getFieldValue(16)).materialize();
    if (string3 == null) {
    } else {
      assertEquals(expected.string3, string3);

    // Field 17: nested struct; its only expected field name is list, holding a list of structs.
    OrcStruct middle = (OrcStruct) ((OrcLazyStruct) row.getFieldValue(17)).materialize();
    if (middle == null) {
    } else {
      final List<InnerStruct> expectedList = expected.middle.list;
      final List<OrcStruct> actualList = (List) middle.getFieldValue(0);
      compareListOfStructs(expectedList, actualList);
      final List<String> actualFieldNames = middle.getFieldNames();
      final List<String> expectedFieldNames = ImmutableList.of("list");
      compareLists(expectedFieldNames, actualFieldNames);

    // Field 18: list of structs.
    List list = (List) ((OrcLazyList) row.getFieldValue(18)).materialize();
    if (list == null) {
    } else {
      compareListOfStructs(expected.list, list);

    // Field 19: map.
    Map map = (Map) ((OrcLazyMap) row.getFieldValue(19)).materialize();
    if (map == null) {
    } else {
      compareMap(expected.map, map);

    // Optional second pass over the primitive fields using the lazy objects kept above.
    if (usingPrimitives) {
      compareRowsUsingPrimitives(expected, lazyBoolean1, lazyShort1, lazyInt1, lazyLong1,
          lazyShort2, lazyInt2, lazyLong2, lazyShort3, lazyInt3, lazyLong3, lazyFloat1,

  // The three public tests below run the private testSeek(lowMemory, testPrimitives,
  // lazyHdfsReads) helper with exactly one of its flags switched on.
  public void testSeek() throws Exception {
    testSeek(false, true, false);

  public void testSeekLowMemory() throws Exception {
    testSeek(true, false, false);

  public void testSeekLazyHdfsReads() throws Exception {
    testSeek(false, false, true);

  // Writes 32768 random rows (optionally in low memory mode), sets HIVE_ORC_EAGER_HDFS_READ
  // to the opposite of lazyHdfsReads, and compares rows against the expected row for each
  // index while walking the indices from the last one down to 0.
  // NOTE(review): no seek call is visible inside the loop in this view - confirm where the
  // reader is positioned before each rows.next call.
  private void testSeek(boolean lowMemory, boolean testPrimitives, boolean lazyHdfsReads)
      throws Exception {
    final int COUNT=32768;
    RandomRowInputs inputs = writeRandomRows(COUNT, lowMemory);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, !lazyHdfsReads);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(COUNT, reader.getNumberOfRows());
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for(int i=COUNT-1; i >= 0; --i) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      compareRows(row, inputs, i, NumberOfNulls.NONE, testPrimitives);

  // Writes 32768 rows (with or without nulls, depending on numNulls), then seeks to row
  // i * n for i in [0, COUNT / n) and compares the row read there with the expected row.
  // withoutNextIsNull selects compareRowsWithoutNextIsNull instead of compareRows; both are
  // called with their primitive-check flag set to true.
  private void readEveryNthRow(int n, boolean withoutNextIsNull, NumberOfNulls numNulls) throws Exception {
    final int COUNT=32768;
    RandomRowInputs inputs = null;
    // The NONE arm writes rows without nulls; the SOME/MANY arm writes rows with nulls.
    switch (numNulls) {
    case NONE:
      inputs = writeRandomRows(COUNT, false);
    case SOME:
    case MANY:
      inputs = writeRandomRowsWithNulls(COUNT, numNulls, false);

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(COUNT, reader.getNumberOfRows());
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // Integer division: rows past n * (COUNT / n) are never visited.
    for(int i = 0; i < COUNT / n; i++) {
      rows.seekToRow(i * n);
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      if (withoutNextIsNull) {
        compareRowsWithoutNextIsNull(row, inputs, i * n, numNulls, true);
      } else {
        compareRows(row, inputs, i * n, numNulls, true);

  // The tests below call readEveryNthRow with strides 1 through 10, 100, 1000 and 10000,
  // comparing through compareRows (withoutNextIsNull = false) on data without nulls.
  public void testEveryRow() throws Exception {
    readEveryNthRow(1, false, NumberOfNulls.NONE);

  public void testEveryOtherRow() throws Exception {
    readEveryNthRow(2, false, NumberOfNulls.NONE);

  public void testEveryThirdRow() throws Exception {
    readEveryNthRow(3, false, NumberOfNulls.NONE);

  public void testEveryFourthRow() throws Exception {
    readEveryNthRow(4, false, NumberOfNulls.NONE);

  public void testEveryFifthRow() throws Exception {
    readEveryNthRow(5, false, NumberOfNulls.NONE);

  public void testEverySixthRow() throws Exception {
    readEveryNthRow(6, false, NumberOfNulls.NONE);

  public void testEverySeventhRow() throws Exception {
    readEveryNthRow(7, false, NumberOfNulls.NONE);

  public void testEveryEighthRow() throws Exception {
    readEveryNthRow(8, false, NumberOfNulls.NONE);

  public void testEveryNinthRow() throws Exception {
    readEveryNthRow(9, false, NumberOfNulls.NONE);

  public void testEveryTenthRow() throws Exception {
    readEveryNthRow(10, false, NumberOfNulls.NONE);

  public void testEveryHundredthRow() throws Exception {
    readEveryNthRow(100, false, NumberOfNulls.NONE);

  public void testEveryThousandthRow() throws Exception {
    readEveryNthRow(1000, false, NumberOfNulls.NONE);

  public void testEveryTenThousandthRow() throws Exception {
    readEveryNthRow(10000, false, NumberOfNulls.NONE);

  // Same strides as the plain testEvery...Row tests (1 through 10, 100, 1000, 10000) on data
  // without nulls, but comparing through compareRowsWithoutNextIsNull (withoutNextIsNull = true).
  public void testEveryRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(1, true, NumberOfNulls.NONE);

  public void testEveryOtherRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(2, true, NumberOfNulls.NONE);

  public void testEveryThirdRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(3, true, NumberOfNulls.NONE);

  public void testEveryFourthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(4, true, NumberOfNulls.NONE);

  public void testEveryFifthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(5, true, NumberOfNulls.NONE);

  public void testEverySixthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(6, true, NumberOfNulls.NONE);

  public void testEverySeventhRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(7, true, NumberOfNulls.NONE);

  public void testEveryEighthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(8, true, NumberOfNulls.NONE);

  public void testEveryNinthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(9, true, NumberOfNulls.NONE);

  public void testEveryTenthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(10, true, NumberOfNulls.NONE);

  public void testEveryHundredthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(100, true, NumberOfNulls.NONE);

  public void testEveryThousandthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(1000, true, NumberOfNulls.NONE);

  public void testEveryTenThousandthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(10000, true, NumberOfNulls.NONE);

  // Counterpart of writeRandomRows that builds each row with createRandomRowWithNulls, so
  // the rows contain nulls according to numNulls. The random inputs are generated from
  // Random(42) in the same order as in writeRandomRows. lowMemoryMode only changes the
  // fifth WriterImpl argument here (200000 instead of 4000000); no configuration value is
  // set in the visible code.
  private RandomRowInputs writeRandomRowsWithNulls(int count, NumberOfNulls numNulls,
      boolean lowMemoryMode) throws IOException {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (ReallyBigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        lowMemoryMode ? 200000 : 4000000, CompressionKind.ZLIB, 65536, 1000,
            new MemoryManager(conf));
    // Fixed seed: the generated inputs are the same on every run.
    Random rand = new Random(42);
    RandomRowInputs inputs = new RandomRowInputs(count);
    long[] intValues = inputs.intValues;
    double[] doubleValues = inputs.doubleValues;
    String[] stringValues = inputs.stringValues;
    BytesWritable[] byteValues = inputs.byteValues;
    String[] words = inputs.words;
    // Word pool: hex renderings of random ints.
    for(int i=0; i < words.length; ++i) {
      words[i] = Integer.toHexString(rand.nextInt());
    // Long and string inputs are filled two slots per iteration, so only the first
    // 2 * (count / 2) slots are assigned by this loop.
    for(int i=0; i < count/2; ++i) {
      intValues[2*i] = rand.nextLong();
      intValues[2*i+1] = rand.nextLong();
      stringValues[2*i] = words[rand.nextInt(words.length)];
      stringValues[2*i+1] = words[rand.nextInt(words.length)];
    // One double and one 20-byte buffer per row.
    for(int i=0; i < count; ++i) {
      doubleValues[i] = rand.nextDouble();
      byte[] buf = new byte[20];
      byteValues[i] = new BytesWritable(buf);
    for(int i=0; i < count; ++i) {
      ReallyBigRow bigrow = createRandomRowWithNulls(intValues, doubleValues, stringValues,
          byteValues, words, i, numNulls);
    // Drop the local reference to the writer before handing the inputs back.
    writer = null;
    return inputs;

  // The tests below call readEveryNthRow with strides 1 through 10, 100, 1000 and 10000 on
  // data written with NumberOfNulls.SOME, comparing through compareRows.
  public void testEveryRowWithNulls() throws Exception {
    readEveryNthRow(1, false, NumberOfNulls.SOME);

  public void testEveryOtherRowWithNulls() throws Exception {
    readEveryNthRow(2, false, NumberOfNulls.SOME);

  public void testEveryThirdRowWithNulls() throws Exception {
    readEveryNthRow(3, false, NumberOfNulls.SOME);

  public void testEveryFourthRowWithNulls() throws Exception {
    readEveryNthRow(4, false, NumberOfNulls.SOME);

  public void testEveryFifthRowWithNulls() throws Exception {
    readEveryNthRow(5, false, NumberOfNulls.SOME);

  public void testEverySixthRowWithNulls() throws Exception {
    readEveryNthRow(6, false, NumberOfNulls.SOME);

  public void testEverySeventhRowWithNulls() throws Exception {
    readEveryNthRow(7, false, NumberOfNulls.SOME);

  public void testEveryEighthRowWithNulls() throws Exception {
    readEveryNthRow(8, false, NumberOfNulls.SOME);

  public void testEveryNinthRowWithNulls() throws Exception {
    readEveryNthRow(9, false, NumberOfNulls.SOME);

  public void testEveryTenthRowWithNulls() throws Exception {
    readEveryNthRow(10, false, NumberOfNulls.SOME);

  public void testEveryHundredthRowWithNulls() throws Exception {
    readEveryNthRow(100, false, NumberOfNulls.SOME);

  public void testEveryThousandthRowWithNulls() throws Exception {
    readEveryNthRow(1000, false, NumberOfNulls.SOME);

  public void testEveryTenThousandthRowWithNulls() throws Exception {
    readEveryNthRow(10000, false, NumberOfNulls.SOME);

  // Same strides (1 through 10, 100, 1000, 10000) on data written with NumberOfNulls.SOME,
  // but comparing through compareRowsWithoutNextIsNull (withoutNextIsNull = true).
  public void testEveryRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(1, true, NumberOfNulls.SOME);

  public void testEveryOtherRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(2, true, NumberOfNulls.SOME);

  public void testEveryThirdRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(3, true, NumberOfNulls.SOME);

  public void testEveryFourthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(4, true, NumberOfNulls.SOME);

  public void testEveryFifthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(5, true, NumberOfNulls.SOME);

  public void testEverySixthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(6, true, NumberOfNulls.SOME);

  public void testEverySeventhRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(7, true, NumberOfNulls.SOME);

  public void testEveryEighthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(8, true, NumberOfNulls.SOME);

  public void testEveryNinthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(9, true, NumberOfNulls.SOME);

  public void testEveryTenthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(10, true, NumberOfNulls.SOME);

  public void testEveryHundredthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(100, true, NumberOfNulls.SOME);

  public void testEveryThousandthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(1000, true, NumberOfNulls.SOME);

  public void testEveryTenThousandthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(10000, true, NumberOfNulls.SOME);

  // Writes 32768 rows, then reads all of them in order with rows.next but materializes and
  // compares only the rows whose index is not a multiple of n; rows at multiples of n are
  // fetched and left unmaterialized. Both compare helpers are called with their
  // primitive-check flag set to false.
  // NOTE(review): for n == 1 the condition i % n != 0 never holds, so no row is compared.
  private void skipEveryNthRow(int n, boolean withoutNextIsNull, NumberOfNulls numNulls) throws Exception {
    final int COUNT=32768;
    RandomRowInputs inputs = null;
    // The NONE arm writes rows without nulls; the SOME/MANY arm writes rows with nulls.
    switch (numNulls) {
    case NONE:
      inputs = writeRandomRows(COUNT, false);
    case SOME:
    case MANY:
      inputs = writeRandomRowsWithNulls(COUNT, numNulls, false);

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(COUNT, reader.getNumberOfRows());
    RecordReader rows = reader.rows(null);
    OrcLazyRow lazyRow = null;
    OrcStruct row = null;
    for(int i=0; i < COUNT; i++) {
      lazyRow = (OrcLazyRow) rows.next(lazyRow);
      if (i % n != 0) {
        row = (OrcStruct) lazyRow.materialize();
        if (withoutNextIsNull) {
          compareRowsWithoutNextIsNull(row, inputs, i, numNulls, false);
        } else {
          compareRows(row, inputs, i, numNulls, false);

  public void testEveryRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(1, false, NumberOfNulls.MANY);

  public void testEveryOtherRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(2, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(3, false, MANY): rows at indices divisible by 3 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEveryThirdRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(3, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(4, false, MANY): rows at indices divisible by 4 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEveryFourthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(4, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(5, false, MANY): rows at indices divisible by 5 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEveryFifthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(5, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(6, false, MANY): rows at indices divisible by 6 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEverySixthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(6, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(7, false, MANY): rows at indices divisible by 7 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEverySeventhRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(7, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(8, false, MANY): rows at indices divisible by 8 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEveryEighthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(8, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(9, false, MANY): rows at indices divisible by 9 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEveryNinthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(9, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(10, false, MANY): rows at indices divisible by 10 stay unmaterialized;
  // the rest are checked with compareRows.
  public void testEveryTenthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(10, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(100, false, MANY): rows at indices divisible by 100 stay
  // unmaterialized; the rest are checked with compareRows.
  public void testEveryHundredthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(100, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(1000, false, MANY): rows at indices divisible by 1000 stay
  // unmaterialized; the rest are checked with compareRows.
  public void testEveryThousandthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(1000, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(10000, false, MANY): rows at indices divisible by 10000 stay
  // unmaterialized; the rest are checked with compareRows.
  public void testEveryTenThousandthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(10000, false, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(1, true, MANY). With n = 1, i % n is always 0, so the helper's
  // materialize-and-compare branch is never taken; only the row count check and rows.next() run.
  public void testEveryRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(1, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(2, true, MANY): rows at indices divisible by 2 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryOtherRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(2, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(3, true, MANY): rows at indices divisible by 3 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryThirdRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(3, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(4, true, MANY): rows at indices divisible by 4 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryFourthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(4, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(5, true, MANY): rows at indices divisible by 5 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryFifthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(5, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(6, true, MANY): rows at indices divisible by 6 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEverySixthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(6, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(7, true, MANY): rows at indices divisible by 7 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEverySeventhRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(7, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(8, true, MANY): rows at indices divisible by 8 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryEighthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(8, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(9, true, MANY): rows at indices divisible by 9 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryNinthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(9, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(10, true, MANY): rows at indices divisible by 10 stay unmaterialized;
  // the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryTenthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(10, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(100, true, MANY): rows at indices divisible by 100 stay
  // unmaterialized; the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryHundredthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(100, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(1000, true, MANY): rows at indices divisible by 1000 stay
  // unmaterialized; the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryThousandthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(1000, true, NumberOfNulls.MANY);

  // Drives skipEveryNthRow(10000, true, MANY): rows at indices divisible by 10000 stay
  // unmaterialized; the rest are checked with compareRowsWithoutNextIsNull.
  public void testEveryTenThousandthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(10000, true, NumberOfNulls.MANY);

  /**
   * Compares an expected InnerStruct with the OrcStruct that was read back.
   * If either side is null, both must be equal (i.e. both null); otherwise field 0 is compared
   * with expect.int1 and field 1 with expect.string1.
   *
   * @param expect the value that was written (may be null)
   * @param actual the value that was read (may be null)
   */
  private void compareInner(InnerStruct expect,
      OrcStruct actual) throws Exception {
    if (expect == null || actual == null) {
      assertEquals(expect, actual);
    } else {
      // NOTE(review): nothing is shown in this branch, so a null field 0 is accepted without
      // checking expect.int1 - presumably an assertNull(expect.int1) belongs here; confirm.
      if (actual.getFieldValue(0) == null) {
      } else {
        assertEquals(expect.int1.intValue(), ((IntWritable) actual.getFieldValue(0)).get());
      assertEquals(expect.string1, actual.getFieldValue(1));

  /**
   * Asserts that both lists have the same size and that each pair of elements at the same index
   * matches according to compareInner.
   *
   * @param expect the list that was written
   * @param actual the list that was read
   */
  private void compareListOfStructs(List<InnerStruct> expect,
      List<OrcStruct> actual) throws Exception {
    assertEquals(expect.size(), actual.size());
    for(int j=0; j < expect.size(); ++j) {
      compareInner(expect.get(j), actual.get(j));

  /**
   * Asserts that the two (raw) lists have the same size.
   * NOTE(review): only the size check is shown here; no element-wise comparison is visible.
   *
   * @param expect the list that was written
   * @param actual the list that was read
   */
  private void compareLists(List expect, List actual) throws Exception {
    assertEquals(expect.size(), actual.size());

  /**
   * Asserts that both maps have the same size and that, for every key of the expected map, the
   * value read back matches the expected value according to compareInner.
   * A key missing from actual yields a null lookup, which compareInner only accepts when the
   * expected value is null as well.
   *
   * @param expect the map that was written
   * @param actual the map that was read
   */
  private void compareMap(Map<Text, InnerStruct> expect, Map<Text, OrcStruct> actual)
      throws Exception {
    assertEquals(expect.size(), actual.size());
    for (Text key : expect.keySet()) {
      compareInner(expect.get(key), actual.get(key));

  /**
   * Builds the i-th ReallyBigRow (no null fields) from the pre-generated value arrays.
   * Most fields are derived from intValues[i]; the "WithUniques" values switch to an i-derived
   * value on every 10th row (i % 10 == 0).
   *
   * @param intValues    source of the boolean, integral and struct values
   * @param doubleValues source of the float and double values
   * @param stringValues source of the string values
   * @param byteValues   source of the binary value
   * @param words        word list; words[i % words.length] + "-x" feeds the second inner struct
   * @param i            index of the row to build
   * @return the constructed row
   */
  private ReallyBigRow createRandomRow(long[] intValues, double[] doubleValues,
      String[] stringValues,
      BytesWritable[] byteValues,
      String[] words, int i) {
    InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
    // The second struct takes the upper 32 bits of the same long value.
    InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
        words[i % words.length] + "-x");
    // Every 10th value of this string should be unique-ish in the file
    String stringWithUniques = i % 10 == 0 ? Integer.toHexString(i) : stringValues[i];
    Short shortWithUniques = i % 10 == 0 ? (short) i :  (short) (intValues[i] % 10);
    Integer intWithUniques = i % 10 == 0 ? (int) (Short.MAX_VALUE + i) :
      (int) (Short.MAX_VALUE + (intValues[i] % 10));
    // NOTE(review): Integer.MAX_VALUE + i is int arithmetic (i is an int), so it wraps around for
    // i > 0 before the cast to long; the other branch adds a long and does not - confirm intent.
    Long longWithUniques = i % 10 == 0 ? (long) (Integer.MAX_VALUE + i) :
      (long) (Integer.MAX_VALUE + (intValues[i] % 10));
    return new ReallyBigRow((intValues[i] & 1) == 0, (byte) intValues[i],
        (short) intValues[i], (int) (Short.MAX_VALUE + intValues[i]),
        (long) (Integer.MAX_VALUE + intValues[i]), (short) (intValues[i] % 10),
        (int) (Short.MAX_VALUE + (intValues[i] % 10)),
        (long) (Integer.MAX_VALUE + (intValues[i] % 10)),
        shortWithUniques, intWithUniques, longWithUniques, (float) doubleValues[i],
        doubleValues[i], byteValues[i], stringValues[i], Integer.toHexString(i),
        stringWithUniques, new MiddleStruct(inner, inner2), list(), map(inner,inner2));

  /**
   * Builds the i-th ReallyBigRow with some fields set to null.
   * Each field is guarded by "intValues[i] % k == 0 ^ lotsOfNulls" (k between 10 and 19), which
   * parses as (intValues[i] % k == 0) XOR lotsOfNulls. When numNulls is not MANY, a field is
   * null only if intValues[i] is divisible by its k; when numNulls is MANY the rule is inverted
   * and the field is null unless intValues[i] is divisible by its k.
   *
   * @param intValues    source of the null decisions and of most values
   * @param doubleValues source of the float and double values
   * @param stringValues source of the string values
   * @param byteValues   source of the binary value
   * @param words        word list; words[i % words.length] + "-x" feeds the second inner struct
   * @param i            index of the row to build
   * @param numNulls     MANY inverts the null rule; any other value keeps it as is
   * @return the constructed row
   */
  private ReallyBigRow createRandomRowWithNulls(long[] intValues, double[] doubleValues,
      String[] stringValues, BytesWritable[] byteValues, String[] words, int i, NumberOfNulls numNulls) {
    boolean lotsOfNulls = numNulls == NumberOfNulls.MANY;
    Boolean booleanVal = intValues[i] % 10 == 0 ^ lotsOfNulls ? null : (intValues[i] & 1) == 0;
    Byte byteVal = intValues[i] % 11 == 0 ^ lotsOfNulls ? null : (byte) intValues[i];
    Short shortVal = intValues[i] % 12 == 0 ^ lotsOfNulls  ? null : (short) intValues[i];
    Integer intVal = intValues[i] % 13 == 0 ^ lotsOfNulls  ? null : (int) (Short.MAX_VALUE + i);
    // NOTE(review): Integer.MAX_VALUE + i is int arithmetic and wraps around for i > 0 before the
    // cast to long - confirm this is intended.
    Long longVal = intValues[i] % 14 == 0 ^ lotsOfNulls  ? null : (long) (Integer.MAX_VALUE + i);
    Float floatVal = intValues[i] % 15 == 0 ^ lotsOfNulls  ? null : (float) doubleValues[i];
    Double doubleVal = intValues[i] % 16 == 0 ^ lotsOfNulls  ? null : doubleValues[i];
    BytesWritable bytesVal = intValues[i] % 17 == 0 ^ lotsOfNulls  ? null : byteValues[i];
    String strVal = intValues[i] % 18 == 0 ^ lotsOfNulls  ? null : stringValues[i];
    // The nested structs can be null themselves and can also carry null members.
    InnerStruct inner = intValues[i] % 19 == 0 ^ lotsOfNulls  ? null : new InnerStruct(
        intValues[i] % 10 == 0 ^ lotsOfNulls  ? null : (int) intValues[i],
            intValues[i] % 11 == 0 ^ lotsOfNulls  ? null : stringValues[i]);
    InnerStruct inner2 = intValues[i] % 12 == 0 ^ lotsOfNulls  ? null : new InnerStruct(
        intValues[i] % 13 == 0 ^ lotsOfNulls  ? null : (int) (intValues[i] >> 32),
            intValues[i] % 14 == 0 ^ lotsOfNulls  ? null : words[i % words.length] + "-x");
    MiddleStruct middle = intValues[i] % 15 == 0 ^ lotsOfNulls  ? null :
      new MiddleStruct(inner, inner2);
    List<InnerStruct> list = intValues[i] % 16 == 0 ^ lotsOfNulls  ? null : list(inner, inner2);
    Map<Text, InnerStruct> map = intValues[i] % 17 == 0 ^ lotsOfNulls  ? null : map(inner, inner2);
    String strVal2 = intValues[i] % 18 == 0 ^ lotsOfNulls  ? null : Integer.toHexString(i);
    Short shortVal2 = intValues[i] % 19 == 0 ^ lotsOfNulls  ? null : (short) (intValues[i] % 10);
    Integer intVal2 = intValues[i] % 10 == 0 ^ lotsOfNulls  ? null :
      (int) (Short.MAX_VALUE + (intValues[i] % 10));
    Long longVal2 = intValues[i] % 11 == 0 ^ lotsOfNulls  ? null :
      (long) (Integer.MAX_VALUE + (intValues[i] % 10));
    // The "3" values switch to an i-derived value when intValues[i] is divisible by 10.
    String strVal3 = intValues[i] % 12 == 0 ^ lotsOfNulls ? null :
      (intValues[i] % 10 == 0 ? Integer.toHexString(i) : stringValues[i]);
    Short shortVal3 = intValues[i] % 13 == 0 ^ lotsOfNulls  ? null :
      (intValues[i] % 10 == 0 ? (short) i : (short) (intValues[i] % 10));
    Integer intVal3 = intValues[i] % 14 == 0 ^ lotsOfNulls  ? null :
      (intValues[i] % 10 == 0 ? (int) (Short.MAX_VALUE + i) :
        (int) (Short.MAX_VALUE + (intValues[i] % 10)));
    // NOTE(review): same int wrap-around of Integer.MAX_VALUE + i as for longVal above.
    Long longVal3 = intValues[i] % 15 == 0 ^ lotsOfNulls  ? null :
      (intValues[i] % 10 == 0 ? (long) (Integer.MAX_VALUE + i) :
        (long) (Integer.MAX_VALUE + (intValues[i] % 10)));
    return new ReallyBigRow(booleanVal, byteVal, shortVal, intVal, longVal, shortVal2, intVal2,
        longVal2, shortVal3, intVal3, longVal3, floatVal, doubleVal, bytesVal, strVal, strVal2,
        strVal3, middle, list, map);

  /**
   * MemoryManager used by the tests: it remembers the most recently registered writer and
   * reports a fixed pool size and allocation scale supplied at construction time.
   */
  private static class MyMemoryManager extends MemoryManager {
    // Value returned by getTotalMemoryPool().
    final long totalSpace;
    // Value returned by getAllocationScale(); also scales the stripe size in shouldFlush().
    double rate;
    // Path of the last writer passed to addWriter(); reset to null by removeWriter().
    Path path = null;
    // requestedAllocation of the last addWriter() call; reset to 0 by removeWriter().
    long lastAllocation = 0;
    // Not updated by any method shown in this class.
    int rows = 0;
    // Callback of the last writer passed to addWriter().
    MemoryManager.Callback callback;

    // Stores the pool size and the allocation scale to report.
    MyMemoryManager(Configuration conf, long totalSpace, double rate) {
      this.totalSpace = totalSpace;
      this.rate = rate;

    // Records the writer's path, requested allocation and callback; initialAllocation is unused.
    void addWriter(Path path, long requestedAllocation,
        MemoryManager.Callback callback, long initialAllocation) {
      this.path = path;
      this.lastAllocation = requestedAllocation;
      this.callback = callback;

    // Forgets the recorded writer regardless of which path is passed in.
    synchronized void removeWriter(Path path) {
      this.path = null;
      this.lastAllocation = 0;

    // Returns the fixed pool size given to the constructor.
    long getTotalMemoryPool() {
      return totalSpace;

    // Returns the fixed scale given to the constructor.
    double getAllocationScale() {
      return rate;

    // No statements are shown for this method.
    void addedRow() throws IOException {


    // Requests a flush when the total estimate exceeds round(stripeSize * rate), or when a
    // positive maxDictSize is exceeded by the dictionary estimate. The path argument is unused.
    boolean shouldFlush(MemoryEstimate memoryEstimate, Path path, long stripeSize,
        long maxDictSize) {
      long limit = Math.round(stripeSize * rate);
      return memoryEstimate.getTotalMemory() > limit ||
          (maxDictSize > 0 && memoryEstimate.getDictionaryMemory() > maxDictSize);

  // Writes 2500 InnerStruct rows through a WriterImpl that uses MyMemoryManager (pool 10000,
  // scale 0.1) and checks how the writer registers with the manager and how the file is split
  // into stripes.
  public void testMemoryManagement() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        50000, CompressionKind.NONE, 100, 0, memory);
    // memory.path is only assigned in MyMemoryManager.addWriter, so this expects the writer to
    // have registered itself during construction.
    assertEquals(testFilePath, memory.path);
    for(int i=0; i < 2500; ++i) {
      writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
    // memory.path is only reset to null in MyMemoryManager.removeWriter.
    assertEquals(null, memory.path);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    // Count the stripes and bound the data length of each one.
    int i = 0;
    for(StripeInformation stripe: reader.getStripes()) {
      i += 1;
      assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
          stripe.getDataLength() < 6000);
    assertEquals(5, i);
    assertEquals(2500, reader.getNumberOfRows());

  /**
   * Tests a stride dictionary that contains only the empty string.
   */
  // Writes one empty string followed by 999 copies of "123" and expects to read the values back
  // in that order.
  public void testEmptyStringStrideDictionary() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);
    // 1 + 999 = 1000 rows in total.
    writer.addRow(new StringStruct(""));
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    // Expected values mirror the write order above.
    assertEquals("", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 999; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());

  /**
   * Tests writing a stripe containing a string column which is not dictionary encoded in the
   * first stripe; this is carried over to the third stripe, then dictionary encoding is turned
   * back on.  This will cause the dictionary to be nulled out, then reinitialized.
   */
  // Writes 2 x 2000 numeric strings followed by "b", 999 x "123", "a", 999 x "123" and expects
  // to read all 6000 values back in exactly that order.
  public void testStrideDictionariesWithoutStripeCarryover() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS, true);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
    // Write a stripe which is not dictionary encoded
    for (int i = 0; i < 2000; i++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    // Write another stripe (doesn't matter what)
    for (int i = 0; i < 2000; i++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    // Write a stripe which will be dictionary encoded
    // Note: it is important that this string is lexicographically after the string in the next
    // index stride.  This way, if sorting by index strides is not working, this value will appear
    // after the next one, though it should appear before, yielding incorrect results.
    writer.addRow(new StringStruct("b"));
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    writer.addRow(new StringStruct("a"));
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    // The first 4000 values are 0..1999 written twice, hence i % 2000.
    for (int i =0; i < 4000; i++) {
      assertEquals(Integer.toString(i % 2000), ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    // "b" must come back before "a", i.e. in write order rather than sorted order.
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 999; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 999; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());

  /**
   * Tests writing a stripe that contains a single string column across two index strides where
   * the column is dictionary encoded with a stride dictionary in both strides.
   * When reading, all rows in the first stride whose values are in the stride dictionary are
   * skipped, and in the second stride the values in the stride dictionary are read.
   * This can cause problems if seeking across strides is broken for stride dictionary streams.
   * @throws Exception
   */
  // Writes "a" + 999 x "123" and then "b" + 999 x "123", and on the read side checks the 999
  // "123" values followed by "b" (the leading "a" is not asserted).
  public void testSeekAcrossStrideDictionaries() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    // Set configs so the column is dictionary encoded and stride dictionaries are used
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);
    // Write this value once, so it's added to a stride dictionary
    writer.addRow(new StringStruct("a"));
    // Fill out the rest of the stride
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    // Write this value once, so it's added to a stride dictionary
    writer.addRow(new StringStruct("b"));
    // Fill out the rest of the stride
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    // Skip the one row in the stride dictionary in the first stride ("a")
    // Read the rest of the values in the stride
    for (int i =0; i < 999; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    // Read the row in the stride dictionary in the second stride (note that seek won't be called
    // because we read the previous row)
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());

  /**
   * Tests writing a stripe with a stride dictionary, followed by a stripe without one,
   * followed by a stripe with one.
   */
  // Writes three groups of 1000 string rows ("a","b","c" + 997 x "123"; 1000 x "123";
  // "a","b","c" + 997 x "123") and expects to read them back in the same order.
  public void testEmptyInStringDictionaryStream() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
    // First group: three distinct values, then 997 repeats.
    writer.addRow(new StringStruct("a"));
    writer.addRow(new StringStruct("b"));
    writer.addRow(new StringStruct("c"));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new StringStruct("123"));
    // Second group: only the repeated value.
    for (int i = 0; i < 1000; i++) {
      writer.addRow(new StringStruct("123"));
    // Third group: same shape as the first.
    writer.addRow(new StringStruct("a"));
    writer.addRow(new StringStruct("b"));
    writer.addRow(new StringStruct("c"));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new StringStruct("123"));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    // Expected values mirror the three groups written above.
    assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    assertEquals("c", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 997; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 1000; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    assertEquals("c", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 997; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());

  /**
   * Tests writing a stripe with a stride dictionary, followed by a stripe without one,
   * followed by a stripe with one.
   */
  // Integer counterpart of the string variant: writes three groups of 1000 int rows
  // (1,2,3 + 997 x 123; 1000 x 123; 1,2,3 + 997 x 123) and expects them back in the same order.
  public void testEmptyInIntDictionaryStream() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
    // First group: three distinct values, then 997 repeats.
    writer.addRow(new IntStruct(1));
    writer.addRow(new IntStruct(2));
    writer.addRow(new IntStruct(3));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new IntStruct(123));
    // Second group: only the repeated value.
    for (int i = 0; i < 1000; i++) {
      writer.addRow(new IntStruct(123));
    // Third group: same shape as the first.
    writer.addRow(new IntStruct(1));
    writer.addRow(new IntStruct(2));
    writer.addRow(new IntStruct(3));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new IntStruct(123));
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    // Expected values mirror the three groups written above.
    assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    for (int i =0; i < 997; i++) {
      assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    for (int i =0; i < 1000; i++) {
      assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    for (int i =0; i < 997; i++) {
      assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

  /**
   * Verifies the scenario where {@link com.facebook.hive.orc.BitFieldReader#skip(long)} skips to
   * the last value and doesn't load the next value if it has reached the end of the stream.
   * @throws Exception
   */
  // Writes 8 rows with a one-element string list followed by 4 rows with an empty list, then
  // jumps around in the list column through its LazyTreeReader and fails if a specific
  // "Read past end of buffer" EOFException is raised.
  public void testSkipWithEmptyArrayInEnd() throws Exception {
    ObjectInspector inspector;
    List<String> emptyList = Collections.emptyList();
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringListWithId.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);

    OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD, 0.01f);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE, false);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 1000000, CompressionKind.ZLIB, 1000, 1000);

    // Despite the name, the numNulls rows carry an empty list rather than a null one.
    int numNulls = 4;
    int numNonNulls = 8;
    for(int i = 0; i < numNonNulls; i++) {
      List<String> filledList = new ArrayList<String>(2);
      filledList.add("SomeMoreText" + i);
      writer.addRow(new StringListWithId(i, filledList));
    for(int j = 0; j < numNulls; j++) {
      writer.addRow(new StringListWithId(numNonNulls+j, emptyList));


    // Prepare to read back the data
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
    OrcStruct row = (OrcStruct) lazyRow.materialize();
    // Field 1 is the list column; its tree reader is driven directly by row index below.
    OrcLazyList list = ((OrcLazyList) row.getFieldValue(1));
    LazyTreeReader lazyReader = list.getLazyTreeReader();

    // Fetch the last row that has a non-empty list (index numNonNulls - 1).
    Object prev = lazyReader.get(numNonNulls - 1, null);

    boolean gotException = false;
    String expectedExceptionMessage = "Read past end of buffer RLE byte from compressed stream Stream for column 3 " +
        "kind IN_DICTIONARY base: 60 limit: 66 current stride: 1 compressed offset: 66 uncompressed: 66 to 66";
    try {
      // Jump ahead to index numNonNulls + 1, which is one of the empty-list rows.
      lazyReader.get(numNonNulls + 1, prev);
    } catch (EOFException e) {
      // Only the exact message above is recorded as the known failure; any other EOFException
      // is rethrown unchanged.
      if(e.getMessage().compareTo(expectedExceptionMessage) == 0) {
        gotException = true;
      } else {
        throw e;

    assertFalse("Got EOFException for reading past end of buffer RLE byte", gotException);

  /**
   * Tests writing a stripe with an integer column, which enters low memory mode before the
   * first index stride is complete.
   */
  // Writes the ints 0..999 in two batches of 500 and expects to read back row i with value i.
  public void testIntEnterLowMemoryModeInFirstStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 500 rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i));

    // Force the writer to enter low memory mode, note since the stride length was set to 10000
    // we're still in the first stride

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i + 500));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // The two batches together form the sequence 0..999.
    for (int i = 0; i < 1000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

  /**
   * Tests writing a stripe with a string column, which enters low memory mode before the first
   * index stride is complete.
   */
  // Writes the decimal strings "0".."999" in two batches of 500 and reads 1000 rows back.
  public void testStringEnterLowMemoryModeInFirstStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 500 rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    // Force the writer to enter low memory mode, note since the stride length was set to 10000
    // we're still in the first stride

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i + 500)));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      // NOTE(review): the opening of the assertion this argument belongs to is not shown here -
      // presumably assertEquals(Integer.toString(i), ...); confirm.
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());

  /**
   * Tests writing a stripe with a string column, which enters low memory mode before the second
   * index stride is complete, and does not complete that stride.
   */
  // Writes "0".."999" followed by "0".."499" (the latter in two batches of 250) and expects row
  // i to read back as the string form of i % 1000.
  public void testStringEnterLowMemoryModeInSecondStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    // Write 250 more rows (a portion of the second stride)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're still in the second stride

    // Write 250 more rows (which still gets written to the second stride, but not enough to fill
    // it)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i + 250)));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // Rows 1000..1499 repeat the values 0..499, hence i % 1000.
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());

  /**
   * Tests writing a stripe with an int column, which enters low memory mode before the second
   * index stride is complete, and does not complete that stride.
   */
  // Writes 0..999 followed by 0..499 (the latter in two batches of 250) and expects row i to
  // read back as i % 1000.
  public void testIntEnterLowMemoryModeInSecondStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new IntStruct(i));

    // Write 250 more rows (a portion of the second stride)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new IntStruct(i));

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're still in the second stride

    // Write 250 more rows (which still gets written to the second stride, but not enough to fill
    // it)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new IntStruct(i + 250));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // Rows 1000..1499 repeat the values 0..499, hence i % 1000.
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

  /**
   * Tests writing a stripe with a string column, which enters low memory mode just before the
   * second stride starts.
   */
  // Writes "0".."999" followed by "0".."499" and expects row i to read back as the string form
  // of i % 1000.
  public void testStringEnterLowMemoryModeAtStrideStart() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just starting the second stride

    // Write 500 more rows (a portion of the second stride)
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // Rows 1000..1499 repeat the values 0..499, hence i % 1000.
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());

  /**
   * Tests writing a stripe with an int column, which enters low memory mode just before the
   * second stride starts.
   */
  // Writes 0..999 followed by 0..499 and expects row i to read back as i % 1000.
  public void testIntEnterLowMemoryModeAtStrideStart() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new IntStruct(i));

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just starting the second stride

    // Write 500 more rows (a portion of the second stride)
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // Rows 1000..1499 repeat the values 0..499, hence i % 1000.
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

   * Tests writing a stripe with a string column, which enters low memory mode just after the
   * second stride starts
  public void testStringEnterLowMemoryModeAfterStrideStart() throws Exception {
    // Writes 1001 strings, then 499 more, and verifies that all 1500 values are read back in
    // the order they were written.
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1001 rows (the first stride plus the first row of the second stride); the last
    // of these rows wraps around to "0" because of the i % 1000
    for (int i = 0; i < 1001; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i % 1000)));

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just after starting the second stride

    // Write 499 more rows (a portion of the second stride), continuing from "1" since "0" was
    // already written above
    for (int i = 1; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    // Read the file back and check every row. The values written wrap around after 999, so
    // row i is expected to hold the string form of i % 1000.
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());

   * Tests writing a stripe with an int column, which enters low memory mode just after the
   * second stride starts
  public void testIntEnterLowMemoryModeAfterStrideStart() throws Exception {
    // Writes 1001 ints, then 499 more, and verifies that all 1500 values are read back in the
    // order they were written.
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1001 rows (the first stride plus the first row of the second stride); the last
    // of these rows wraps around to 0 because of the i % 1000
    for (int i = 0; i < 1001; i ++) {
      writer.addRow(new IntStruct(i % 1000));

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just after starting the second stride

    // Write 499 more rows (a portion of the second stride), continuing from 1 since 0 was
    // already written above
    for (int i = 1; i < 500; i ++) {
      writer.addRow(new IntStruct(i));

    // Read the file back and check every row. The values written wrap around after 999, so
    // row i is expected to hold i % 1000.
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

   * Tests writing a stripe with a string column, which doesn't do dictionary encoding, then
   * re-evaluates whether it should do dictionary encoding or not.  While it's re-evaluating, it
   * enters low memory mode.
  public void testStringEnterLowMemoryModeAndOnNotCarriedOverStripe() throws Exception {
    // Writes the strings "0".."999" twice (the second time in two batches of 500) and
    // verifies that all 2000 values are read back in the order they were written.
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    // Reevaluate if we should use dictionary encoding on every stripe
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 1000 rows, they will be directly encoded
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    // Flush the first stripe

    // Write 500 more rows (values "0".."499")
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));

    // Force the writer to enter low memory mode

    // Write 500 more rows (values "500".."999")
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i + 500)));

    // Read the file back and check every row. The values "0".."999" were written twice, so
    // row i is expected to hold the string form of i % 1000.
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 2000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());

   * Tests writing a stripe with an int column, which doesn't do dictionary encoding, then
   * re-evaluates whether it should do dictionary encoding or not.  While it's re-evaluating, it
   * enters low memory mode
  public void testIntegerEnterLowMemoryModeAndOnNotCarriedOverStripe() throws Exception {
    // Writes the ints 0..999 twice (the second time in two batches of 500) and verifies that
    // all 2000 values are read back in the order they were written.
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    // Reevaluate if we should use dictionary encoding on every stripe
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 1000 rows
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new IntStruct(i));

    // Flush the first stripe

    // Write 500 more rows (values 0..499)
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i));

    // Force the writer to enter low memory mode

    // Write 500 more rows (values 500..999)
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i + 500));

    // Read the file back and check every row. The values 0..999 were written twice, so row i
    // is expected to hold i % 1000.
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 2000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

   * Tests calling seekToRow to make sure it updates the stripe accordingly
  public void testSeekToRow() throws Exception {
    // Writes the ints 0..199 in two batches of 100, reads row 0, and then checks that the
    // values 98..199 are read back in order.
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, new MemoryManager(conf));

    // Write 100 rows (values 0..99)
    for (int i = 0; i < 100; i++) {
      writer.addRow(new IntStruct(i));

    // Flush the first stripe

    // Write 100 more rows (values 100..199)
    for (int i = 0; i < 100; i++) {
      writer.addRow(new IntStruct(i + 100));

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    // Read the first row and check that it holds the first value written
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(0, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    // Seek to row 98 which is almost at the end of a stripe, this way it stays in the current
    // stripe, and if the row is not updated correctly will read off the end of a stream.
    // Every value from 98 up to 199 (the last one written) must then be returned in order
    for (int i = 98; i < 200; i++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

   * Tests that when a reader is initialized using offset, length the stripes included are
   * those that start in the range [offset, offset + length)
  public void testSplitStripe() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
    // Reevaluate if we should use dictionary encoding on every stripe
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, new MemoryManager(conf));

    // Write 100 rows
    for (int i = 0; i < 100; i ++) {
      writer.addRow(new IntStruct(i));

    // Flush the first stripe

    // Write 100 more rows
    for (int i = 0; i < 100; i ++) {
      writer.addRow(new IntStruct(i + 100));


    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    Iterator<StripeInformation> stripes = reader.getStripes().iterator();

    StripeInformation firstStripe = stripes.next();
    StripeInformation secondStripe = stripes.next();

    // Create a record reader that has the offset and length of the first stripe
    RecordReader rows = reader.rows(firstStripe.getOffset(),
        secondStripe.getOffset() - firstStripe.getOffset(), null);

    // Read what we wrote for the first stripe
    OrcLazyStruct lazyRow = null;
    OrcStruct row;
    for (int i = 0; i < 100; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

    // Make sure that there's no additional data

    // Create a record reader that has the offset and length of the second stripe
    // Since this is the last stripe it has length equal to the length of the file containing
    // stripes - the offset of the second stripe
    rows = reader.rows(secondStripe.getOffset(),
        reader.getContentLength() - secondStripe.getOffset(), null);

    // Read what we wrote for the second stripe
    for (int i = 0; i < 100; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i + 100,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

    // Make sure that there's no additional data