Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory#getReflectionObjectInspector()

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory#getReflectionObjectInspector(). Each example is drawn from an open-source project; the source file and project are noted above the code.
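Before the project examples, here is a minimal, self-contained sketch of the basic pattern they all share: pass a Java class (or any java.lang.reflect.Type) together with ObjectInspectorOptions.JAVA to getReflectionObjectInspector() and use the returned inspector to read fields off plain Java objects. The MyRow class and its field values below are hypothetical and serve only as an illustration.

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ReflectionObjectInspectorSketch {

	// Hypothetical POJO used only for this sketch.
	public static class MyRow {
		public int id;
		public String name;
	}

	public static void main(String[] args) {
		// Reflecting a class with fields yields a StructObjectInspector.
		ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
			MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);

		MyRow row = new MyRow();
		row.id = 42;
		row.name = "example";

		// Walk the struct fields and read each value from the reflected object.
		StructObjectInspector structInspector = (StructObjectInspector) inspector;
		List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
		for (StructField field : fields) {
			System.out.println(field.getFieldName() + " = "
				+ structInspector.getStructFieldData(row, field));
		}
	}
}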
Example 1
Source File: HiveSimpleUDF.java    From flink with Apache License 2.0
@Override
public void openInternal() {
	LOG.info("Opening HiveSimpleUDF as '{}'", hiveFunctionWrapper.getClassName());

	function = hiveFunctionWrapper.createFunction();

	List<TypeInfo> typeInfos = new ArrayList<>();

	for (DataType arg : argTypes) {
		typeInfos.add(HiveTypeUtil.toHiveTypeInfo(arg, false));
	}

	try {
		method = function.getResolver().getEvalMethod(typeInfos);
		returnInspector = ObjectInspectorFactory.getReflectionObjectInspector(method.getGenericReturnType(),
			ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
		ObjectInspector[] argInspectors = new ObjectInspector[typeInfos.size()];

		for (int i = 0; i < argTypes.length; i++) {
			argInspectors[i] = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos.get(i));
		}

		conversionHelper = new GenericUDFUtils.ConversionHelper(method, argInspectors);
		conversions = new HiveObjectConversion[argInspectors.length];
		for (int i = 0; i < argInspectors.length; i++) {
			conversions[i] = HiveInspectors.getConversion(argInspectors[i], argTypes[i].getLogicalType(), hiveShim);
		}

		allIdentityConverter = Arrays.stream(conversions)
			.allMatch(conv -> conv instanceof IdentityConversion);
	} catch (Exception e) {
		throw new FlinkHiveUDFException(
			String.format("Failed to open HiveSimpleUDF from %s", hiveFunctionWrapper.getClassName()), e);
	}
}
 
Example 2
Source File: EthereumUDFTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void EthereumGetChainIdUDFNull() throws HiveException {
	EthereumGetChainIdUDF egcidUDF = new EthereumGetChainIdUDF();
	ObjectInspector[] arguments = new ObjectInspector[1];
	arguments[0] = ObjectInspectorFactory.getReflectionObjectInspector(EthereumTransaction.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
	egcidUDF.initialize(arguments);
	assertNull(egcidUDF.evaluate(null), "Null argument to UDF returns null");
}
 
Example 3
Source File: EthereumUDFTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void EthereumGetSendAddressUDFNull() throws HiveException {
	EthereumGetSendAddressUDF egsaUDF = new EthereumGetSendAddressUDF();
	ObjectInspector[] arguments = new ObjectInspector[2];
	arguments[0] = ObjectInspectorFactory.getReflectionObjectInspector(EthereumTransaction.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
	arguments[1] = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
	egsaUDF.initialize(arguments);
	assertNull(egsaUDF.evaluate(null), "Null argument to UDF returns null");
}
 
Example 4
Source File: BitcoinUDFTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void BitcoinTransactionHashSegwitUDFNull() throws HiveException {
	BitcoinTransactionHashSegwitUDF bthUDF = new BitcoinTransactionHashSegwitUDF();
	ObjectInspector[] arguments = new ObjectInspector[1];
	arguments[0] = ObjectInspectorFactory.getReflectionObjectInspector(BitcoinBlock.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
	bthUDF.initialize(arguments);
	assertNull(bthUDF.evaluate(null), "Null argument to UDF returns null");
}
 
Example 5
Source File: BitcoinUDFTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
  public void BitcoinTransactionHashUDFWriteable()  throws HiveException  {
	BitcoinTransactionHashUDF bthUDF = new BitcoinTransactionHashUDF();
	ObjectInspector[] arguments = new ObjectInspector[1];
	arguments[0] =  ObjectInspectorFactory.getReflectionObjectInspector(HiveBitcoinTransaction.class,ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
	bthUDF.initialize(arguments);
	// create BitcoinTransaction
	// reconstruct the transaction from the genesis block
	int version=1;
	byte[] inCounter = new byte[]{0x01};
	byte[] previousTransactionHash = new byte[]{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
	long previousTxOutIndex = 4294967295L;
	byte[] txInScriptLength = new byte[]{(byte)0x4D};
	byte[] txInScript= new byte[]{(byte)0x04,(byte)0xFF,(byte)0xFF,(byte)0x00,(byte)0x1D,(byte)0x01,(byte)0x04,(byte)0x45,(byte)0x54,(byte)0x68,(byte)0x65,(byte)0x20,(byte)0x54,(byte)0x69,(byte)0x6D,(byte)0x65,(byte)0x73,(byte)0x20,(byte)0x30,(byte)0x33,(byte)0x2F,(byte)0x4A,(byte)0x61,(byte)0x6E,(byte)0x2F,(byte)0x32,(byte)0x30,(byte)0x30,(byte)0x39,(byte)0x20,(byte)0x43,(byte)0x68,(byte)0x61,(byte)0x6E,(byte)0x63,(byte)0x65,(byte)0x6C,(byte)0x6C,(byte)0x6F,(byte)0x72,(byte)0x20,(byte)0x6F,(byte)0x6E,(byte)0x20,(byte)0x62,(byte)0x72,(byte)0x69,(byte)0x6E,(byte)0x6B,(byte)0x20,(byte)0x6F,(byte)0x66,(byte)0x20,(byte)0x73,(byte)0x65,(byte)0x63,(byte)0x6F,(byte)0x6E,(byte)0x64,(byte)0x20,(byte)0x62,(byte)0x61,(byte)0x69,(byte)0x6C,(byte)0x6F,(byte)0x75,(byte)0x74,(byte)0x20,(byte)0x66,(byte)0x6F,(byte)0x72,(byte)0x20,(byte)0x62,(byte)0x61,(byte)0x6E,(byte)0x6B,(byte)0x73};
	long seqNo=4294967295L;
	byte[] outCounter = new byte[]{0x01};
	long value=5000000000L;
	byte[] txOutScriptLength=new byte[]{(byte)0x43};
	byte[] txOutScript=new byte[]{(byte)0x41,(byte)0x04,(byte)0x67,(byte)0x8A,(byte)0xFD,(byte)0xB0,(byte)0xFE,(byte)0x55,(byte)0x48,(byte)0x27,(byte)0x19,(byte)0x67,(byte)0xF1,(byte)0xA6,(byte)0x71,(byte)0x30,(byte)0xB7,(byte)0x10,(byte)0x5C,(byte)0xD6,(byte)0xA8,(byte)0x28,(byte)0xE0,(byte)0x39,(byte)0x09,(byte)0xA6,(byte)0x79,(byte)0x62,(byte)0xE0,(byte)0xEA,(byte)0x1F,(byte)0x61,(byte)0xDE,(byte)0xB6,(byte)0x49,(byte)0xF6,(byte)0xBC,(byte)0x3F,(byte)0x4C,(byte)0xEF,(byte)0x38,(byte)0xC4,(byte)0xF3,(byte)0x55,(byte)0x04,(byte)0xE5,(byte)0x1E,(byte)0xC1,(byte)0x12,(byte)0xDE,(byte)0x5C,(byte)0x38,(byte)0x4D,(byte)0xF7,(byte)0xBA,(byte)0x0B,(byte)0x8D,(byte)0x57,(byte)0x8A,(byte)0x4C,(byte)0x70,(byte)0x2B,(byte)0x6B,(byte)0xF1,(byte)0x1D,(byte)0x5F,(byte)0xAC};
	int lockTime = 0;
	List<BitcoinTransactionInput> genesisInput = new ArrayList<BitcoinTransactionInput>(1);
	genesisInput.add(new BitcoinTransactionInput(previousTransactionHash,previousTxOutIndex,txInScriptLength,txInScript,seqNo));
	List<HiveBitcoinTransactionOutput> genesisOutput = new ArrayList<HiveBitcoinTransactionOutput>(1);
	genesisOutput.add(new HiveBitcoinTransactionOutput(HiveDecimal.create(BigInteger.valueOf(value)),txOutScriptLength,txOutScript));
	 HiveBitcoinTransaction genesisTransaction = new HiveBitcoinTransaction(version,inCounter,genesisInput,outCounter,genesisOutput,lockTime);
	 byte[] expectedHash = BitcoinUtil.reverseByteArray(new byte[]{(byte)0x4A,(byte)0x5E,(byte)0x1E,(byte)0x4B,(byte)0xAA,(byte)0xB8,(byte)0x9F,(byte)0x3A,(byte)0x32,(byte)0x51,(byte)0x8A,(byte)0x88,(byte)0xC3,(byte)0x1B,(byte)0xC8,(byte)0x7F,(byte)0x61,(byte)0x8F,(byte)0x76,(byte)0x67,(byte)0x3E,(byte)0x2C,(byte)0xC7,(byte)0x7A,(byte)0xB2,(byte)0x12,(byte)0x7B,(byte)0x7A,(byte)0xFD,(byte)0xED,(byte)0xA3,(byte)0x3B});
	GenericUDF.DeferredObject[] doa = new GenericUDF.DeferredObject[1];
	doa[0]=new GenericUDF.DeferredJavaObject(genesisTransaction);
	BytesWritable bw = (BytesWritable) bthUDF.evaluate(doa);
	
	assertArrayEquals(expectedHash, bw.copyBytes(), "BitcoinTransaction object genesis transaction hash from UDF");
}
 
Example 6
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
public void testDeepCopy() throws Exception {
  // Create a table and write a row to it
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
      100000, CompressionKind.ZLIB, 10000, 10000);
  writer.addRow(new BigRow(false, (byte) 1, (short) 1, 1,
      1L, (float) 1.0, 1.0, bytes(1), "1",
      new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
      list(inner(3, "good"), inner(4, "bad")),
      map(inner(3, "good"), inner(4, "bad"))));

  writer.close();

  // Prepare to read back the row
  ReaderWriterProfiler.setProfilerOptions(conf);
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  lazyRow = (OrcLazyStruct) rows.next(lazyRow);
  row = (OrcStruct) lazyRow.materialize();

  // Check that the object read equals what is expected, then copy the object, and make the same
  // check
  OrcLazyObject obj;
  assertEquals(false,
      ((BooleanWritable) ((OrcLazyBoolean) row.getFieldValue(0)).materialize()).get());
  obj = new OrcLazyBoolean((OrcLazyBoolean) row.getFieldValue(0));
  assertEquals(false, ((BooleanWritable) obj.materialize()).get());

  assertEquals(1, ((ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize()).get());
  obj = new OrcLazyByte((OrcLazyByte) row.getFieldValue(1));
  assertEquals(1, ((ByteWritable) obj.materialize()).get());

  assertEquals(1, ((ShortWritable) ((OrcLazyShort) row.getFieldValue(2)).materialize()).get());
  obj = new OrcLazyShort((OrcLazyShort) row.getFieldValue(2));
  assertEquals(1, ((ShortWritable) obj.materialize()).get());

  assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(3)).materialize()).get());
  obj = new OrcLazyInt((OrcLazyInt) row.getFieldValue(3));
  assertEquals(1, ((IntWritable) obj.materialize()).get());

  assertEquals(1, ((LongWritable) ((OrcLazyLong) row.getFieldValue(4)).materialize()).get());
  obj = new OrcLazyLong((OrcLazyLong) row.getFieldValue(4));
  assertEquals(1, ((LongWritable) obj.materialize()).get());

  assertEquals(1.0f,
      ((FloatWritable) ((OrcLazyFloat) row.getFieldValue(5)).materialize()).get());
  obj = new OrcLazyFloat((OrcLazyFloat) row.getFieldValue(5));
  assertEquals(1.0f, ((FloatWritable) obj.materialize()).get());

  assertEquals(1.0,
      ((DoubleWritable) ((OrcLazyDouble) row.getFieldValue(6)).materialize()).get());
  obj = new OrcLazyDouble((OrcLazyDouble) row.getFieldValue(6));
  assertEquals(1.0, ((DoubleWritable) obj.materialize()).get());

  assertEquals(bytes(1), ((OrcLazyBinary) row.getFieldValue(7)).materialize());
  obj = new OrcLazyBinary((OrcLazyBinary) row.getFieldValue(7));
  assertEquals(bytes(1), obj.materialize());

  assertEquals("1", ((Text) ((OrcLazyString) row.getFieldValue(8)).materialize()).toString());
  obj = new OrcLazyString((OrcLazyString) row.getFieldValue(8));
  assertEquals("1", ((Text) obj.materialize()).toString());

  // Currently copies are not supported for complex types
}
 
Example 7
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe with a string column, which enters low memory mode before the first
 * index stride is complete.
 */
public void testStringEnterLowMemoryModeInFirstStride() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (StringStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      1000000, CompressionKind.NONE, 100, 10000, memory);

  // Write 500 rows
  for (int i = 0; i < 500; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }

  // Force the writer to enter low memory mode, note since the stride length was set to 10000
  // we're still in the first stride
  memory.forceEnterLowMemoryMode();

  // Write 500 more rows
  for (int i = 0; i < 500; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i + 500)));
  }

  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  for (int i = 0; i < 1000; i ++) {
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Integer.toString(i),
        ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
  }
  rows.close();
}
 
Example 8
Source File: TestFileDump.java    From hive-dwrf with Apache License 2.0
private void testDictionary(Configuration conf, String expectedOutputFilename) throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  // Turn off using the approximate entropy heuristic to turn off dictionary encoding
  OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD, -1);
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      100000, CompressionKind.SNAPPY, 10000, 10000, new MemoryManager(conf));
  Random r1 = new Random(1);
  String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
      "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
      "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
      "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
      "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
      "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
      "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
      "we", "had", "everything", "before", "us,", "we", "had", "nothing",
      "before", "us,", "we", "were", "all", "going", "direct", "to",
      "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
      "way"};
  int nextInt = 0;
  int nextNumIdx = 0;
  int numRows = 21000;
  List<Integer> intVals = new ArrayList<Integer>(words.length);
  List<Long> longVals = new ArrayList<Long>(words.length);

  for (int i=0; i < numRows; i++) {
    intVals.add(i);
    longVals.add((long)i + (long)Integer.MAX_VALUE);

  }
  Collections.shuffle(intVals, r1);
  Collections.shuffle(longVals, r1);

  for(int i=0; i < numRows; ++i) {
    // Write out the same string twice, this guarantees the fraction of rows with
    // distinct strings is 0.5
    if (i % 2 == 0) {
      nextInt = r1.nextInt(words.length);
      nextNumIdx = i;
      // Append the value of i to the word, this guarantees when an index or word is repeated
      // the actual string is unique.
      words[nextInt] += "-" + i;
    }
    writer.addRow(new MyRecord(intVals.get(nextNumIdx), longVals.get(nextNumIdx),
        words[nextInt]));
  }
  writer.close();
  checkOutput(expectedOutputFilename);
}
 
Example 9
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
private RandomRowInputs writeRandomRowsWithNulls(int count, NumberOfNulls numNulls,
    boolean lowMemoryMode) throws IOException {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (ReallyBigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      lowMemoryMode ? 200000 : 4000000, CompressionKind.ZLIB, 65536, 1000,
          new MemoryManager(conf));
  Random rand = new Random(42);
  RandomRowInputs inputs = new RandomRowInputs(count);
  long[] intValues = inputs.intValues;
  double[] doubleValues = inputs.doubleValues;
  String[] stringValues = inputs.stringValues;
  BytesWritable[] byteValues = inputs.byteValues;
  String[] words = inputs.words;
  for(int i=0; i < words.length; ++i) {
    words[i] = Integer.toHexString(rand.nextInt());
  }
  for(int i=0; i < count/2; ++i) {
    intValues[2*i] = rand.nextLong();
    intValues[2*i+1] = rand.nextLong();
    stringValues[2*i] = words[rand.nextInt(words.length)];
    stringValues[2*i+1] = words[rand.nextInt(words.length)];
  }
  for(int i=0; i < count; ++i) {
    doubleValues[i] = rand.nextDouble();
    byte[] buf = new byte[20];
    rand.nextBytes(buf);
    byteValues[i] = new BytesWritable(buf);
  }
  for(int i=0; i < count; ++i) {
    ReallyBigRow bigrow = createRandomRowWithNulls(intValues, doubleValues, stringValues,
        byteValues, words, i, numNulls);
    writer.addRow(bigrow);
  }
  writer.close();
  writer = null;
  return inputs;
}
 
Example 10
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe with a string column, which enters low memory mode just after the
 * second stride starts.
 */
public void testStringEnterLowMemoryModeAfterStrideStart() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (StringStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      1000000, CompressionKind.NONE, 100, 1000, memory);

  // Write 1001 rows (the first stride plus the first row of the second)
  for (int i = 0; i < 1001; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i % 1000)));
  }

  // Force the writer to enter low memory mode, note since the stride length was set to 1000
  // we're just after starting the second stride
  memory.forceEnterLowMemoryMode();

  // Write 499 more rows (a portion of the second stride)
  for (int i = 1; i < 500; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }

  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  for (int i = 0; i < 1500; i ++) {
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Integer.toString(i % 1000),
        ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
  }
  rows.close();
}
 
Example 11
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
public void testSeekAcrossChunks() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (DoubleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  // Create a table consisting of a single column of doubles
  // Add enough values to it to get at least 3 index strides (doubles are 8 bytes); more is OK
  // Note that the compression buffer size and index stride length are very important
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 2097152,
      CompressionKind.ZLIB, 262144, 10000);
  Random rand = new Random(42);
  double[] values = new double[131702];

  // The first compression block is all 0's
  for (int i = 0; i < 32768; i++) {
    values[i] = 0;
    writer.addRow(new DoubleRow(values[i]));
  }

  // The second compression block is random doubles
  for (int i = 0; i < 32768; i++) {
    values[i + 32768] = rand.nextDouble();
    writer.addRow(new DoubleRow(values[i + 32768]));
  }

  // The third compression block is all 0's
  // (important so it compresses to the same size as the first)
  for (int i = 0; i < 32768; i++) {
    values[i + 32768 + 32768] = 0;
    writer.addRow(new DoubleRow(values[i + 32768 + 32768]));
  }

  // The fourth compression block is random
  for (int i = 0; i < 32768; i++) {
    values[i + 32768 + 32768 + 32768] = rand.nextDouble();
    writer.addRow(new DoubleRow(values[i + 32768 + 32768 + 32768]));
  }

  writer.close();
  OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES, 2);
  OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, false);
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);

  StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
  List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
  DoubleObjectInspector columnInspector =
      (DoubleObjectInspector) fields.get(0).getFieldObjectInspector();

  RecordReader rows = reader.rows(null);
  Object row = null;

  // Skip enough values to get to the 2nd index stride in the first chunk
  for (int i = 0; i < 40001; i++) {
    row = rows.next(row);
  }

  // This will set previousOffset to the size of the first compression block and
  // compressionOffset to some other value (it doesn't matter what; the point is that it differs
  // from the start of the compression block)
  assertEquals(values[40000], columnInspector.get(readerInspector.getStructFieldData(row,
      fields.get(0))));

  // Skip enough values to get to the 2nd index stride of the second chunk
  for (int i = 0; i < 80000; i++) {
    rows.next(row);
  }

  // When seek is called, previousOffset will equal newCompressedOffset, since the former is the
  // length of the first compression block and the latter is the length of the third compression
  // block (remember the chunks contain 2 index strides), so if we only check this (or for some
  // other reason fail to adjust compressedIndex), we will read the wrong data
  assertEquals(values[120000], columnInspector.get(readerInspector.getStructFieldData(row, fields.get(0))));

  rows.close();
}
 
Example 12
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe with a stride dictionary, followed by a stripe without one,
 * followed by a stripe with one.
 */
public void testEmptyInIntDictionaryStream() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (IntStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  ReaderWriterProfiler.setProfilerOptions(conf);
  WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
      inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
  writer.addRow(new IntStruct(1));
  writer.addRow(new IntStruct(2));
  writer.addRow(new IntStruct(3));
  for (int i = 0; i < 997; i++) {
    writer.addRow(new IntStruct(123));
  }
  writer.forceFlushStripe();
  for (int i = 0; i < 1000; i++) {
    writer.addRow(new IntStruct(123));
  }
  writer.forceFlushStripe();
  writer.addRow(new IntStruct(1));
  writer.addRow(new IntStruct(2));
  writer.addRow(new IntStruct(3));
  for (int i = 0; i < 997; i++) {
    writer.addRow(new IntStruct(123));
  }
  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  lazyRow = (OrcLazyStruct) rows.next(lazyRow);
  row = (OrcStruct) lazyRow.materialize();
  assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  rows.next(lazyRow);
  assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  rows.next(lazyRow);
  assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  for (int i =0; i < 997; i++) {
    rows.next(lazyRow);
    assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  }
  for (int i =0; i < 1000; i++) {
    rows.next(lazyRow);
    assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  }
  rows.next(lazyRow);
  assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  rows.next(lazyRow);
  assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  rows.next(lazyRow);
  assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  for (int i =0; i < 997; i++) {
    rows.next(lazyRow);
    assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  }
}
 
Example 13
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe with an int column, which enters low memory mode before the second
 * index stride is complete, and does not complete that stride.
 */
public void testIntEnterLowMemoryModeInSecondStride() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (IntStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      1000000, CompressionKind.NONE, 100, 1000, memory);

  // Write 1000 rows (the first stride)
  for (int i = 0; i < 1000; i ++) {
    writer.addRow(new IntStruct(i));
  }

  // Write 250 more rows (a portion of the second stride)
  for (int i = 0; i < 250; i ++) {
    writer.addRow(new IntStruct(i));
  }

  // Force the writer to enter low memory mode, note since the stride length was set to 1000
  // we're still in the second stride
  memory.forceEnterLowMemoryMode();

  // Write 250 more rows (which still gets written to the second stride, but not enough to fill
  // it)
  for (int i = 0; i < 250; i ++) {
    writer.addRow(new IntStruct(i + 250));
  }

  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  for (int i = 0; i < 1500; i ++) {
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(i % 1000,
        ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  }
  rows.close();
}
 
Example 14
Source File: EthereumUDFTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
 public void EthereumGetTransactionHashUDFWritable() throws HiveException, IOException, EthereumBlockReadException {
  // initialize object inspector
  EthereumGetTransactionHashUDF egthUDF = new EthereumGetTransactionHashUDF();
	ObjectInspector[] arguments = new ObjectInspector[1];
	arguments[0] =  ObjectInspectorFactory.getReflectionObjectInspector(HiveEthereumTransaction.class,ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
	egthUDF.initialize(arguments);	
	// load test data
  ClassLoader classLoader = getClass().getClassLoader();
	String fileName="eth1346406.bin";
	String fileNameBlock=classLoader.getResource(fileName).getFile();	
	File file = new File(fileNameBlock);
	boolean direct=false;
	FileInputStream fin = new FileInputStream(file);
	EthereumBlockReader ebr = null;
	try {
		ebr = new EthereumBlockReader(fin,this.DEFAULT_MAXSIZE_ETHEREUMBLOCK, this.DEFAULT_BUFFERSIZE,direct);
		EthereumBlock eblock = ebr.readBlock();
		List<EthereumTransaction> eTrans = eblock.getEthereumTransactions();
		// validate UDFs
		HiveEthereumTransaction trans0 = EthereumUDFTest.convertToHiveEthereumTransaction(eTrans.get(0));
		byte[] expectedHash = new byte[] {(byte)0xe2,(byte)0x7e,(byte)0x92,(byte)0x88,(byte)0xe2,(byte)0x9c,(byte)0xc8,(byte)0xeb,(byte)0x78,(byte)0xf9,(byte)0xf7,(byte)0x68,(byte)0xd8,(byte)0x9b,(byte)0xf1,(byte)0xcd,(byte)0x4b,(byte)0x68,(byte)0xb7,(byte)0x15,(byte)0xa3,(byte)0x8b,(byte)0x95,(byte)0xd4,(byte)0x6d,(byte)0x77,(byte)0x86,(byte)0x18,(byte)0xcb,(byte)0x10,(byte)0x4d,(byte)0x58};
		assertArrayEquals(expectedHash,((BytesWritable)egthUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans0)})).copyBytes(),"Block 1346406 Transaction 1 hash is correctly calculated");
		HiveEthereumTransaction trans1 = EthereumUDFTest.convertToHiveEthereumTransaction(eTrans.get(1));
		expectedHash = new byte[] {(byte)0x7a,(byte)0x23,(byte)0x2a,(byte)0xa2,(byte)0xae,(byte)0x6a,(byte)0x5e,(byte)0x1f,(byte)0x32,(byte)0xca,(byte)0x3a,(byte)0xc9,(byte)0x3f,(byte)0x4f,(byte)0xdb,(byte)0x77,(byte)0x98,(byte)0x3e,(byte)0x93,(byte)0x2b,(byte)0x38,(byte)0x09,(byte)0x93,(byte)0x56,(byte)0x44,(byte)0x42,(byte)0x08,(byte)0xc6,(byte)0x9d,(byte)0x40,(byte)0x86,(byte)0x81};
		assertArrayEquals(expectedHash,((BytesWritable)egthUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans1)})).copyBytes(),"Block 1346406 Transaction 2 hash is correctly calculated");
		HiveEthereumTransaction trans2 = EthereumUDFTest.convertToHiveEthereumTransaction(eTrans.get(2));
		expectedHash = new byte[] {(byte)0x14,(byte)0x33,(byte)0xe3,(byte)0xcb,(byte)0x66,(byte)0x2f,(byte)0x66,(byte)0x8d,(byte)0x87,(byte)0xb8,(byte)0x35,(byte)0x55,(byte)0x34,(byte)0x5a,(byte)0x20,(byte)0xcc,(byte)0xf8,(byte)0x70,(byte)0x6f,(byte)0x25,(byte)0x21,(byte)0x49,(byte)0x18,(byte)0xe2,(byte)0xf8,(byte)0x1f,(byte)0xe3,(byte)0xd2,(byte)0x1c,(byte)0x9d,(byte)0x5b,(byte)0x23};
		assertArrayEquals(expectedHash,((BytesWritable)egthUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans2)})).copyBytes(),"Block 1346406 Transaction 3 hash is correctly calculated");
		HiveEthereumTransaction trans3 = EthereumUDFTest.convertToHiveEthereumTransaction(eTrans.get(3));
		expectedHash = new byte[] {(byte)0x39,(byte)0x22,(byte)0xf7,(byte)0xf6,(byte)0x0a,(byte)0x33,(byte)0xa1,(byte)0x2d,(byte)0x13,(byte)0x9d,(byte)0x67,(byte)0xfa,(byte)0x53,(byte)0x30,(byte)0xdb,(byte)0xfd,(byte)0xba,(byte)0x42,(byte)0xa4,(byte)0xb7,(byte)0x67,(byte)0x29,(byte)0x6e,(byte)0xff,(byte)0x64,(byte)0x15,(byte)0xee,(byte)0xa3,(byte)0x2d,(byte)0x8a,(byte)0x7b,(byte)0x2b};
		assertArrayEquals(expectedHash,((BytesWritable)egthUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans3)})).copyBytes(),"Block 1346406 Transaction 4 hash is correctly calculated");
		HiveEthereumTransaction trans4 = EthereumUDFTest.convertToHiveEthereumTransaction(eTrans.get(4));
		expectedHash = new byte[] {(byte)0xbb,(byte)0x7c,(byte)0xaa,(byte)0x23,(byte)0x38,(byte)0x5a,(byte)0x0f,(byte)0x73,(byte)0x75,(byte)0x3f,(byte)0x9e,(byte)0x28,(byte)0xd8,(byte)0xf0,(byte)0x60,(byte)0x2f,(byte)0xe2,(byte)0xe7,(byte)0x2d,(byte)0x87,(byte)0xe1,(byte)0xe0,(byte)0x95,(byte)0x52,(byte)0x75,(byte)0x28,(byte)0xd1,(byte)0x44,(byte)0x88,(byte)0x5d,(byte)0x6b,(byte)0x51};
		assertArrayEquals(expectedHash,((BytesWritable)egthUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans4)})).copyBytes(),"Block 1346406 Transaction 5 hash is correctly calculated");
		HiveEthereumTransaction trans5 = EthereumUDFTest.convertToHiveEthereumTransaction(eTrans.get(5));
		expectedHash = new byte[] {(byte)0xbc,(byte)0xde,(byte)0x6f,(byte)0x49,(byte)0x84,(byte)0x2c,(byte)0x6d,(byte)0x73,(byte)0x8d,(byte)0x64,(byte)0x32,(byte)0x8f,(byte)0x78,(byte)0x09,(byte)0xb1,(byte)0xd4,(byte)0x9b,(byte)0xf0,(byte)0xff,(byte)0x3f,(byte)0xfa,(byte)0x46,(byte)0x0f,(byte)0xdd,(byte)0xd2,(byte)0x7f,(byte)0xd4,(byte)0x2b,(byte)0x7a,(byte)0x01,(byte)0xfc,(byte)0x9a};
		assertArrayEquals(expectedHash,((BytesWritable)egthUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans5)})).copyBytes(),"Block 1346406 Transaction 6 hash is correctly calculated");
		    
	} finally {
		if (ebr!=null) {
			ebr.close();
		}
	}

 }
 
Example 15
Source File: EthereumUDFTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
 public void EthereumGetSendAddressUDFObjectInspector() throws HiveException, IOException, EthereumBlockReadException {
  // initialize object inspector
  EthereumGetSendAddressUDF egsaUDF = new EthereumGetSendAddressUDF();
	ObjectInspector[] arguments = new ObjectInspector[2];
	arguments[0] =  ObjectInspectorFactory.getReflectionObjectInspector(TestEthereumTransaction.class,ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
	arguments[1] = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
	egsaUDF.initialize(arguments);	
	// load test data
  ClassLoader classLoader = getClass().getClassLoader();
	String fileName="eth1346406.bin";
	String fileNameBlock=classLoader.getResource(fileName).getFile();	
	File file = new File(fileNameBlock);
	boolean direct=false;
	FileInputStream fin = new FileInputStream(file);
	EthereumBlockReader ebr = null;
	try {
		ebr = new EthereumBlockReader(fin,this.DEFAULT_MAXSIZE_ETHEREUMBLOCK, this.DEFAULT_BUFFERSIZE,direct);
		EthereumBlock eblock = ebr.readBlock();
		List<EthereumTransaction> eTrans = eblock.getEthereumTransactions();
		// validate UDFs
		EthereumTransaction transOrig0 = eTrans.get(0);
		TestEthereumTransaction trans0 = new TestEthereumTransaction();
		trans0.set(transOrig0);
	      byte[] expectedSentAddress = new byte[] {(byte)0x39,(byte)0x42,(byte)0x4b,(byte)0xd2,(byte)0x8a,(byte)0x22,(byte)0x23,(byte)0xda,(byte)0x3e,(byte)0x14,(byte)0xbf,(byte)0x79,(byte)0x3c,(byte)0xf7,(byte)0xf8,(byte)0x20,(byte)0x8e,(byte)0xe9,(byte)0x98,(byte)0x0a};
	      assertArrayEquals(expectedSentAddress,((BytesWritable)egsaUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans0),new GenericUDF.DeferredJavaObject(new IntWritable(1))})).copyBytes(),"Block 1346406 Transaction 1 send address is correctly calculated");
			EthereumTransaction transOrig1 = eTrans.get(1);
			TestEthereumTransaction trans1 = new TestEthereumTransaction();
			trans1.set(transOrig1);
	      expectedSentAddress = new byte[] {(byte)0x4b,(byte)0xb9,(byte)0x60,(byte)0x91,(byte)0xee,(byte)0x9d,(byte)0x80,(byte)0x2e,(byte)0xd0,(byte)0x39,(byte)0xc4,(byte)0xd1,(byte)0xa5,(byte)0xf6,(byte)0x21,(byte)0x6f,(byte)0x90,(byte)0xf8,(byte)0x1b,(byte)0x01};
	      assertArrayEquals(expectedSentAddress,((BytesWritable)egsaUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans1),new GenericUDF.DeferredJavaObject(new IntWritable(1))})).copyBytes(),"Block 1346406 Transaction 2 send address is correctly calculated");
			EthereumTransaction transOrig2 = eTrans.get(2);
			TestEthereumTransaction trans2 = new TestEthereumTransaction();
			trans2.set(transOrig2);
	      expectedSentAddress = new byte[] {(byte)0x63,(byte)0xa9,(byte)0x97,(byte)0x5b,(byte)0xa3,(byte)0x1b,(byte)0x0b,(byte)0x96,(byte)0x26,(byte)0xb3,(byte)0x43,(byte)0x00,(byte)0xf7,(byte)0xf6,(byte)0x27,(byte)0x14,(byte)0x7d,(byte)0xf1,(byte)0xf5,(byte)0x26};
	      assertArrayEquals(expectedSentAddress,((BytesWritable)egsaUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans2),new GenericUDF.DeferredJavaObject(new IntWritable(1))})).copyBytes(),"Block 1346406 Transaction 3 send address is correctly calculated");
			EthereumTransaction transOrig3 = eTrans.get(3);
			TestEthereumTransaction trans3 = new TestEthereumTransaction();
			trans3.set(transOrig3);
	      expectedSentAddress = new byte[] {(byte)0x63,(byte)0xa9,(byte)0x97,(byte)0x5b,(byte)0xa3,(byte)0x1b,(byte)0x0b,(byte)0x96,(byte)0x26,(byte)0xb3,(byte)0x43,(byte)0x00,(byte)0xf7,(byte)0xf6,(byte)0x27,(byte)0x14,(byte)0x7d,(byte)0xf1,(byte)0xf5,(byte)0x26};
	     assertArrayEquals(expectedSentAddress,((BytesWritable)egsaUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans3),new GenericUDF.DeferredJavaObject(new IntWritable(1))})).copyBytes(),"Block 1346406 Transaction 4 send address is correctly calculated");
			EthereumTransaction transOrig4 = eTrans.get(4);
			TestEthereumTransaction trans4 = new TestEthereumTransaction();
			trans4.set(transOrig4);
	      expectedSentAddress = new byte[] {(byte)0x63,(byte)0xa9,(byte)0x97,(byte)0x5b,(byte)0xa3,(byte)0x1b,(byte)0x0b,(byte)0x96,(byte)0x26,(byte)0xb3,(byte)0x43,(byte)0x00,(byte)0xf7,(byte)0xf6,(byte)0x27,(byte)0x14,(byte)0x7d,(byte)0xf1,(byte)0xf5,(byte)0x26};
	      assertArrayEquals(expectedSentAddress,((BytesWritable)egsaUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans4),new GenericUDF.DeferredJavaObject(new IntWritable(1))})).copyBytes(),"Block 1346406 Transaction 5 send address is correctly calculated");
			EthereumTransaction transOrig5 = eTrans.get(5);
			TestEthereumTransaction trans5 = new TestEthereumTransaction();
			trans5.set(transOrig5);
	      expectedSentAddress = new byte[] {(byte)0x63,(byte)0xa9,(byte)0x97,(byte)0x5b,(byte)0xa3,(byte)0x1b,(byte)0x0b,(byte)0x96,(byte)0x26,(byte)0xb3,(byte)0x43,(byte)0x00,(byte)0xf7,(byte)0xf6,(byte)0x27,(byte)0x14,(byte)0x7d,(byte)0xf1,(byte)0xf5,(byte)0x26};
	      assertArrayEquals(expectedSentAddress,((BytesWritable)egsaUDF.evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(trans5),new GenericUDF.DeferredJavaObject(new IntWritable(1))})).copyBytes(),"Block 1346406 Transaction 6 send address is correctly calculated");
	      
	} finally {
		if (ebr!=null) {
			ebr.close();
		}
	}

 }
 
Example 16
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe with a string column, which enters low memory mode before the second
 * index stride is complete, and does not complete that stride.
 */
public void testStringEnterLowMemoryModeInSecondStride() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (StringStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      1000000, CompressionKind.NONE, 100, 1000, memory);

  // Write 1000 rows (the first stride)
  for (int i = 0; i < 1000; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }

  // Write 250 more rows (a portion of the second stride)
  for (int i = 0; i < 250; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }

  // Force the writer to enter low memory mode, note since the stride length was set to 1000
  // we're still in the second stride
  memory.forceEnterLowMemoryMode();

  // Write 250 more rows (which still gets written to the second stride, but not enough to fill
  // it)
  for (int i = 0; i < 250; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i + 250)));
  }

  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  for (int i = 0; i < 1500; i ++) {
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Integer.toString(i % 1000),
        ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
  }
  rows.close();
}
 
Example 17
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe containing a string column which is not dictionary encoded in the
 * first stripe; this carries over to the third stripe, after which dictionary encoding is turned
 * back on.  This will cause the dictionary to be nulled out, then reinitialized.
 */
public void testStrideDictionariesWithoutStripeCarryover() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (StringStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  ReaderWriterProfiler.setProfilerOptions(conf);
  OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
  OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
  OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
  OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS, true);
  WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
      inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
  // Write a stripe which is not dictionary encoded
  for (int i = 0; i < 2000; i++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }
  writer.forceFlushStripe();
  // Write another stripe (doesn't matter what)
  for (int i = 0; i < 2000; i++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }
  writer.forceFlushStripe();
  // Write a stripe which will be dictionary encoded
  // Note: it is important that this string is lexicographically after the string in the next
  // index stride.  This way, if sorting by index strides is not working, this value will appear
  // after the next one, though it should appear before, yielding incorrect results.
  writer.addRow(new StringStruct("b"));
  for (int i = 0; i < 999; i++) {
    writer.addRow(new StringStruct("123"));
  }
  writer.addRow(new StringStruct("a"));
  for (int i = 0; i < 999; i++) {
    writer.addRow(new StringStruct("123"));
  }
  writer.forceFlushStripe();
  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  lazyRow = (OrcLazyStruct) rows.next(lazyRow);
  row = (OrcStruct) lazyRow.materialize();
  for (int i =0; i < 4000; i++) {
    assertEquals(Integer.toString(i % 2000), ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    rows.next(lazyRow);
  }
  assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
  for (int i =0; i < 999; i++) {
    rows.next(lazyRow);
    assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
  }
  rows.next(lazyRow);
  assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
  for (int i =0; i < 999; i++) {
    rows.next(lazyRow);
    assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
  }
}
 
Example 18
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe with a string column, which doesn't do dictionary encoding, then
 * re-evaluates whether it should do dictionary encoding or not.  While it's re-evaluating, it
 * enters low memory mode.
 */
public void testStringEnterLowMemoryModeAndOnNotCarriedOverStripe() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (StringStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  // Reevaluate if we should use dictionary encoding on every stripe
  OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
  MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
  ReaderWriterProfiler.setProfilerOptions(conf);
  WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
      inspector, 1000000, CompressionKind.NONE, 100, 10000, memory);

  // Write 1000 rows; they will be directly encoded
  for (int i = 0; i < 1000; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }

  // Flush the first stripe
  writer.forceFlushStripe();

  // Write 500 more rows
  for (int i = 0; i < 500; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i)));
  }

  // Force the writer to enter low memory mode
  memory.forceEnterLowMemoryMode();

  // Write 500 more rows
  for (int i = 0; i < 500; i ++) {
    writer.addRow(new StringStruct(Integer.toString(i + 500)));
  }

  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  for (int i = 0; i < 2000; i ++) {
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Integer.toString(i % 1000),
        ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
  }
  rows.close();
}
 
Example 19
Source File: TestOrcFile.java    From hive-dwrf with Apache License 2.0
@Test
/**
 * Tests writing a stripe with an integer column, which enters low memory mode before the first
 * index stride is complete.
 */
public void testIntEnterLowMemoryModeInFirstStride() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (IntStruct.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      1000000, CompressionKind.NONE, 100, 10000, memory);

  // Write 500 rows
  for (int i = 0; i < 500; i ++) {
    writer.addRow(new IntStruct(i));
  }

  // Force the writer to enter low memory mode, note since the stride length was set to 10000
  // we're still in the first stride
  memory.forceEnterLowMemoryMode();

  // Write 500 more rows
  for (int i = 0; i < 500; i ++) {
    writer.addRow(new IntStruct(i + 500));
  }

  writer.close();
  Reader reader = OrcFile.createReader(fs, testFilePath, conf);
  RecordReader rows = reader.rows(null);
  OrcLazyStruct lazyRow = null;
  OrcStruct row = null;
  for (int i = 0; i < 1000; i ++) {
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(i, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
  }
  rows.close();
}
 
Example 20
Source File: HdfsHelper.java    From DataLink with Apache License 2.0
/**
 * Builds ObjectInspectors based on the column types configured for the writer.
 * @param columns the column configurations
 * @return one ObjectInspector per configured column
 */
public List<ObjectInspector> getColumnTypeInspectors(List<Configuration> columns) {
    List<ObjectInspector>  columnTypeInspectors = Lists.newArrayList();
    for (Configuration eachColumnConf : columns) {
        SupportHiveDataType columnType = SupportHiveDataType.valueOf(eachColumnConf.getString(Key.TYPE).toUpperCase());
        ObjectInspector objectInspector = null;
        switch (columnType) {
            case TINYINT:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Byte.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case SMALLINT:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Short.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case INT:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case BIGINT:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case FLOAT:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Float.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case DOUBLE:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Double.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case DECIMAL://decimal,added by lubiao
            	objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(HiveDecimal.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
            	break;
            case BINARY://binary,added by luibao
            	objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(BytesWritable.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
            	break;
            case TIMESTAMP:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(java.sql.Timestamp.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case DATE:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(java.sql.Date.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case STRING:
            case VARCHAR:
            case CHAR:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(String.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            case BOOLEAN:
                objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Boolean.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
                break;
            default:
                ErrorRecord.addError(String.format(
                        "Invalid column configuration: DataX does not support writing this field type. Field name: [%s], field type: [%s]. Please change the column's type in the table or exclude the column from synchronization.",
                        eachColumnConf.getString(Key.NAME),
                        eachColumnConf.getString(Key.TYPE)));
                throw DataXException
                        .asDataXException(
                                HdfsWriterErrorCode.ILLEGAL_VALUE,
                                String.format(
                                        "Invalid column configuration: DataX does not support writing this field type. Field name: [%s], field type: [%s]. Please change the column's type in the table or exclude the column from synchronization.",
                                        eachColumnConf.getString(Key.NAME),
                                        eachColumnConf.getString(Key.TYPE)));
        }

        columnTypeInspectors.add(objectInspector);
    }
    return columnTypeInspectors;
}