org.apache.arrow.vector.dictionary.DictionaryProvider Java Examples

The following examples show how to use org.apache.arrow.vector.dictionary.DictionaryProvider. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: ArrowWrite.java From ArrowExample with Apache License 2.0

5 votes

/**
 * Opens the target file and prepares the Arrow file writer that will write to it.
 *
 * <p>The file returned by {@code validateFile(filename, false)} is opened as a
 * {@link FileOutputStream}, a {@link VectorSchemaRoot} is created from {@code makeSchema()},
 * and an {@link ArrowFileWriter} is built over that root with an empty dictionary provider.
 * The stream, root and writer are stored on this instance. A one-line summary of the run
 * configuration is printed to standard output once setup has succeeded.
 *
 * <p>If anything fails after the stream has been opened, the stream is closed before the
 * failure is rethrown so the file handle is not leaked.
 *
 * @param filename  name of the Arrow file to write, as accepted by {@code validateFile}
 * @param useCustom {@code true} to write through {@code ArrowOutputStream} wrapped around the
 *                  file stream; {@code false} to write through the stream's own channel
 * @throws Exception if the file cannot be validated or opened, or if creating the root or
 *                   the writer fails
 */
public void setupWrite(String filename, boolean useCustom) throws Exception {
    File arrowFile = validateFile(filename, false);
    this.fileOutputStream = new FileOutputStream(arrowFile);
    try {
        Schema schema = makeSchema();
        this.root = VectorSchemaRoot.create(schema, this.ra);
        DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
        if (useCustom) {
            /* custom channel implementation in ArrowOutputStream */
            this.arrowFileWriter = new ArrowFileWriter(root,
                    provider,
                    new ArrowOutputStream(this.fileOutputStream));
        } else {
            /* default java implementation of the channel */
            this.arrowFileWriter = new ArrowFileWriter(root,
                    provider,
                    this.fileOutputStream.getChannel());
        }
    } catch (Exception setupFailure) {
        // Setup did not complete: release the file handle instead of leaving it open.
        try {
            this.fileOutputStream.close();
        } catch (Exception closeFailure) {
            // Keep the original failure as the primary error; record the close problem on it.
            setupFailure.addSuppressed(closeFailure);
        }
        throw setupFailure;
    }

    System.out.println("Generated " + this.entries + " data entries , batch size " + batchSize + " usingCustomWriter: " + useCustom + " useNullValues " + this.useNullValues);
}

Example #2

Source File: Twister2ArrowFileWriter.java From twister2 with Apache License 2.0

5 votes

/**
 * Prepares this writer: builds the vector schema root, opens the output file and registers
 * a value generator for every column.
 *
 * <p>The root is created from the JSON schema held in {@code arrowSchema}. The output file
 * named by {@code arrowFile} is created through the file system resolved for its path, and
 * the Arrow file writer is attached either to the custom {@code Twister2ArrowOutputStream}
 * (when {@code flag} is set) or to the channel of the underlying output stream. Each column
 * of type INT, BIGINT or FLOAT4 is then mapped to its generator in {@code generatorMap}.
 *
 * @param workerId id of the calling worker; only used in the log message
 * @return always {@code true} when setup completes
 * @throws Exception if the schema, file system, output stream or writer cannot be created
 * @throws RuntimeException if a column has a minor type other than INT, BIGINT or FLOAT4
 */
public boolean setUpTwister2ArrowWrite(int workerId) throws Exception {
  LOG.fine("%%%%%%%%% worker id details:" + workerId + "\t" + arrowFile);
  this.root = VectorSchemaRoot.create(Schema.fromJSON(arrowSchema), this.rootAllocator);

  // Open the destination and wrap it in the custom output stream.
  Path path = new Path(arrowFile);
  this.fileSystem = FileSystemUtils.get(path);
  this.fsDataOutputStream = this.fileSystem.create(path);
  this.twister2ArrowOutputStream = new Twister2ArrowOutputStream(this.fsDataOutputStream);

  // Choose the channel the Arrow writer writes to.
  DictionaryProvider.MapDictionaryProvider provider
      = new DictionaryProvider.MapDictionaryProvider();
  if (flag) {
    this.arrowFileWriter = new ArrowFileWriter(root, provider, this.twister2ArrowOutputStream);
  } else {
    this.arrowFileWriter = new ArrowFileWriter(root, provider,
        this.fsDataOutputStream.getChannel());
  }

  Schema rootSchema = root.getSchema();
  LOG.info("root schema fields:" + rootSchema.getFields());

  // One generator per column, selected by the column's minor type.
  for (Field field : rootSchema.getFields()) {
    FieldVector vector = root.getVector(field.getName());
    switch (vector.getMinorType()) {
      case INT:
        this.generatorMap.put(vector, new IntVectorGenerator());
        break;
      case BIGINT:
        this.generatorMap.put(vector, new BigIntVectorGenerator());
        break;
      case FLOAT4:
        this.generatorMap.put(vector, new FloatVectorGenerator());
        break;
      default:
        throw new RuntimeException("unsupported arrow write type");
    }
  }
  return true;
}

Example #3

Source File: SFArrowResultSetIT.java From snowflake-jdbc with Apache License 2.0

5 votes

/**
 * Writes column-major test data to a new Arrow stream file, split into record batches.
 *
 * <p>{@code data[j]} holds the values of column {@code j}; the number of rows is taken from
 * the length of the first column. Only columns whose vector has minor type INT are
 * populated (through {@code writeIntToField}); vectors of any other type are left without
 * values. When {@code data} has no columns, a file containing no record batches is written.
 *
 * <p>The vector schema root and the writer are both closed before the method returns, so
 * the buffers allocated for the root are handed back to the allocator.
 *
 * @param fileName           name of the file to create inside {@code resultFolder}
 * @param schema             Arrow schema describing the columns
 * @param data               column-major values, one array per column
 * @param rowsPerRecordBatch maximum number of rows per record batch; must be positive
 *                           whenever there are rows to write
 * @return the file that was written
 * @throws IOException              if the file cannot be created or written
 * @throws IllegalArgumentException if there are rows to write and
 *                                  {@code rowsPerRecordBatch} is not positive
 */
private File createArrowFile(String fileName, Schema schema, Object[][] data,
                             int rowsPerRecordBatch)
throws IOException
{
  // Row count comes from the first column; no columns means no rows.
  final int totalRows = data.length == 0 ? 0 : data[0].length;
  if (totalRows > 0 && rowsPerRecordBatch <= 0)
  {
    // A non-positive batch size would never advance the row cursor below.
    throw new IllegalArgumentException(
        "rowsPerRecordBatch must be positive, got " + rowsPerRecordBatch);
  }

  File file = resultFolder.newFile(fileName);

  // Resources close in reverse order: the writer first, then the root it reads from.
  try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
       ArrowWriter writer = new ArrowStreamWriter(
           root, new DictionaryProvider.MapDictionaryProvider(),
           new FileOutputStream(file)))
  {
    writer.start();

    for (int i = 0; i < totalRows; )
    {
      int rowsToAppend = Math.min(rowsPerRecordBatch, totalRows - i);
      root.setRowCount(rowsToAppend);

      for (int j = 0; j < data.length; j++)
      {
        FieldVector vector = root.getFieldVectors().get(j);

        switch (vector.getMinorType())
        {
          case INT:
            writeIntToField(vector, data[j], i, rowsToAppend);
            break;
          default:
            // Other column types are intentionally left unpopulated.
            break;
        }
      }

      writer.writeBatch();
      i += rowsToAppend;
    }
  }

  return file;
}

Example #4

Source File: ArrowRead.java From ArrowExample with Apache License 2.0

4 votes

/**
 * Reads an Arrow file block by block and prints a summary of its contents.
 *
 * <p>For every record block the method loads the batch, prints the block's offset and
 * lengths, the row count and the number of field vectors, and passes each vector to the
 * {@code show*Accessor} helper matching its minor type (INT, BIGINT, VARBINARY, FLOAT4 or
 * FLOAT8). After all blocks are processed the reader is closed and the checksum fields of
 * this instance are printed, together with their difference from {@code checkSumx}.
 * NOTE(review): the checksum fields are presumably updated by the {@code show*Accessor}
 * helpers; confirm against their implementations.
 *
 * <p>The input stream and the reader are closed on every exit path, including failures.
 *
 * @param filename name of the Arrow file to read, as accepted by {@code validateFile}
 * @throws IOException if a record batch that is listed in the file cannot be loaded
 * @throws Exception   if the file cannot be validated or opened, if reading fails, or if a
 *                     column has a minor type that is not handled here
 */
public void makeRead(String filename) throws Exception {
    File arrowFile = validateFile(filename, true);

    // try-with-resources guarantees the file handle and the reader's buffers are released
    // even when a batch fails to load or an unsupported column type is met.
    try (FileInputStream fileInputStream = new FileInputStream(arrowFile);
         ArrowFileReader arrowFileReader = new ArrowFileReader(
                 new SeekableReadChannel(fileInputStream.getChannel()), this.ra)) {
        System.out.println("\nReading the arrow file : " + filename);
        VectorSchemaRoot root  = arrowFileReader.getVectorSchemaRoot();
        System.out.println("File size : " + arrowFile.length() +
                " schema is "  + root.getSchema().toString());

        List<ArrowBlock> arrowBlocks = arrowFileReader.getRecordBlocks();
        System.out.println("Number of arrow blocks are " + arrowBlocks.size());
        for (int i = 0; i < arrowBlocks.size(); i++) {
            ArrowBlock rbBlock = arrowBlocks.get(i);
            if (!arrowFileReader.loadRecordBatch(rbBlock)) {
                throw new IOException("Expected to read record batch");
            }
            System.out.println("\t["+i+"] ArrowBlock, offset: " + rbBlock.getOffset() +
                    ", metadataLength: " + rbBlock.getMetadataLength() +
                    ", bodyLength " + rbBlock.getBodyLength());
            /* we can now process this block, it is now loaded */
            System.out.println("\t["+i+"] row count for this block is " + root.getRowCount());
            List<FieldVector> fieldVectors = root.getFieldVectors();
            System.out.println("\t["+i+"] number of fieldVectors (corresponding to columns) : " + fieldVectors.size());
            for (FieldVector vector : fieldVectors) {
                Types.MinorType mt = vector.getMinorType();
                switch (mt) {
                    case INT:
                        showIntAccessor(vector);
                        break;
                    case BIGINT:
                        showBigIntAccessor(vector);
                        break;
                    case VARBINARY:
                        showVarBinaryAccessor(vector);
                        break;
                    case FLOAT4:
                        showFloat4Accessor(vector);
                        break;
                    case FLOAT8:
                        showFloat8Accessor(vector);
                        break;
                    default:
                        throw new Exception(" MinorType " + mt);
                }
            }
        }
        System.out.println("Done processing the file");
    }

    long s1 = this.intCsum + this.longCsum + this.arrCsum + this.floatCsum;
    System.out.println("intSum " + intCsum + " longSum " + longCsum + " arrSum " + arrCsum + " floatSum " + floatCsum + " = " + s1);
    System.err.println("Colsum Checksum > " + this.checkSumx + " , difference " + (s1 - this.checkSumx));
}