Java Code Examples for org.apache.spark.sql.types.StructType#size()

The following examples show how to use org.apache.spark.sql.types.StructType#size(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
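StructType in Spark SQL implements Scala's Seq[StructField], so size() returns the number of top-level fields in the schema. A minimal sketch of that behavior before the project examples (the class and field names here are illustrative, not from any of the projects below):

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class StructTypeSizeDemo {
    public static void main(String[] args) {
        // Build a two-field schema with the Java-friendly factory methods.
        StructType schema = DataTypes.createStructType(new StructField[]{
                DataTypes.createStructField("name", DataTypes.StringType, true),
                DataTypes.createStructField("age", DataTypes.LongType, true)
        });
        // size() is inherited from Seq[StructField]: the number of top-level fields.
        System.out.println(schema.size()); // prints 2
    }
}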
Example 1
Source File: DBClientWrapper.java    From spark-data-sources with MIT License
public static edb.common.Row sparkToDBRow(org.apache.spark.sql.Row row, StructType type) {
    edb.common.Row dbRow = new edb.common.Row();
    StructField[] fields = type.fields();
    for (int i = 0; i < type.size(); i++) {
        StructField sf = fields[i];
        if (sf.dataType() == DataTypes.StringType) {
            dbRow.addField(new edb.common.Row.StringField(sf.name(), row.getString(i)));
        } else if (sf.dataType() == DataTypes.DoubleType) {
            dbRow.addField(new edb.common.Row.DoubleField(sf.name(), row.getDouble(i)));
        } else if (sf.dataType() == DataTypes.LongType) {
            dbRow.addField(new edb.common.Row.Int64Field(sf.name(), row.getLong(i)));
        } else {
            // TODO: type leakage
        }
    }

    return dbRow;
}
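A hypothetical call site for the converter above, using Spark's RowFactory to build the input row (the schema and values are made up for illustration):

StructType schema = DataTypes.createStructType(new StructField[]{
        DataTypes.createStructField("name", DataTypes.StringType, true),
        DataTypes.createStructField("score", DataTypes.DoubleType, true)
});
org.apache.spark.sql.Row sparkRow = RowFactory.create("alice", 0.75);
edb.common.Row dbRow = DBClientWrapper.sparkToDBRow(sparkRow, schema);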
 
Example 2
Source File: UnsafeFixedWidthAggregationMap.java    From indexr with Apache License 2.0
/**
 * Create a new UnsafeFixedWidthAggregationMap.
 *
 * @param emptyAggregationBuffer  the default value for new keys (a "zero" of the agg. function)
 * @param aggregationBufferSchema the schema of the aggregation buffer, used for row conversion.
 * @param groupingKeySchema       the schema of the grouping key, used for row conversion.
 * @param taskMemoryManager       the memory manager used to allocate our Unsafe memory structures.
 * @param initialCapacity         the initial capacity of the map (a sizing hint to avoid re-hashing).
 * @param pageSizeBytes           the data page size, in bytes; limits the maximum record size.
 * @param enablePerfMetrics       if true, performance metrics will be recorded (has minor perf impact)
 */
public UnsafeFixedWidthAggregationMap(
        InternalRow emptyAggregationBuffer,
        StructType aggregationBufferSchema,
        StructType groupingKeySchema,
        TaskMemoryManager taskMemoryManager,
        int initialCapacity,
        long pageSizeBytes,
        boolean enablePerfMetrics) {
    this.aggregationBufferSchema = aggregationBufferSchema;
    this.currentAggregationBuffer = new UnsafeRow(aggregationBufferSchema.size());
    this.groupingKeyProjection = UnsafeProjection.createFromSchema(groupingKeySchema);
    this.groupingKeySchema = groupingKeySchema;
    this.map =
            new BytesToBytesMap(taskMemoryManager, initialCapacity, pageSizeBytes, enablePerfMetrics);
    this.enablePerfMetrics = enablePerfMetrics;

    // Initialize the buffer for aggregation value
    final UnsafeProjection valueProjection = UnsafeProjection.createFromSchema(aggregationBufferSchema);
    this.emptyAggregationBuffer = valueProjection.apply(emptyAggregationBuffer).getBytes();
}
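A design note on the constructor above: the empty aggregation buffer is projected to its UnsafeRow bytes once, up front, so each new grouping key can be initialized by copying those bytes instead of re-running the projection per key.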
 
Example 3
Source File: Reader.java    From iceberg with Apache License 2.0
StructLikeInternalRow(StructType struct) {
  this.types = new DataType[struct.size()];
  StructField[] fields = struct.fields();
  for (int i = 0; i < fields.length; i += 1) {
    types[i] = fields[i].dataType();
  }
}
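Here struct.size() sizes a DataType cache up front, so per-row accessors can look up a field's type by ordinal without going back to the StructType on every access.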
 
Example 4
Source File: SortPrefixUtils.java    From indexr with Apache License 2.0
/**
 * Creates the prefix comparator for the first field in the given schema, in ascending order.
 */
public static PrefixComparator getPrefixComparator(StructType schema) {
    if (schema.size() != 0) {
        return getPrefixComparator(
                new SortOrder(new BoundReference(0, schema.get(0).dataType),
                        SortOrder.SortDirection.Ascending));
    } else {
        return (a, b) -> 0;
    }
}
 
Example 5
Source File: BitemporalHistoryPlanner.java    From envelope with Apache License 2.0
private Row getCurrentSystemTimeRow(long currentSystemTimeMillis) {
  StructType schema = 
      SchemaUtils.appendFields(systemEffectiveFromTimeModel.getSchema(),
          Lists.newArrayList(systemEffectiveToTimeModel.getSchema().fields()));
  Object[] nulls = new Object[schema.size()];
  Row row = new RowWithSchema(schema, nulls);
  row = systemEffectiveFromTimeModel.setCurrentSystemTime(row);
  row = systemEffectiveToTimeModel.setCurrentSystemTime(row);
  
  return row;
}
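Note that new Object[schema.size()] produces an all-null values array, so the row starts with every field null and the two time models then fill in their own fields.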
 
Example 6
Source File: ExternalTableUtils.java    From spliceengine with GNU Affero General Public License v3.0
public static StructType supportAvroDateType(StructType schema, String storedAs) {
    if (storedAs.equalsIgnoreCase("a")) {
        for (int i = 0; i < schema.size(); i++) {
            StructField column = schema.fields()[i];
            if (column.dataType().equals(DataTypes.DateType)) {
                // Work around missing Avro date support by storing dates as strings.
                StructField replace = DataTypes.createStructField(column.name(), DataTypes.StringType, column.nullable(), column.metadata());
                // fields() returns the schema's backing array, so this rewrites the schema in place.
                schema.fields()[i] = replace;
            }
        }
    }
    return schema;
}
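A hedged usage sketch, assuming "a" is the storedAs code this project uses for Avro:

StructType schema = DataTypes.createStructType(new StructField[]{
        DataTypes.createStructField("id", DataTypes.LongType, false),
        DataTypes.createStructField("created", DataTypes.DateType, true)
});
// For Avro-backed tables, any DateType column is rewritten to StringType.
StructType adjusted = ExternalTableUtils.supportAvroDateType(schema, "a");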
 
Example 7
Source File: SpliceOrcNewInputFormat.java    From spliceengine with GNU Affero General Public License v3.0
public static Map<Integer,DataType> getColumnsAndTypes(List<Integer> columnIds, StructType rowStruct) throws IOException {
    int structTypeSize = rowStruct.size();
    int columnIdsSize = columnIds.size();
    // Use a typed map: the raw Map in the original allowed a StructField to be
    // put where the signature promises a DataType.
    Map<Integer, DataType> columnsAndTypes = new HashMap<>();
    for (int i = 0, j = 0; i < columnIdsSize; i++) {
        if (columnIds.get(i) == -1)
            continue;
        columnsAndTypes.put(i, rowStruct.fields()[j].dataType());
        j++;
    }
    if (columnsAndTypes.size() != structTypeSize)
        throw new IOException(String.format("Column IDs do not match the underlying struct: columnIds(%s), struct(%s)", columnIds, rowStruct.json()));
    return columnsAndTypes;
}
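A hypothetical call, assuming a -1 entry in columnIds marks a column with no counterpart in the stored struct:

List<Integer> columnIds = Arrays.asList(0, -1, 2);
StructType rowStruct = DataTypes.createStructType(new StructField[]{
        DataTypes.createStructField("a", DataTypes.LongType, true),
        DataTypes.createStructField("b", DataTypes.StringType, true)
});
// Maps position 0 -> LongType and position 2 -> StringType; position 1 is skipped.
Map<Integer, DataType> byPosition =
        SpliceOrcNewInputFormat.getColumnsAndTypes(columnIds, rowStruct);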
 
Example 8
Source File: UnsafeKVExternalSorter.java    From indexr with Apache License 2.0
public UnsafeKVExternalSorter(
        StructType keySchema,
        StructType valueSchema,
        //BlockManager blockManager,
        long pageSizeBytes,
        @Nullable BytesToBytesMap map) throws IOException {
    this.keySchema = keySchema;
    this.valueSchema = valueSchema;
    final TaskContext taskContext = TaskContext.get();

    prefixComputer = SortPrefixUtils.createPrefixGenerator(keySchema);
    PrefixComparator prefixComparator = SortPrefixUtils.getPrefixComparator(keySchema);
    BaseOrdering ordering = BaseOrdering.create(keySchema);
    KVComparator recordComparator = new KVComparator(ordering, keySchema.size());

    TaskMemoryManager taskMemoryManager = taskContext.taskMemoryManager();

    if (map == null) {
        sorter = UnsafeExternalSorter.create(
                taskMemoryManager,
                //blockManager,
                taskContext,
                recordComparator,
                prefixComparator,
                /* initialSize */ 4096,
                pageSizeBytes);
    } else {
        // During spilling, the array in the map will not be used, so we can borrow it and use it
        // as the underlying array for the in-memory sorter (it is always large enough).
        // Since we will not grow the array, it's fine to pass `null` as the consumer.
        final UnsafeInMemorySorter inMemSorter = new UnsafeInMemorySorter(
                null, taskMemoryManager, recordComparator, prefixComparator, map.getArray());

        // We cannot use the destructive iterator here because we are reusing the existing memory
        // pages in BytesToBytesMap to hold records during sorting.
        // The only new memory we are allocating is the pointer/prefix array.
        BytesToBytesMap.MapIterator iter = map.iterator();
        final int numKeyFields = keySchema.size();
        UnsafeRow row = new UnsafeRow(numKeyFields);
        while (iter.hasNext()) {
            final BytesToBytesMap.Location loc = iter.next();
            final Object baseObject = loc.getKeyAddress().getBaseObject();
            final long baseOffset = loc.getKeyAddress().getBaseOffset();

            // Get the encoded memory address.
            // baseObject + baseOffset point to the beginning of the key data in the map; note that
            // the KV-pair's length is stored in the word immediately before that address.
            MemoryBlock page = loc.getMemoryPage();
            long address = taskMemoryManager.encodePageNumberAndOffset(page, baseOffset - 8);

            // Compute prefix
            row.pointTo(baseObject, baseOffset, loc.getKeyLength());
            final long prefix = prefixComputer.computePrefix(row);

            inMemSorter.insertRecord(address, prefix);
        }

        sorter = UnsafeExternalSorter.createWithExistingInMemorySorter(
                taskMemoryManager,
                //blockManager,
                taskContext,
                new KVComparator(ordering, keySchema.size()),
                prefixComparator,
                /* initialSize */ 4096,
                pageSizeBytes,
                inMemSorter);

        // Reset the map so we can re-use it to insert new records. The inMemSorter will not be
        // used anymore, so its underlying array can be used by the map again.
        map.reset();
    }
}
 
Example 9
Source File: ColumnarBatch.java    From spliceengine with GNU Affero General Public License v3.0
private ColumnarBatch(StructType schema, int maxRows, MemoryMode memMode) {
    this.numRows = maxRows;
    this.columns = new WritableColumnVector[schema.size()];
}
 