org.apache.spark.sql.types.IntegerType Java Examples

The following examples show how to use org.apache.spark.sql.types.IntegerType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IndexRUtil.java, from the indexr project (Apache License 2.0). 6 votes.
/**
 * Builds an IndexR {@code SegmentSchema} from a list of Spark fields.
 *
 * <p>Each field becomes one {@code ColumnSchema} carrying the field's name, the mapped
 * {@code SQLType}, and the result of {@code isIndexed.apply(name)}.
 *
 * @param sparkSchema the Spark fields to convert, in column order
 * @param isIndexed   callback consulted once per field name for the column's indexed flag
 * @return a segment schema with one column per input field
 * @throws IllegalStateException if a field's data type has no mapping
 */
public static SegmentSchema sparkSchemaToIndexRSchema(List<StructField> sparkSchema, IsIndexed isIndexed) {
    List<ColumnSchema> converted = new ArrayList<>();
    for (StructField field : sparkSchema) {
        // Resolve the type first so an unsupported field fails before isIndexed is consulted.
        SQLType sqlType = sqlTypeForSparkField(field);
        converted.add(new ColumnSchema(field.name(), sqlType, isIndexed.apply(field.name())));
    }
    return new SegmentSchema(converted);
}

/** Maps one Spark field's data type to the corresponding {@code SQLType}, or throws if unsupported. */
private static SQLType sqlTypeForSparkField(StructField field) {
    if (field.dataType() instanceof IntegerType) {
        return SQLType.INT;
    }
    if (field.dataType() instanceof LongType) {
        return SQLType.BIGINT;
    }
    if (field.dataType() instanceof FloatType) {
        return SQLType.FLOAT;
    }
    if (field.dataType() instanceof DoubleType) {
        return SQLType.DOUBLE;
    }
    if (field.dataType() instanceof StringType) {
        return SQLType.VARCHAR;
    }
    if (field.dataType() instanceof DateType) {
        return SQLType.DATE;
    }
    if (field.dataType() instanceof TimestampType) {
        return SQLType.DATETIME;
    }
    throw new IllegalStateException("Unsupported type: " + field.dataType());
}
 
Example #2
Source File: TestSuite.java, from the stocator project (Apache License 2.0). 6 votes.
/**
 * T16: writes a small two-row data frame under {@code containerOut + "myData/123"} and then
 * removes it again.
 *
 * <p>The data frame has three non-nullable columns (NAME, STRING_VALUE, NUM_VALUE). It is
 * written as Parquet or JSON depending on {@code type}; any other {@code type} writes nothing.
 * Cleanup via {@code deleteData} runs exactly once, whether or not the write succeeds.
 *
 * <p>NOTE(review): the banner says "Non overwrite mode" but both writes use
 * {@code SaveMode.Overwrite} - confirm which is intended.
 *
 * @param spark         the active Spark session
 * @param schemaFlights not used by this test
 * @param containerOut  output location prefix
 * @param type          output format, compared against {@code Constants.PARQUET_TYPE} and
 *                      {@code Constants.JSON_TYPE}
 * @throws Exception if the write or the cleanup fails
 */
public void test16(SparkSession spark, Dataset<Row> schemaFlights, String containerOut, String type)
    throws Exception {
  System.out.println("*********************************");
  System.out.println("T16: Non overwrite mode " + containerOut);
  String o1 = containerOut + "myData/123";
  StructType schema = DataTypes
      .createStructType(new StructField[] { DataTypes.createStructField("NAME", DataTypes.StringType, false),
          DataTypes.createStructField("STRING_VALUE", DataTypes.StringType, false),
          DataTypes.createStructField("NUM_VALUE", DataTypes.IntegerType, false), });
  Row r1 = RowFactory.create("name1", "value1", 1);
  Row r2 = RowFactory.create("name2", "value2", 2);
  List<Row> rowList = ImmutableList.of(r1, r2);
  Dataset<Row> rows = spark.createDataFrame(rowList, schema);
  try {
    if (type.equals(Constants.PARQUET_TYPE)) {
      rows.write().mode(SaveMode.Overwrite).parquet(o1);
    } else if (type.equals(Constants.JSON_TYPE)) {
      rows.write().mode(SaveMode.Overwrite).json(o1);
    }
  } finally {
    // A single cleanup in finally covers both success and failure. A separate catch that
    // also deleted would run the cleanup twice on failure, and a cleanup error there
    // would replace the original write exception.
    deleteData(o1, spark.sparkContext().hadoopConfiguration(), dataCreate);
  }
}
 
Example #3
Source File: StructInternalRow.java, from the iceberg project (Apache License 2.0). 5 votes.
/**
 * Reads the value at {@code ordinal} through the typed getter that matches {@code dataType}.
 *
 * @param ordinal  position of the field to read
 * @param dataType Spark type that selects which typed getter is used
 * @return the value from the matching getter, or {@code null} when {@code dataType} is none
 *         of the types handled here
 */
@Override
@SuppressWarnings("checkstyle:CyclomaticComplexity")
public Object get(int ordinal, DataType dataType) {
  if (dataType instanceof IntegerType) {
    return getInt(ordinal);
  }
  if (dataType instanceof LongType) {
    return getLong(ordinal);
  }
  if (dataType instanceof StringType) {
    return getUTF8String(ordinal);
  }
  if (dataType instanceof FloatType) {
    return getFloat(ordinal);
  }
  if (dataType instanceof DoubleType) {
    return getDouble(ordinal);
  }
  if (dataType instanceof DecimalType) {
    // Decimals need precision and scale from the type itself.
    DecimalType asDecimal = (DecimalType) dataType;
    return getDecimal(ordinal, asDecimal.precision(), asDecimal.scale());
  }
  if (dataType instanceof BinaryType) {
    return getBinary(ordinal);
  }
  if (dataType instanceof StructType) {
    // Nested structs are read with their field count.
    StructType asStruct = (StructType) dataType;
    return getStruct(ordinal, asStruct.size());
  }
  if (dataType instanceof ArrayType) {
    return getArray(ordinal);
  }
  if (dataType instanceof MapType) {
    return getMap(ordinal);
  }
  if (dataType instanceof BooleanType) {
    return getBoolean(ordinal);
  }
  if (dataType instanceof ByteType) {
    return getByte(ordinal);
  }
  if (dataType instanceof ShortType) {
    return getShort(ordinal);
  }
  // No matching getter for this type.
  return null;
}
 
Example #4
Source File: IndexRUtil.java, from the indexr project (Apache License 2.0). 5 votes.
/**
 * Converts the columns of an IndexR {@code SegmentSchema} into Spark {@code StructField}s.
 *
 * <p>Each resulting field uses the column's name, the mapped Spark data type, a nullable flag
 * of {@code false}, and empty metadata.
 *
 * @param schema the IndexR schema whose columns are converted
 * @return one Spark field per column, in the order returned by {@code schema.getColumns()}
 * @throws IllegalStateException if a column's SQL type has no mapping
 */
public static List<StructField> indexrSchemaToSparkSchema(SegmentSchema schema) {
    List<StructField> sparkFields = new ArrayList<>();
    for (ColumnSchema column : schema.getColumns()) {
        DataType sparkType = sparkTypeForColumn(column);
        sparkFields.add(new StructField(column.getName(), sparkType, scala.Boolean.box(false), Metadata.empty()));
    }
    return sparkFields;
}

/** Maps one IndexR column's SQL type to the corresponding Spark data type, or throws if unsupported. */
private static DataType sparkTypeForColumn(ColumnSchema column) {
    switch (column.getSqlType()) {
        case INT:
            return DataTypes.IntegerType;
        case BIGINT:
            return DataTypes.LongType;
        case FLOAT:
            return DataTypes.FloatType;
        case DOUBLE:
            return DataTypes.DoubleType;
        case VARCHAR:
            return DataTypes.StringType;
        case DATE:
            return DataTypes.DateType;
        case DATETIME:
            return DataTypes.TimestampType;
        default:
            throw new IllegalStateException("Unsupported type: " + column.getSqlType());
    }
}
 
Example #5
Source File: LongDirectStreamReader.java, from the spliceengine project (GNU Affero General Public License v3.0). 4 votes.
/**
 * Reads the next batch of values into {@code vector} and returns it.
 *
 * <p>Steps, in order: open the row group if it is not open yet; apply any pending
 * {@code readOffset} by skipping in the data stream; read {@code nextBatchSize} values
 * (as ints for {@code DateType}/{@code IntegerType}, as longs otherwise), using the present
 * stream when one exists; finally reset {@code readOffset} and {@code nextBatchSize} to 0.
 *
 * @param type   Spark type of the column; selects the int or long read path
 * @param vector destination vector, also returned
 * @return the same {@code vector} passed in
 * @throws IOException declared by this method; an {@code OrcCorruptionException} is thrown
 *         when values are expected but {@code dataStream} is null (presumably a subtype of
 *         {@code IOException} - confirm)
 */
@Override
public ColumnVector readBlock(DataType type, ColumnVector vector)
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Apply a pending skip before reading the batch.
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        // readOffset may now be 0 if countBitsSet returned 0; then nothing is skipped.
        if (readOffset > 0) {
            if (dataStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            dataStream.skip(readOffset);
        }
    }

    if (presentStream == null) {
        // No present stream: read the whole batch directly from the data stream.
        if (dataStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        if (type instanceof DateType || type instanceof IntegerType) {
            dataStream.nextIntVector(type, nextBatchSize, vector);
        } else {
            dataStream.nextLongVector(type, nextBatchSize, vector);
        }
    }
    else {
        // Grow the reusable null buffer when the batch is larger than its current capacity.
        if (nullVector.length < nextBatchSize) {
            nullVector = new boolean[nextBatchSize];
        }
        // presumably fills nullVector and returns the number of null entries - confirm
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullValues != nextBatchSize) {
            // Not every entry in the batch is null, so the data stream is required.
            if (dataStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            if (type instanceof DateType || type instanceof IntegerType) {
                dataStream.nextIntVector(type, nextBatchSize, vector, nullVector);
            } else {
                dataStream.nextLongVector(type, nextBatchSize, vector, nullVector);
            }
        }
        else {
            // Entire batch is null: append one null per batch entry.
            // NOTE(review): the inner loop appends an additional null for each position i+j
            // that the vector already reports as null; the intent is not evident from this
            // block - confirm against ColumnVector's contract.
            for (int i = 0, j = 0; i < nextBatchSize; i++) {
                while (vector.isNullAt(i+j)) {
                    vector.appendNull();
                    j++;
                }
                vector.appendNull();
            }
        }
    }

    // The batch has been consumed; clear the pending skip and batch size.
    readOffset = 0;
    nextBatchSize = 0;

    return vector;
}