org.apache.spark.sql.catalyst.expressions.GenericRow Java Examples
The following examples show how to use
org.apache.spark.sql.catalyst.expressions.GenericRow.
You can go to the original project or source file by following the links above each example.
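As a quick orientation before the examples: GenericRow is a schema-less implementation of org.apache.spark.sql.Row that wraps a plain Object[], so values are addressed by position only. The following minimal sketch is illustrative and not taken from any of the projects below:

import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRow;

public class GenericRowSketch {
    public static void main(String[] args) {
        // Wrap an Object[]; the row exposes its elements positionally.
        Row row = new GenericRow(new Object[] {42, "answer"});

        System.out.println(row.size());       // 2
        System.out.println(row.getInt(0));    // 42
        System.out.println(row.getString(1)); // answer

        // GenericRow carries no schema, so name-based access such as
        // row.getAs("col") is unavailable; use GenericRowWithSchema for that.
    }
}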
Example #1
Source File: Spark1Shims.java From zeppelin with Apache License 2.0
@Override
public DataFrame getAsDataFrame(String value) {
    String[] lines = value.split("\\n");
    String head = lines[0];
    String[] columns = head.split("\t");
    StructType schema = new StructType();
    for (String column : columns) {
        schema = schema.add(column, "String");
    }

    List<Row> rows = new ArrayList<>();
    for (int i = 1; i < lines.length; ++i) {
        String[] tokens = lines[i].split("\t");
        Row row = new GenericRow(tokens);
        rows.add(row);
    }
    return SQLContext.getOrCreate(sc).createDataFrame(rows, schema);
}
Example #2
Source File: Spark3Shims.java From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
    String[] lines = value.split("\\n");
    String head = lines[0];
    String[] columns = head.split("\t");
    StructType schema = new StructType();
    for (String column : columns) {
        schema = schema.add(column, "String");
    }

    List<Row> rows = new ArrayList<>();
    for (int i = 1; i < lines.length; ++i) {
        String[] tokens = lines[i].split("\t");
        Row row = new GenericRow(tokens);
        rows.add(row);
    }
    return sparkSession.createDataFrame(rows, schema);
}
Example #3
Source File: Spark2Shims.java From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
    String[] lines = value.split("\\n");
    String head = lines[0];
    String[] columns = head.split("\t");
    StructType schema = new StructType();
    for (String column : columns) {
        schema = schema.add(column, "String");
    }

    List<Row> rows = new ArrayList<>();
    for (int i = 1; i < lines.length; ++i) {
        String[] tokens = lines[i].split("\t");
        Row row = new GenericRow(tokens);
        rows.add(row);
    }
    return sparkSession.createDataFrame(rows, schema);
}
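Examples #1 through #3 implement the same conversion, tab-separated text to a data frame, against the Spark 1, 3, and 2 APIs respectively; only the object that creates the frame differs. As a rough sketch of the expected input and output, assuming a Spark2Shims instance named shims (the sample data is invented):

// Header line of column names, then data lines, all tab-separated.
String value = "id\tname\n1\talice\n2\tbob";

// Every column is typed as string, so the result has schema
// (id: string, name: string) and two rows built from GenericRow.
Dataset<Row> df = shims.getAsDataFrame(value);
df.show();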
Example #4
Source File: SparkRecord.java From sylph with Apache License 2.0
public static Row parserRow(Record record) {
    if (record instanceof SparkRecord) {
        return ((SparkRecord) record).get();
    }
    else if (record instanceof DefaultRecord) {
        //todo: schema field type
        return new GenericRow(((DefaultRecord) record).getValues());
    }
    else {
        throw new RuntimeException("no such row type: " + record.getClass());
    }
}
Example #5
Source File: MockHiveWarehouseSessionImpl.java From spark-llap with Apache License 2.0
public static DriverResultSet testFixture() {
    ArrayList<Row> row = new ArrayList<>();
    row.add(new GenericRow(new Object[] {1, "ID 1"}));
    row.add(new GenericRow(new Object[] {2, "ID 2"}));
    StructType schema = (new StructType())
        .add("col1", "int")
        .add("col2", "string");
    return new DriverResultSet(row, schema);
}
Example #6
Source File: TestHelpers.java From iceberg with Apache License 2.0
private static Object getValue(SpecializedGetters container, int ord, Type type) {
    if (container.isNullAt(ord)) {
        return null;
    }
    switch (type.typeId()) {
        case BOOLEAN:
            return container.getBoolean(ord);
        case INTEGER:
            return container.getInt(ord);
        case LONG:
            return container.getLong(ord);
        case FLOAT:
            return container.getFloat(ord);
        case DOUBLE:
            return container.getDouble(ord);
        case STRING:
            return container.getUTF8String(ord).toString();
        case BINARY:
        case FIXED:
        case UUID:
            return container.getBinary(ord);
        case DATE:
            return new DateWritable(container.getInt(ord)).get();
        case TIMESTAMP:
            return DateTimeUtils.toJavaTimestamp(container.getLong(ord));
        case DECIMAL: {
            Types.DecimalType dt = (Types.DecimalType) type;
            return container.getDecimal(ord, dt.precision(), dt.scale()).toJavaBigDecimal();
        }
        case STRUCT:
            Types.StructType struct = type.asStructType();
            InternalRow internalRow = container.getStruct(ord, struct.fields().size());
            Object[] data = new Object[struct.fields().size()];
            for (int i = 0; i < data.length; i += 1) {
                if (internalRow.isNullAt(i)) {
                    data[i] = null;
                }
                else {
                    data[i] = getValue(internalRow, i, struct.fields().get(i).type());
                }
            }
            return new GenericRow(data);
        default:
            throw new IllegalArgumentException("Unhandled type " + type);
    }
}
Example #7
Source File: KafkaSource.java From sylph with Apache License 2.0
public JavaDStream<Row> createSource(JavaStreamingContext ssc, KafkaSourceConfig config, SourceContext context) {
    String topics = config.getTopics();
    String brokers = config.getBrokers(); //the cluster hosts must be resolvable from the machine running this program
    String groupId = config.getGroupid(); //consumer group name
    String offsetMode = config.getOffsetMode();

    Map<String, Object> kafkaParams = new HashMap<>(config.getOtherConfig());
    kafkaParams.put("bootstrap.servers", brokers);
    kafkaParams.put("key.deserializer", ByteArrayDeserializer.class); //StringDeserializer
    kafkaParams.put("value.deserializer", ByteArrayDeserializer.class); //StringDeserializer
    kafkaParams.put("enable.auto.commit", false); //do not auto-commit offsets
    // "fetch.message.max.bytes" ->
    // "session.timeout.ms" -> "30000",  //session timeout, 30 seconds by default
    // "heartbeat.interval.ms" -> "5000",  //heartbeat interval
    kafkaParams.put("group.id", groupId); //note: each stream must use a distinct group.id, otherwise offset commits will fail
    kafkaParams.put("auto.offset.reset", offsetMode); //latest earliest

    List<String> topicSets = Arrays.asList(topics.split(","));
    JavaInputDStream<ConsumerRecord<byte[], byte[]>> inputStream = KafkaUtils.createDirectStream(
            ssc,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.Subscribe(topicSets, kafkaParams));

    DStream<ConsumerRecord<byte[], byte[]>> sylphKafkaOffset = new SylphKafkaOffset<ConsumerRecord<byte[], byte[]>>(inputStream.inputDStream()) {
        @Override
        public void commitOffsets(RDD<?> kafkaRdd) {
            OffsetRange[] offsetRanges = ((HasOffsetRanges) kafkaRdd).offsetRanges();
            log().info("commitKafkaOffsets {}", (Object) offsetRanges);
            DStream<?> firstDStream = DStreamUtil.getFirstDStream(inputStream.dstream());
            ((CanCommitOffsets) firstDStream).commitAsync(offsetRanges);
        }
    };
    JavaDStream<ConsumerRecord<byte[], byte[]>> javaDStream =
            new JavaDStream<>(sylphKafkaOffset, ClassTag$.MODULE$.apply(ConsumerRecord.class));

    if ("json".equalsIgnoreCase(config.getValueType())) {
        JsonSchema jsonParser = new JsonSchema(context.getSchema());
        return javaDStream
                .map(record -> jsonParser.deserialize(record.key(), record.value(), record.topic(), record.partition(), record.offset()));
    }
    else {
        List<String> names = context.getSchema().getFieldNames();
        return javaDStream
                .map(record -> {
                    Object[] values = new Object[names.size()];
                    for (int i = 0; i < names.size(); i++) {
                        //each case must continue, otherwise the switch falls through and overwrites the value
                        switch (names.get(i)) {
                            case "_topic":
                                values[i] = record.topic();
                                continue;
                            case "_message":
                                values[i] = new String(record.value(), UTF_8);
                                continue;
                            case "_key":
                                values[i] = record.key() == null ? null : new String(record.key(), UTF_8);
                                continue;
                            case "_partition":
                                values[i] = record.partition();
                                continue;
                            case "_offset":
                                values[i] = record.offset();
                                continue;
                            case "_timestamp":
                                values[i] = record.timestamp();
                                continue;
                            case "_timestampType":
                                values[i] = record.timestampType().id;
                                continue;
                            default:
                                values[i] = null;
                        }
                    }
                    return new GenericRow(values); //GenericRowWithSchema
                }); //.window(Duration(10 * 1000))
    }
}
Example #8
Source File: JsonSchema.java From sylph with Apache License 2.0
public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset)
        throws IOException {
    @SuppressWarnings("unchecked")
    Map<String, Object> map = MAPPER.readValue(message, Map.class);
    String[] names = rowTypeInfo.names();
    Object[] values = new Object[names.length];
    for (int i = 0; i < names.length; i++) {
        String key = names[i];
        switch (key) {
            case "_topic":
                values[i] = topic;
                continue;
            case "_message":
                values[i] = new String(message, UTF_8);
                continue;
            case "_key":
                values[i] = messageKey == null ? null : new String(messageKey, UTF_8); //Kafka message keys may be null
                continue;
            case "_partition":
                values[i] = partition;
                continue;
            case "_offset":
                values[i] = offset;
                continue;
        }

        Object value = map.get(key);
        if (value == null) {
            continue;
        }
        DataType type = rowTypeInfo.apply(i).dataType();

        if (type instanceof MapType && ((MapType) type).valueType() == DataTypes.StringType) {
            scala.collection.mutable.Map convertValue = new scala.collection.mutable.HashMap(); //must be a Scala map
            for (Map.Entry entry : ((Map<?, ?>) value).entrySet()) {
                convertValue.put(entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString());
            }
            values[i] = convertValue;
        }
        else if (type instanceof ArrayType) { //the column type, not the JSON value, determines array handling
            //Class<?> aClass = type.getTypeClass();
            //values[i] = MAPPER.convertValue(value, aClass);
            //todo: Spark List to Array
            values[i] = value;
        }
        else if (type == DataTypes.LongType) {
            values[i] = ((Number) value).longValue();
        }
        else {
            values[i] = value;
        }
    }
    return new GenericRow(values);
}
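To make the mapping concrete, a hypothetical call is sketched below; the jsonSchema instance, field names, and payload are invented for illustration. Metadata columns such as _topic come from the method arguments, while the remaining columns are filled from the parsed JSON, with longs coerced via the LongType branch:

byte[] key = "k1".getBytes(UTF_8);
byte[] message = "{\"user\":\"alice\",\"age\":30}".getBytes(UTF_8);

// With a row schema of (_topic string, user string, age long), the
// resulting GenericRow holds ["events", "alice", 30L].
Row row = jsonSchema.deserialize(key, message, "events", 0, 100L);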
Example #9
Source File: SimpleMockConnector.java From spark-llap with Apache License 2.0
@Override
public Row get() {
    //i is a counter field on the enclosing class, incremented per generated row
    Row value = new GenericRow(new Object[] {i, "Element " + i});
    i++;
    return value;
}