org.apache.spark.sql.catalyst.expressions.GenericRow Java Examples

The following examples show how to use org.apache.spark.sql.catalyst.expressions.GenericRow. Each snippet is taken from an open-source project; the source file and license are noted above it.
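GenericRow is a plain Row implementation backed by an Object[]: values are stored positionally and read back through the Row interface. A minimal sketch, not taken from any of the projects below:

import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRow;

Row row = new GenericRow(new Object[] {1, "alice"});
int id = row.getInt(0);          // 1
String name = row.getString(1);  // "alice"
Object raw = row.get(1);         // "alice", untyped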
Example #1
Source File: Spark1Shims.java    From zeppelin with Apache License 2.0
@Override
public DataFrame getAsDataFrame(String value) {
  String[] lines = value.split("\\n");
  String head = lines[0];
  String[] columns = head.split("\t");
  StructType schema = new StructType();
  for (String column : columns) {
    schema = schema.add(column, "String");
  }

  List<Row> rows = new ArrayList<>();
  for (int i = 1; i < lines.length; ++i) {
    String[] tokens = lines[i].split("\t");
    Row row = new GenericRow(tokens);
    rows.add(row);
  }
  return SQLContext.getOrCreate(sc)
          .createDataFrame(rows, schema);
}
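A hypothetical call (the shims instance and the input string are illustrative, not part of the Zeppelin sources): the first tab-separated line supplies the column names, every following line becomes one GenericRow, and all columns are registered with the string type.

String table = "name\tage\nalice\t30\nbob\t25";
DataFrame df = shims.getAsDataFrame(table);
df.show();  // two rows, string columns "name" and "age"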
 
Example #2
Source File: Spark3Shims.java    From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
  String[] lines = value.split("\\n");
  String head = lines[0];
  String[] columns = head.split("\t");
  StructType schema = new StructType();
  for (String column : columns) {
    schema = schema.add(column, "String");
  }

  List<Row> rows = new ArrayList<>();
  for (int i = 1; i < lines.length; ++i) {
    String[] tokens = lines[i].split("\t");
    Row row = new GenericRow(tokens);
    rows.add(row);
  }
  return sparkSession.createDataFrame(rows, schema);
}
 
Example #3
Source File: Spark2Shims.java    From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
  String[] lines = value.split("\\n");
  String head = lines[0];
  String[] columns = head.split("\t");
  StructType schema = new StructType();
  for (String column : columns) {
    schema = schema.add(column, "String");
  }

  List<Row> rows = new ArrayList<>();
  for (int i = 1; i < lines.length; ++i) {
    String[] tokens = lines[i].split("\t");
    Row row = new GenericRow(tokens);
    rows.add(row);
  }
  return sparkSession.createDataFrame(rows, schema);
}
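Apart from the entry point, the three shims above are identical: the Spark 1.x version obtains a SQLContext via SQLContext.getOrCreate(sc) and returns a DataFrame, while the Spark 2.x and 3.x versions use an injected SparkSession and return a Dataset<Row>. In every case the rows are plain GenericRow instances whose string tokens are matched positionally against the all-string schema.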
 
Example #4
Source File: SparkRecord.java    From sylph with Apache License 2.0
public static Row parserRow(Record record)
{
    if (record instanceof SparkRecord) {
        return ((SparkRecord) record).get();
    }
    else if (record instanceof DefaultRecord) {
        //todo: schema field type
        return new GenericRow(((DefaultRecord) record).getValues());
    }
    else {
        throw new RuntimeException(" not souch row type: " + record.getClass());
    }
}
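Note that GenericRow carries no schema of its own; the values array is wrapped as-is, which is what the //todo above and the //GenericRowWithSchema comment in the Kafka example below are pointing at. When a schema needs to travel with the row, the sibling class GenericRowWithSchema in the same package can be used instead.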
 
Example #5
Source File: MockHiveWarehouseSessionImpl.java    From spark-llap with Apache License 2.0
public static DriverResultSet testFixture() {
    ArrayList<Row> row = new ArrayList<>();
    row.add(new GenericRow(new Object[] {1, "ID 1"}));
    row.add(new GenericRow(new Object[] {2, "ID 2"}));
    StructType schema = (new StructType())
            .add("col1", "int")
            .add("col2", "string");
    return new DriverResultSet(row, schema);
}
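The fixture pairs a list of GenericRow values with a matching StructType. A minimal sketch of how such a pair is typically consumed (the spark SparkSession below is an assumption, not part of the mock):

List<Row> rows = new ArrayList<>();
rows.add(new GenericRow(new Object[] {1, "ID 1"}));
rows.add(new GenericRow(new Object[] {2, "ID 2"}));
StructType schema = new StructType().add("col1", "int").add("col2", "string");
spark.createDataFrame(rows, schema).show();  // spark: an existing SparkSession (assumed)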
 
Example #6
Source File: TestHelpers.java    From iceberg with Apache License 2.0
private static Object getValue(SpecializedGetters container, int ord,
                               Type type) {
  if (container.isNullAt(ord)) {
    return null;
  }
  switch (type.typeId()) {
    case BOOLEAN:
      return container.getBoolean(ord);
    case INTEGER:
      return container.getInt(ord);
    case LONG:
      return container.getLong(ord);
    case FLOAT:
      return container.getFloat(ord);
    case DOUBLE:
      return container.getDouble(ord);
    case STRING:
      return container.getUTF8String(ord).toString();
    case BINARY:
    case FIXED:
    case UUID:
      return container.getBinary(ord);
    case DATE:
      return new DateWritable(container.getInt(ord)).get();
    case TIMESTAMP:
      return DateTimeUtils.toJavaTimestamp(container.getLong(ord));
    case DECIMAL: {
      Types.DecimalType dt = (Types.DecimalType) type;
      return container.getDecimal(ord, dt.precision(), dt.scale()).toJavaBigDecimal();
    }
    case STRUCT:
      Types.StructType struct = type.asStructType();
      InternalRow internalRow = container.getStruct(ord, struct.fields().size());
      Object[] data = new Object[struct.fields().size()];
      for (int i = 0; i < data.length; i += 1) {
        if (internalRow.isNullAt(i)) {
          data[i] = null;
        } else {
          data[i] = getValue(internalRow, i, struct.fields().get(i).type());
        }
      }
      return new GenericRow(data);
    default:
      throw new IllegalArgumentException("Unhandled type " + type);
  }
}
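The STRUCT branch recurses field by field and re-wraps the converted children in a schemaless GenericRow. A minimal sketch of the leaf case (illustrative only; getValue is private, so a call like this would live inside the same test class; Types is org.apache.iceberg.types.Types, as in the example above):

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.unsafe.types.UTF8String;

InternalRow source = new GenericInternalRow(new Object[] {UTF8String.fromString("hello")});
Object converted = getValue(source, 0, Types.StringType.get());  // java.lang.String "hello"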
 
Example #7
Source File: KafkaSource.java    From sylph with Apache License 2.0
public JavaDStream<Row> createSource(JavaStreamingContext ssc, KafkaSourceConfig config, SourceContext context)
{
    String topics = config.getTopics();
    String brokers = config.getBrokers(); // the cluster hosts must be resolvable from the machine running this program
    String groupId = config.getGroupid(); // consumer group name
    String offsetMode = config.getOffsetMode();

    Map<String, Object> kafkaParams = new HashMap<>(config.getOtherConfig());
    kafkaParams.put("bootstrap.servers", brokers);
    kafkaParams.put("key.deserializer", ByteArrayDeserializer.class); //StringDeserializer
    kafkaParams.put("value.deserializer", ByteArrayDeserializer.class); //StringDeserializer
    kafkaParams.put("enable.auto.commit", false); //不自动提交偏移量
    //      "fetch.message.max.bytes" ->
    //      "session.timeout.ms" -> "30000", //session默认是30秒
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    kafkaParams.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    kafkaParams.put("auto.offset.reset", offsetMode); //latest   earliest

    List<String> topicSets = Arrays.asList(topics.split(","));
    JavaInputDStream<ConsumerRecord<byte[], byte[]>> inputStream = KafkaUtils.createDirectStream(
            ssc, LocationStrategies.PreferConsistent(), ConsumerStrategies.Subscribe(topicSets, kafkaParams));

    DStream<ConsumerRecord<byte[], byte[]>> sylphKafkaOffset = new SylphKafkaOffset<ConsumerRecord<byte[], byte[]>>(inputStream.inputDStream())
    {
        @Override
        public void commitOffsets(RDD<?> kafkaRdd)
        {
            OffsetRange[] offsetRanges = ((HasOffsetRanges) kafkaRdd).offsetRanges();
            log().info("commitKafkaOffsets {}", (Object) offsetRanges);
            DStream<?> firstDStream = DStreamUtil.getFirstDStream(inputStream.dstream());
            ((CanCommitOffsets) firstDStream).commitAsync(offsetRanges);
        }
    };

    JavaDStream<ConsumerRecord<byte[], byte[]>> javaDStream = new JavaDStream<>(sylphKafkaOffset, ClassTag$.MODULE$.apply(ConsumerRecord.class));
    if ("json".equalsIgnoreCase(config.getValueType())) {
        JsonSchema jsonParser = new JsonSchema(context.getSchema());
        return javaDStream
                .map(record -> jsonParser.deserialize(record.key(), record.value(), record.topic(), record.partition(), record.offset()));
    }
    else {
        List<String> names = context.getSchema().getFieldNames();
        return javaDStream
                .map(record -> {
                    Object[] values = new Object[names.size()];
                    for (int i = 0; i < names.size(); i++) {
                        switch (names.get(i)) {
                            case "_topic":
                                values[i] = record.topic();
                                continue;
                            case "_message":
                                values[i] = new String(record.value(), UTF_8);
                                continue;
                            case "_key":
                                values[i] = record.key() == null ? null : new String(record.key(), UTF_8);
                                continue;
                            case "_partition":
                                values[i] = record.partition();
                                continue;
                            case "_offset":
                                values[i] = record.offset();
                            case "_timestamp":
                                values[i] = record.timestamp();
                            case "_timestampType":
                                values[i] = record.timestampType().id;
                            default:
                                values[i] = null;
                        }
                    }
                    return new GenericRow(values);  //GenericRowWithSchema
                });  //.window(Duration(10 * 1000))
    }
}
 
Example #8
Source File: JsonSchema.java    From sylph with Apache License 2.0
public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset)
        throws IOException
{
    @SuppressWarnings("unchecked")
    Map<String, Object> map = MAPPER.readValue(message, Map.class);
    String[] names = rowTypeInfo.names();
    Object[] values = new Object[names.length];
    for (int i = 0; i < names.length; i++) {
        String key = names[i];
        switch (key) {
            case "_topic":
                values[i] = topic;
                continue;
            case "_message":
                values[i] = new String(message, UTF_8);
                continue;
            case "_key":
                values[i] = messageKey == null ? null : new String(messageKey, UTF_8);
                continue;
            case "_partition":
                values[i] = partition;
                continue;
            case "_offset":
                values[i] = offset;
                continue;
        }

        Object value = map.get(key);
        if (value == null) {
            continue;
        }
        DataType type = rowTypeInfo.apply(i).dataType();

        if (type instanceof MapType && ((MapType) type).valueType() == DataTypes.StringType) {
            scala.collection.mutable.Map convertValue = new scala.collection.mutable.HashMap(); // must be a Scala map
            for (Map.Entry entry : ((Map<?, ?>) value).entrySet()) {
                convertValue.put(entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString());
            }
            values[i] = convertValue;
        }
        else if (type instanceof ArrayType) {
            //Class<?> aClass = type.getTypeClass();
            //values[i] = MAPPER.convertValue(value, aClass);
            //todo: Spark List to Array
            values[i] = value;
        }
        else if (type == DataTypes.LongType) {
            values[i] = ((Number) value).longValue();
        }
        else {
            values[i] = value;
        }
    }
    return new GenericRow(values);
}
 
Example #9
Source File: SimpleMockConnector.java    From spark-llap with Apache License 2.0
@Override
public Row get() {
    Row value = new GenericRow(new Object[] {i, "Element " + i});
    i++;
    return value;
}