org.apache.spark.sql.catalyst.expressions.GenericRow Java Examples
The following examples show how to use
org.apache.spark.sql.catalyst.expressions.GenericRow.
You can go to the original project or source file by following the links above each example.
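As a quick orientation before the examples: GenericRow is a schema-less implementation of org.apache.spark.sql.Row that wraps a plain Object[], so values are addressed by position only. The following minimal sketch is illustrative and not taken from any of the projects below:

import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRow;

public class GenericRowSketch {
    public static void main(String[] args) {
        // Wrap an Object[]; the row exposes its elements positionally.
        Row row = new GenericRow(new Object[] {42, "answer"});

        System.out.println(row.size());       // 2
        System.out.println(row.getInt(0));    // 42
        System.out.println(row.getString(1)); // answer

        // GenericRow carries no schema, so name-based access such as
        // row.getAs("col") is unavailable; use GenericRowWithSchema for that.
    }
}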
Example #1
Source File: Spark1Shims.java From zeppelin with Apache License 2.0
@Override
public DataFrame getAsDataFrame(String value) {
    String[] lines = value.split("\\n");
    String head = lines[0];
    String[] columns = head.split("\t");
    StructType schema = new StructType();
    for (String column : columns) {
        schema = schema.add(column, "String");
    }

    List<Row> rows = new ArrayList<>();
    for (int i = 1; i < lines.length; ++i) {
        String[] tokens = lines[i].split("\t");
        Row row = new GenericRow(tokens);
        rows.add(row);
    }
    return SQLContext.getOrCreate(sc).createDataFrame(rows, schema);
}
Example #2
Source File: Spark3Shims.java From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
    String[] lines = value.split("\\n");
    String head = lines[0];
    String[] columns = head.split("\t");
    StructType schema = new StructType();
    for (String column : columns) {
        schema = schema.add(column, "String");
    }

    List<Row> rows = new ArrayList<>();
    for (int i = 1; i < lines.length; ++i) {
        String[] tokens = lines[i].split("\t");
        Row row = new GenericRow(tokens);
        rows.add(row);
    }
    return sparkSession.createDataFrame(rows, schema);
}
Example #3
Source File: Spark2Shims.java From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
    String[] lines = value.split("\\n");
    String head = lines[0];
    String[] columns = head.split("\t");
    StructType schema = new StructType();
    for (String column : columns) {
        schema = schema.add(column, "String");
    }

    List<Row> rows = new ArrayList<>();
    for (int i = 1; i < lines.length; ++i) {
        String[] tokens = lines[i].split("\t");
        Row row = new GenericRow(tokens);
        rows.add(row);
    }
    return sparkSession.createDataFrame(rows, schema);
}
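Examples #1 through #3 implement the same conversion, tab-separated text to a data frame, against the Spark 1, 3, and 2 APIs respectively; only the object that creates the frame differs. As a rough sketch of the expected input and output, assuming a Spark2Shims instance named shims (the sample data is invented):

// Header line of column names, then data lines, all tab-separated.
String value = "id\tname\n1\talice\n2\tbob";

// Every column is typed as string, so the result has schema
// (id: string, name: string) and two rows built from GenericRow.
Dataset<Row> df = shims.getAsDataFrame(value);
df.show();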
Example #4
Source File: SparkRecord.java From sylph with Apache License 2.0
public static Row parserRow(Record record) {
    if (record instanceof SparkRecord) {
        return ((SparkRecord) record).get();
    }
    else if (record instanceof DefaultRecord) {
        //todo: schema field type
        return new GenericRow(((DefaultRecord) record).getValues());
    }
    else {
        throw new RuntimeException("no such row type: " + record.getClass());
    }
}
Example #5
Source File: MockHiveWarehouseSessionImpl.java From spark-llap with Apache License 2.0
public static DriverResultSet testFixture() {
    ArrayList<Row> row = new ArrayList<>();
    row.add(new GenericRow(new Object[] {1, "ID 1"}));
    row.add(new GenericRow(new Object[] {2, "ID 2"}));
    StructType schema = (new StructType())
        .add("col1", "int")
        .add("col2", "string");
    return new DriverResultSet(row, schema);
}
Example #6
Source File: TestHelpers.java From iceberg with Apache License 2.0
private static Object getValue(SpecializedGetters container, int ord, Type type) {
    if (container.isNullAt(ord)) {
        return null;
    }
    switch (type.typeId()) {
        case BOOLEAN:
            return container.getBoolean(ord);
        case INTEGER:
            return container.getInt(ord);
        case LONG:
            return container.getLong(ord);
        case FLOAT:
            return container.getFloat(ord);
        case DOUBLE:
            return container.getDouble(ord);
        case STRING:
            return container.getUTF8String(ord).toString();
        case BINARY:
        case FIXED:
        case UUID:
            return container.getBinary(ord);
        case DATE:
            return new DateWritable(container.getInt(ord)).get();
        case TIMESTAMP:
            return DateTimeUtils.toJavaTimestamp(container.getLong(ord));
        case DECIMAL: {
            Types.DecimalType dt = (Types.DecimalType) type;
            return container.getDecimal(ord, dt.precision(), dt.scale()).toJavaBigDecimal();
        }
        case STRUCT:
            Types.StructType struct = type.asStructType();
            InternalRow internalRow = container.getStruct(ord, struct.fields().size());
            Object[] data = new Object[struct.fields().size()];
            for (int i = 0; i < data.length; i += 1) {
                if (internalRow.isNullAt(i)) {
                    data[i] = null;
                }
                else {
                    data[i] = getValue(internalRow, i, struct.fields().get(i).type());
                }
            }
            return new GenericRow(data);
        default:
            throw new IllegalArgumentException("Unhandled type " + type);
    }
}
Example #7
Source File: KafkaSource.java From sylph with Apache License 2.0
public JavaDStream<Row> createSource(JavaStreamingContext ssc, KafkaSourceConfig config, SourceContext context) {
    String topics = config.getTopics();
    String brokers = config.getBrokers(); //the cluster hosts must be resolvable from the machine running this program
    String groupId = config.getGroupid(); //consumer group name
    String offsetMode = config.getOffsetMode();

    Map<String, Object> kafkaParams = new HashMap<>(config.getOtherConfig());
    kafkaParams.put("bootstrap.servers", brokers);
    kafkaParams.put("key.deserializer", ByteArrayDeserializer.class); //StringDeserializer
    kafkaParams.put("value.deserializer", ByteArrayDeserializer.class); //StringDeserializer
    kafkaParams.put("enable.auto.commit", false); //do not auto-commit offsets
    // "fetch.message.max.bytes" ->
    // "session.timeout.ms" -> "30000",  //session timeout, 30 seconds by default
    // "heartbeat.interval.ms" -> "5000",  //heartbeat interval
    kafkaParams.put("group.id", groupId); //note: each stream must use a distinct group.id, otherwise offset commits will fail
    kafkaParams.put("auto.offset.reset", offsetMode); //latest earliest

    List<String> topicSets = Arrays.asList(topics.split(","));
    JavaInputDStream<ConsumerRecord<byte[], byte[]>> inputStream = KafkaUtils.createDirectStream(
            ssc,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.Subscribe(topicSets, kafkaParams));

    DStream<ConsumerRecord<byte[], byte[]>> sylphKafkaOffset = new SylphKafkaOffset<ConsumerRecord<byte[], byte[]>>(inputStream.inputDStream()) {
        @Override
        public void commitOffsets(RDD<?> kafkaRdd) {
            OffsetRange[] offsetRanges = ((HasOffsetRanges) kafkaRdd).offsetRanges();
            log().info("commitKafkaOffsets {}", (Object) offsetRanges);
            DStream<?> firstDStream = DStreamUtil.getFirstDStream(inputStream.dstream());
            ((CanCommitOffsets) firstDStream).commitAsync(offsetRanges);
        }
    };
    JavaDStream<ConsumerRecord<byte[], byte[]>> javaDStream =
            new JavaDStream<>(sylphKafkaOffset, ClassTag$.MODULE$.apply(ConsumerRecord.class));

    if ("json".equalsIgnoreCase(config.getValueType())) {
        JsonSchema jsonParser = new JsonSchema(context.getSchema());
        return javaDStream
                .map(record -> jsonParser.deserialize(record.key(), record.value(), record.topic(), record.partition(), record.offset()));
    }
    else {
        List<String> names = context.getSchema().getFieldNames();
        return javaDStream
                .map(record -> {
                    Object[] values = new Object[names.size()];
                    for (int i = 0; i < names.size(); i++) {
                        //each case must continue, otherwise the switch falls through and overwrites the value
                        switch (names.get(i)) {
                            case "_topic":
                                values[i] = record.topic();
                                continue;
                            case "_message":
                                values[i] = new String(record.value(), UTF_8);
                                continue;
                            case "_key":
                                values[i] = record.key() == null ? null : new String(record.key(), UTF_8);
                                continue;
                            case "_partition":
                                values[i] = record.partition();
                                continue;
                            case "_offset":
                                values[i] = record.offset();
                                continue;
                            case "_timestamp":
                                values[i] = record.timestamp();
                                continue;
                            case "_timestampType":
                                values[i] = record.timestampType().id;
                                continue;
                            default:
                                values[i] = null;
                        }
                    }
                    return new GenericRow(values); //GenericRowWithSchema
                }); //.window(Duration(10 * 1000))
    }
}
Example #8
Source File: JsonSchema.java From sylph with Apache License 2.0
public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset)
        throws IOException {
    @SuppressWarnings("unchecked")
    Map<String, Object> map = MAPPER.readValue(message, Map.class);
    String[] names = rowTypeInfo.names();
    Object[] values = new Object[names.length];
    for (int i = 0; i < names.length; i++) {
        String key = names[i];
        switch (key) {
            case "_topic":
                values[i] = topic;
                continue;
            case "_message":
                values[i] = new String(message, UTF_8);
                continue;
            case "_key":
                values[i] = messageKey == null ? null : new String(messageKey, UTF_8); //Kafka message keys may be null
                continue;
            case "_partition":
                values[i] = partition;
                continue;
            case "_offset":
                values[i] = offset;
                continue;
        }

        Object value = map.get(key);
        if (value == null) {
            continue;
        }
        DataType type = rowTypeInfo.apply(i).dataType();

        if (type instanceof MapType && ((MapType) type).valueType() == DataTypes.StringType) {
            scala.collection.mutable.Map convertValue = new scala.collection.mutable.HashMap(); //must be a Scala map
            for (Map.Entry entry : ((Map<?, ?>) value).entrySet()) {
                convertValue.put(entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString());
            }
            values[i] = convertValue;
        }
        else if (type instanceof ArrayType) { //the column type, not the JSON value, determines array handling
            //Class<?> aClass = type.getTypeClass();
            //values[i] = MAPPER.convertValue(value, aClass);
            //todo: Spark List to Array
            values[i] = value;
        }
        else if (type == DataTypes.LongType) {
            values[i] = ((Number) value).longValue();
        }
        else {
            values[i] = value;
        }
    }
    return new GenericRow(values);
}
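To make the mapping concrete, a hypothetical call is sketched below; the jsonSchema instance, field names, and payload are invented for illustration. Metadata columns such as _topic come from the method arguments, while the remaining columns are filled from the parsed JSON, with longs coerced via the LongType branch:

byte[] key = "k1".getBytes(UTF_8);
byte[] message = "{\"user\":\"alice\",\"age\":30}".getBytes(UTF_8);

// With a row schema of (_topic string, user string, age long), the
// resulting GenericRow holds ["events", "alice", 30L].
Row row = jsonSchema.deserialize(key, message, "events", 0, 100L);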
Example #9
Source File: SimpleMockConnector.java From spark-llap with Apache License 2.0
@Override
public Row get() {
    //i is a counter field on the enclosing class, incremented per generated row
    Row value = new GenericRow(new Object[] {i, "Element " + i});
    i++;
    return value;
}