Java Code Examples for org.apache.spark.sql.types.StructField#name()

The following examples show how to use org.apache.spark.sql.types.StructField#name(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.

Example 1

Source File: SparkCubingJobTest.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Derives an output schema from {@code layoutDs} and stores it in {@code OUT_SCHEMA}.
 *
 * <p>Every column whose name equals {@code fieldName} (ignoring case) is replaced by a
 * field with the same name and metadata but with {@code dataType} and the nullable flag
 * set to {@code false}; all other fields are carried over unchanged.
 *
 * @param layoutDs  dataset whose schema and column names are read
 * @param fieldName column name to look for, compared case-insensitively
 * @param dataType  data type given to the matching column(s)
 * @return position of the last matching column, or 0 when no column matches
 */
private Integer convertOutSchema(Dataset<Row> layoutDs, String fieldName,
        org.apache.spark.sql.types.DataType dataType) {
    final StructField[] sourceFields = layoutDs.schema().fields();
    final String[] columnNames = layoutDs.columns();
    final StructField[] convertedFields = new StructField[sourceFields.length];

    int matchedIndex = 0;
    for (int pos = 0; pos < sourceFields.length; pos++) {
        final StructField current = sourceFields[pos];
        if (!columnNames[pos].equalsIgnoreCase(fieldName)) {
            // Not the requested column: keep the field exactly as it is.
            convertedFields[pos] = current;
            continue;
        }

        matchedIndex = pos;
        convertedFields[pos] = new StructField(current.name(), dataType, false, current.metadata());
    }

    OUT_SCHEMA = new StructType(convertedFields);
    return matchedIndex;
}

Example 2

Source File: NManualBuildAndQueryCuboidTest.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Rebuilds the schema of {@code layoutDs} with the column named {@code fieldName}
 * retyped to {@code dataType}, and publishes the result through {@code OUT_SCHEMA}.
 *
 * <p>Column names are matched ignoring case. A matching field keeps its name and
 * metadata, takes the new data type and is created with nullable set to {@code false}.
 *
 * @param layoutDs  dataset providing the schema and the column names
 * @param fieldName name of the column to retype (case-insensitive)
 * @param dataType  replacement data type for the matching column(s)
 * @return index of the last column that matched, or 0 if none did
 */
private Integer convertOutSchema(Dataset<Row> layoutDs, String fieldName,
                                 org.apache.spark.sql.types.DataType dataType) {
    StructField[] originalFields = layoutDs.schema().fields();
    String[] names = layoutDs.columns();

    // Start from a copy of the original fields and overwrite only the matching slots.
    StructField[] retypedFields = originalFields.clone();
    int lastMatch = 0;
    for (int slot = 0; slot < originalFields.length; slot++) {
        if (names[slot].equalsIgnoreCase(fieldName)) {
            StructField matched = originalFields[slot];
            retypedFields[slot] = new StructField(matched.name(), dataType, false, matched.metadata());
            lastMatch = slot;
        }
    }

    OUT_SCHEMA = new StructType(retypedFields);

    return lastMatch;
}

Example 3

Source File: TranslateFunction.java From envelope with Apache License 2.0

6 votes

/**
 * Returns a schema with the same fields as {@code without}, each renamed by
 * prefixing its name with an underscore. Data type, nullability and metadata of
 * every field are preserved.
 *
 * @param without the schema whose field names should be prefixed
 * @return a new schema containing the underscore-prefixed fields, in the same order
 * @throws RuntimeException if a prefixed name is already a field name of {@code without}
 */
private StructType addFieldNameUnderscores(StructType without) {
  // The set of existing names does not change while iterating, so build it once
  // instead of re-creating the list for every field.
  List<String> existingNames = Arrays.asList(without.fieldNames());
  List<StructField> withFields = Lists.newArrayList();

  for (StructField withoutField : without.fields()) {
    String withName = "_" + withoutField.name();
    if (existingNames.contains(withName)) {
      throw new RuntimeException("Can not append raw field '" + withName + "' because that " +
          "field already exists as a result of the translation");
    }

    StructField withField = DataTypes.createStructField(
        withName, withoutField.dataType(), withoutField.nullable(), withoutField.metadata());

    withFields.add(withField);
  }

  return DataTypes.createStructType(withFields);
}

Example 4

Source File: DbPersistorSQLServer.java From rdf2x with Apache License 2.0

5 votes

/**
 * Writes {@code df} via the superclass after casting every boolean column to integer.
 *
 * <p>Each boolean column is cast into a temporary column (original name plus
 * {@code TMP_SUFFIX}), the original column is dropped, and the temporary column is
 * renamed back to the original name.
 *
 * @param name passed through unchanged to the superclass implementation
 * @param df   data frame to convert and write
 */
@Override
public void writeDataFrame(String name, DataFrame df) {
    DataFrame converted = df;
    for (StructField field : df.schema().fields()) {
        if (field.dataType() != DataTypes.BooleanType) {
            continue;
        }

        // Booleans are written as integers to avoid this error in Spark 1.6.2:
        // "Cannot specify a column width on data type bit."
        String column = field.name();
        String tmpColumn = column + TMP_SUFFIX;
        converted = converted
                .withColumn(tmpColumn, converted.col(column).cast(DataTypes.IntegerType))
                .drop(column)
                .withColumnRenamed(tmpColumn, column);
    }
    super.writeDataFrame(name, converted);
}

Example 5

Source File: FlatteningTransformer.java From hudi with Apache License 2.0

5 votes

/**
 * Builds a comma-separated SQL select list that flattens nested struct fields.
 *
 * <p>Each leaf field is emitted as {@code <path> as <alias>}, where {@code <path>} is
 * the dot-separated field path and {@code <alias>} is the same path with dots replaced
 * by underscores. Struct-typed fields are expanded recursively. A struct that
 * contributes no leaf fields is skipped so that no empty entries (stray commas) end
 * up in the result.
 *
 * @param schema the schema (or nested struct) to flatten
 * @param prefix dot-separated path of the enclosing struct, or {@code null} at the top level
 * @return the select list; empty if the schema yields no leaf fields
 */
public String flattenSchema(StructType schema, String prefix) {
  final StringBuilder selectSQLQuery = new StringBuilder();

  for (StructField field : schema.fields()) {
    final String fieldName = field.name();

    // it is also possible to expand arrays by using Spark "expand" function.
    // As it can increase data size significantly we later pass additional property with a
    // list of arrays to expand.
    final String colName = prefix == null ? fieldName : (prefix + "." + fieldName);

    final String projection;
    if (field.dataType() instanceof StructType) {
      projection = flattenSchema((StructType) field.dataType(), colName);
    } else {
      projection = colName + " as " + colName.replace(".", "_");
    }

    // A nested struct without leaf fields produces nothing; appending a separator
    // for it would leave a dangling comma in the select list.
    if (projection.isEmpty()) {
      continue;
    }

    if (selectSQLQuery.length() > 0) {
      selectSQLQuery.append(",");
    }
    selectSQLQuery.append(projection);
  }

  return selectSQLQuery.toString();
}

Example 6

Source File: EventTimeHistoryPlanner.java From envelope with Apache License 2.0

5 votes

/**
 * Fills null fields of {@code into} with the corresponding non-null values of
 * {@code from}, when carry-forward is enabled in the configuration.
 *
 * <p>If the {@code CARRY_FORWARD_CONFIG_NAME} setting is missing or false,
 * {@code into} is returned untouched.
 *
 * @param into row whose null fields may be filled; its schema drives the iteration
 * @param from row supplying replacement values
 * @return {@code into}, or the row produced by {@code RowUtils.set} for the filled fields
 */
private Row carryForwardWhenNull(Row into, Row from) {
  boolean carryForwardEnabled =
      config.hasPath(CARRY_FORWARD_CONFIG_NAME) && config.getBoolean(CARRY_FORWARD_CONFIG_NAME);
  if (!carryForwardEnabled) {
    return into;
  }

  Row carried = into;
  for (StructField field : into.schema().fields()) {
    String name = field.name();
    if (carried.getAs(name) == null) {
      // Only consult the source row when the target value is actually missing.
      if (from.getAs(name) != null) {
        carried = RowUtils.set(carried, name, from.getAs(name));
      }
    }
  }

  return carried;
}

Example 7

Source File: BitemporalHistoryPlanner.java From envelope with Apache License 2.0

5 votes

/**
 * Copies values from {@code from} into the null fields of {@code into} when
 * {@code doesCarryForward()} reports that carry-forward is active.
 *
 * @param into row to complete; iteration follows the fields of its schema
 * @param from row providing the values to carry forward
 * @return {@code into} unchanged when carry-forward is off or nothing was filled,
 *         otherwise the row returned by {@code RowUtils.set}
 */
private Row carryForwardWhenNull(Row into, Row from) {
  if (doesCarryForward()) {
    StructField[] fields = into.schema().fields();
    for (int i = 0; i < fields.length; i++) {
      String name = fields[i].name();
      boolean missingInTarget = into.getAs(name) == null;
      if (missingInTarget && from.getAs(name) != null) {
        into = RowUtils.set(into, name, from.getAs(name));
      }
    }
  }

  return into;
}

Example 8

Source File: InputTranslatorCompatibilityValidation.java From envelope with Apache License 2.0

4 votes

/**
 * Checks whether the configured input and translator are compatible.
 *
 * <p>The result is invalid when the translator uses a provided schema but the input
 * does not declare one, or when the input declares a providing schema that lacks a
 * field (same name and data type) from the translator's expecting schema. If the
 * input or translator cannot be instantiated the check is skipped and the result is
 * reported as valid.
 *
 * @param config configuration holding the input and translator sub-configurations
 * @return the validation result with an explanatory message
 */
@Override
public ValidationResult validate(Config config) {
  Input input;
  Translator translator;
  try {
    input = ComponentFactory.create(Input.class, config.getConfig(DataStep.INPUT_TYPE), false);
    translator = ComponentFactory.create(
        Translator.class, config.getConfig(StreamingStep.TRANSLATOR_PROPERTY), false);
  }
  catch (Exception e) {
    // Without both instances there is nothing to compare, so the check is skipped.
    return new ValidationResult(this, Validity.VALID,
        "Could not instantiate input and/or translator, so will not check if they are compatible.");
  }

  String inputClass = input.getClass().getSimpleName();
  String translatorClass = translator.getClass().getSimpleName();
  String incompatiblePrefix =
      inputClass + " is not compatible with " + translatorClass + " because ";

  if (!(input instanceof DeclaresProvidingSchema)) {
    if (translator instanceof UsesProvidedSchema) {
      return new ValidationResult(this, Validity.INVALID,
          incompatiblePrefix + translatorClass + " requires " + inputClass +
          " to declare the schema that it provides, but " + inputClass + " does not do so.");
    }
    return new ValidationResult(this, Validity.VALID, "Input and translator are compatible");
  }

  DeclaresProvidingSchema declaringInput = (DeclaresProvidingSchema) input;
  for (StructField expected : translator.getExpectingSchema().fields()) {
    boolean provided = false;
    for (StructField candidate : declaringInput.getProvidingSchema().fields()) {
      boolean sameName = expected.name().equals(candidate.name());
      if (sameName && expected.dataType().equals(candidate.dataType())) {
        provided = true;
        break;
      }
    }

    if (!provided) {
      return new ValidationResult(this, Validity.INVALID,
          incompatiblePrefix + inputClass + " does not provide expected field '" +
              expected.name() + "' with data type '" + expected.dataType() + "'");
    }
  }

  return new ValidationResult(this, Validity.VALID, "Input and translator are compatible");
}

Example 9

Source File: ColumnExploder.java From jpmml-evaluator-spark with GNU Affero General Public License v3.0

4 votes

/**
 * Adds one column per field of the configured struct column to the dataset.
 *
 * <p>The struct schema is obtained from {@code getStructSchema}; for each of its
 * fields, a column with the (escaped) field name is added whose value is that field
 * taken from the struct column named by {@code getStructCol()}.
 *
 * @param dataset the dataset containing the struct column
 * @return the dataset as a data frame with the additional field columns
 */
@Override
public Dataset<Row> transform(Dataset<?> dataset){
	StructType structSchema = getStructSchema(dataset.schema());
	Column structColumn = dataset.apply(DatasetUtil.escapeColumnName(getStructCol()));

	Dataset<Row> exploded = dataset.toDF();
	for(StructField structField : structSchema.fields()){
		String escapedName = DatasetUtil.escapeColumnName(structField.name());

		exploded = exploded.withColumn(escapedName, structColumn.getField(escapedName));
	}

	return exploded;
}