Java Code Examples for org.apache.spark.sql.types.StructField#name()

The following examples show how to use org.apache.spark.sql.types.StructField#name(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SparkCubingJobTest.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Derives an output schema from {@code layoutDs} and stores it in {@code OUT_SCHEMA}.
 *
 * <p>Every column whose name equals {@code fieldName} (ignoring case) is replaced by a field
 * with the same name and metadata but with {@code dataType} as its type and nullable set to
 * {@code false}. All other fields are carried over unchanged.
 *
 * @param layoutDs  dataset whose schema and column names are read
 * @param fieldName column name to look for, compared case-insensitively
 * @param dataType  data type given to the matching column
 * @return index of the last matching column, or 0 when no column matches
 */
private Integer convertOutSchema(Dataset<Row> layoutDs, String fieldName,
        org.apache.spark.sql.types.DataType dataType) {
    StructField[] schemaFields = layoutDs.schema().fields();
    String[] columnNames = layoutDs.columns();

    // Start from a copy of the existing fields and overwrite only the matching positions.
    StructField[] outFields = schemaFields.clone();
    int matchedIndex = 0;
    for (int pos = 0; pos < schemaFields.length; pos++) {
        if (!columnNames[pos].equalsIgnoreCase(fieldName)) {
            continue;
        }
        StructField original = schemaFields[pos];
        outFields[pos] = new StructField(original.name(), dataType, false, original.metadata());
        matchedIndex = pos;
    }

    OUT_SCHEMA = new StructType(outFields);
    return matchedIndex;
}
 
Example 2
Source File: NManualBuildAndQueryCuboidTest.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the output schema for {@code layoutDs} and assigns it to {@code OUT_SCHEMA}.
 *
 * <p>Fields are copied as-is, except for columns named {@code fieldName} (case-insensitive
 * match): those are rebuilt with the same name and metadata, the supplied {@code dataType},
 * and nullable set to {@code false}.
 *
 * @param layoutDs  dataset providing the schema and the column names
 * @param fieldName name of the column to retype; case is ignored when comparing
 * @param dataType  replacement data type for the matching column
 * @return index of the last column that matched, or 0 if none did
 */
private Integer convertOutSchema(Dataset<Row> layoutDs, String fieldName,
                                 org.apache.spark.sql.types.DataType dataType) {
    StructField[] inputFields = layoutDs.schema().fields();
    String[] names = layoutDs.columns();

    // Copy first, then replace the matching entries in place.
    StructField[] resultFields = inputFields.clone();
    int lastMatch = 0;
    for (int col = 0; col < inputFields.length; col++) {
        boolean isTarget = names[col].equalsIgnoreCase(fieldName);
        if (isTarget) {
            StructField source = inputFields[col];
            resultFields[col] = new StructField(source.name(), dataType, false, source.metadata());
            lastMatch = col;
        }
    }

    OUT_SCHEMA = new StructType(resultFields);
    return lastMatch;
}
 
Example 3
Source File: TranslateFunction.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a schema with the same fields as {@code without}, each renamed by prefixing its
 * name with an underscore. Data type, nullability and metadata are carried over.
 *
 * @param without schema whose fields are to be renamed
 * @return a new schema containing the underscore-prefixed fields, in the original order
 * @throws RuntimeException if a prefixed name is already a field name of {@code without}
 */
private StructType addFieldNameUnderscores(StructType without) {
  // The set of existing names does not change while iterating, so build the lookup
  // list once instead of once per field.
  List<String> existingNames = Arrays.asList(without.fieldNames());
  List<StructField> withFields = Lists.newArrayList();

  for (StructField withoutField : without.fields()) {
    String withName = "_" + withoutField.name();
    if (existingNames.contains(withName)) {
      throw new RuntimeException("Can not append raw field '" + withName + "' because that " +
          "field already exists as a result of the translation");
    }

    StructField withField = DataTypes.createStructField(
        withName, withoutField.dataType(), withoutField.nullable(), withoutField.metadata());

    withFields.add(withField);
  }

  return DataTypes.createStructType(withFields);
}
 
Example 4
Source File: DbPersistorSQLServer.java    From rdf2x with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the data frame after casting every boolean column to an integer column of the
 * same name, then delegates to the superclass implementation.
 *
 * @param name name passed through to the superclass writer
 * @param df   data frame to convert and write
 */
@Override
public void writeDataFrame(String name, DataFrame df) {
    DataFrame converted = df;
    // The fields are taken from the incoming schema; only the column types change below.
    for (StructField field : df.schema().fields()) {
        if (field.dataType() != DataTypes.BooleanType) {
            continue;
        }
        // convert booleans to integers to avoid error in Spark 1.6.2
        // "Cannot specify a column width on data type bit."
        String column = field.name();
        String tmpColumn = column + TMP_SUFFIX;
        DataFrame withTmp = converted.withColumn(
                tmpColumn, converted.col(column).cast(DataTypes.IntegerType));
        converted = withTmp.drop(column).withColumnRenamed(tmpColumn, column);
    }
    super.writeDataFrame(name, converted);
}
 
Example 5
Source File: FlatteningTransformer.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a comma-separated select list that flattens nested struct fields of a schema.
 *
 * <p>Each non-struct field contributes {@code <path> as <alias>}, where the path is the
 * dot-separated field path (starting with {@code prefix} when it is not null) and the alias
 * is the same path with dots replaced by underscores. Struct fields are expanded
 * recursively. A struct with no fields contributes nothing, so the result never contains
 * empty list entries.
 *
 * @param schema schema to flatten
 * @param prefix dot-separated path of the enclosing field, or {@code null} at the top level
 * @return the select list, or an empty string when the schema has no non-struct fields
 */
public String flattenSchema(StructType schema, String prefix) {
  final StringBuilder selectSQLQuery = new StringBuilder();

  for (StructField field : schema.fields()) {
    final String fieldName = field.name();

    // it is also possible to expand arrays by using Spark "expand" function.
    // As it can increase data size significantly we later pass additional property with a
    // list of arrays to expand.
    final String colName = prefix == null ? fieldName : (prefix + "." + fieldName);

    final String fragment;
    if (field.dataType().getClass().equals(StructType.class)) {
      fragment = flattenSchema((StructType) field.dataType(), colName);
    } else {
      fragment = colName + " as " + colName.replace(".", "_");
    }

    // An empty nested struct yields an empty fragment; appending it together with a
    // separator would produce ",," or a dangling trailing comma.
    if (fragment.isEmpty()) {
      continue;
    }

    if (selectSQLQuery.length() > 0) {
      selectSQLQuery.append(",");
    }
    selectSQLQuery.append(fragment);
  }

  return selectSQLQuery.toString();
}
 
Example 6
Source File: EventTimeHistoryPlanner.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Fills null fields of {@code into} with the corresponding non-null values of {@code from},
 * provided the carry-forward configuration is present and set to true.
 *
 * @param into row whose null fields may be filled
 * @param from row supplying the replacement values
 * @return {@code into} unchanged when carry-forward is not enabled, otherwise the row
 *         produced by applying the replacements
 */
private Row carryForwardWhenNull(Row into, Row from) {
  boolean carryForwardEnabled =
      config.hasPath(CARRY_FORWARD_CONFIG_NAME) && config.getBoolean(CARRY_FORWARD_CONFIG_NAME);
  if (!carryForwardEnabled) {
    return into;
  }

  Row result = into;
  for (StructField field : into.schema().fields()) {
    String fieldName = field.name();
    if (result.getAs(fieldName) != null) {
      continue;
    }
    // Only consult the source row once the target value is known to be null.
    Object carried = from.getAs(fieldName);
    if (carried != null) {
      result = RowUtils.set(result, fieldName, carried);
    }
  }

  return result;
}
 
Example 7
Source File: BitemporalHistoryPlanner.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Copies values from {@code from} into the null fields of {@code into} when
 * {@code doesCarryForward()} reports true.
 *
 * @param into row whose null fields may be replaced
 * @param from row providing the values to carry forward
 * @return {@code into} itself when carry-forward is off, otherwise the row with the
 *         replacements applied
 */
private Row carryForwardWhenNull(Row into, Row from) {
  if (!doesCarryForward()) {
    return into;
  }

  Row merged = into;
  for (StructField field : into.schema().fields()) {
    String fieldName = field.name();
    boolean targetIsNull = merged.getAs(fieldName) == null;
    if (!targetIsNull) {
      continue;
    }
    // The source row is read only for fields that are null in the target.
    Object sourceValue = from.getAs(fieldName);
    if (sourceValue != null) {
      merged = RowUtils.set(merged, fieldName, sourceValue);
    }
  }

  return merged;
}
 
Example 8
Source File: InputTranslatorCompatibilityValidation.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether the configured input and translator can work together.
 *
 * <p>The result is invalid when the translator is a {@code UsesProvidedSchema} but the input
 * is not a {@code DeclaresProvidingSchema}, or when the input declares a providing schema
 * that lacks a field (same name and data type) of the translator's expecting schema.
 * If either component cannot be instantiated the check is skipped and reported as valid.
 *
 * @param config configuration holding the input and translator sub-configurations
 * @return the validation result with an explanatory message
 */
@Override
public ValidationResult validate(Config config) {
  Input input;
  Translator translator;
  try {
    input = ComponentFactory.create(Input.class, config.getConfig(DataStep.INPUT_TYPE), false);
    translator = ComponentFactory.create(
        Translator.class, config.getConfig(StreamingStep.TRANSLATOR_PROPERTY), false);
  }
  catch (Exception e) {
    // Instantiation problems are not this validation's concern, so the check is skipped.
    return new ValidationResult(this, Validity.VALID,
        "Could not instantiate input and/or translator, so will not check if they" +
            " are compatible.");
  }

  String inputClass = input.getClass().getSimpleName();
  String translatorClass = translator.getClass().getSimpleName();
  String incompatiblePrefix =
      inputClass + " is not compatible with " + translatorClass + " because ";
  boolean inputDeclaresSchema = input instanceof DeclaresProvidingSchema;

  if (translator instanceof UsesProvidedSchema && !inputDeclaresSchema) {
    return new ValidationResult(this, Validity.INVALID,
        incompatiblePrefix + translatorClass + " requires " + inputClass +
        " to declare the schema that" + " it provides, but " + inputClass +
        " does not do so.");
  }

  if (!inputDeclaresSchema) {
    return new ValidationResult(this, Validity.VALID, "Input and translator are compatible");
  }

  for (StructField expected : translator.getExpectingSchema().fields()) {
    boolean provided = false;
    for (StructField offered : ((DeclaresProvidingSchema) input).getProvidingSchema().fields()) {
      if (!expected.name().equals(offered.name())) {
        continue;
      }
      if (expected.dataType().equals(offered.dataType())) {
        provided = true;
      }
    }

    if (!provided) {
      return new ValidationResult(this, Validity.INVALID,
          incompatiblePrefix + inputClass + " does not provide expected " + "field '" +
              expected.name() + "' with data type '" + expected.dataType() + "'");
    }
  }

  return new ValidationResult(this, Validity.VALID, "Input and translator are compatible");
}
 
Example 9
Source File: ColumnExploder.java    From jpmml-evaluator-spark with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Adds one column per field of the configured struct column to the dataset.
 *
 * <p>Each added column is named after the struct field (escaped via
 * {@code DatasetUtil.escapeColumnName}) and holds that field's value taken from the struct
 * column.
 *
 * @param dataset dataset containing the struct column
 * @return the dataset as a data frame with the additional columns
 */
@Override
public Dataset<Row> transform(Dataset<?> dataset){
	StructType structSchema = getStructSchema(dataset.schema());

	Column structColumn = dataset.apply(DatasetUtil.escapeColumnName(getStructCol()));

	Dataset<Row> exploded = dataset.toDF();
	for(StructField structField : structSchema.fields()){
		String escapedName = DatasetUtil.escapeColumnName(structField.name());

		exploded = exploded.withColumn(escapedName, structColumn.getField(escapedName));
	}

	return exploded;
}