Java Code Examples for org.apache.spark.sql.types.StructType#fieldIndex()

The following examples show how to use org.apache.spark.sql.types.StructType#fieldIndex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: MLContextConversionUtil.java From systemds with Apache License 2.0

6 votes

/**
 * If the FrameFormat of the DataFrame has not been explicitly specified,
 * attempt to determine the proper FrameFormat.
 * <p>
 * The format is chosen solely by whether the schema contains a column named
 * {@code RDDConverterUtils.DF_ID_COLUMN}: {@code DF_WITH_INDEX} if it does,
 * {@code DF} otherwise. Nothing happens when {@code frameMetadata} is
 * {@code null} or already carries a format.
 *
 * @param dataFrame
 *            the Spark {@code DataFrame}
 * @param frameMetadata
 *            the frame metadata, if available; updated in place
 */
public static void determineFrameFormatIfNeeded(Dataset<Row> dataFrame, FrameMetadata frameMetadata) {
	// no metadata to update, or the caller already chose a format
	if (frameMetadata == null || frameMetadata.getFrameFormat() != null) {
		return;
	}

	StructType schema = dataFrame.schema();
	boolean hasID = false;
	try {
		schema.fieldIndex(RDDConverterUtils.DF_ID_COLUMN);
		hasID = true;
	} catch (IllegalArgumentException ignored) {
		// fieldIndex reports a missing column by throwing; here that simply
		// means the DataFrame has no ID column, so hasID stays false
	}

	frameMetadata.setFrameFormat(hasID ? FrameFormat.DF_WITH_INDEX : FrameFormat.DF);
}

Example 2

Source File: SparkBenchmarkUtil.java From iceberg with Apache License 2.0

6 votes

/**
 * Builds an {@code UnsafeProjection} that picks the columns of {@code expectedSchema},
 * in that schema's order, out of rows laid out according to {@code actualSchema}.
 *
 * <p>Columns are matched by name through {@code StructType#fieldIndex}, so every column of
 * {@code expectedSchema} has to exist in {@code actualSchema}.
 *
 * @param expectedSchema schema whose columns (and order) the projection should produce
 * @param actualSchema schema describing the input rows
 * @return a projection from the actual layout to the expected layout
 */
public static UnsafeProjection projection(Schema expectedSchema, Schema actualSchema) {
  StructType sparkStruct = SparkSchemaUtil.convert(actualSchema);

  List<AttributeReference> inputRefs =
      JavaConverters.seqAsJavaListConverter(sparkStruct.toAttributes()).asJava();
  int fieldCount = sparkStruct.fields().length;

  // input side: one attribute per column of the converted schema
  List<Attribute> inputAttrs = Lists.newArrayListWithExpectedSize(fieldCount);
  inputRefs.forEach(inputRef -> inputAttrs.add(inputRef.toAttribute()));

  // output side: look each expected column up by name in the input layout
  List<Expression> outputExprs = Lists.newArrayListWithExpectedSize(fieldCount);
  for (Types.NestedField column : expectedSchema.columns()) {
    outputExprs.add(inputRefs.get(sparkStruct.fieldIndex(column.name())));
  }

  return UnsafeProjection.create(
      JavaConverters.asScalaBufferConverter(outputExprs).asScala().toSeq(),
      JavaConverters.asScalaBufferConverter(inputAttrs).asScala().toSeq());
}

Example 3

Source File: RowDataReader.java From iceberg with Apache License 2.0

6 votes

/**
 * Creates an {@code UnsafeProjection} that maps rows read with {@code readSchema} onto the
 * column order of {@code finalSchema}.
 *
 * <p>Each column of {@code finalSchema} is located in the read layout by name via
 * {@code StructType#fieldIndex}.
 *
 * @param finalSchema schema whose columns (and order) the projection should produce
 * @param readSchema schema describing the rows as they were read
 * @return a projection from the read layout to the final layout
 */
private static UnsafeProjection projection(Schema finalSchema, Schema readSchema) {
  StructType readStruct = SparkSchemaUtil.convert(readSchema);

  List<AttributeReference> readRefs =
      JavaConverters.seqAsJavaListConverter(readStruct.toAttributes()).asJava();
  int readFieldCount = readStruct.fields().length;

  // input side: one attribute per column of the read schema
  List<Attribute> readAttrs = Lists.newArrayListWithExpectedSize(readFieldCount);
  readRefs.forEach(readRef -> readAttrs.add(readRef.toAttribute()));

  // output side: resolve every final column by name against the read layout
  List<org.apache.spark.sql.catalyst.expressions.Expression> finalExprs =
      Lists.newArrayListWithExpectedSize(readFieldCount);
  for (Types.NestedField column : finalSchema.columns()) {
    finalExprs.add(readRefs.get(readStruct.fieldIndex(column.name())));
  }

  return UnsafeProjection.create(
      JavaConverters.asScalaBufferConverter(finalExprs).asScala().toSeq(),
      JavaConverters.asScalaBufferConverter(readAttrs).asScala().toSeq());
}

Example 4

Source File: Reader.java From iceberg with Apache License 2.0

6 votes

/**
 * Creates an {@code UnsafeProjection} that maps rows read with {@code readSchema} onto the
 * column order of {@code finalSchema}.
 *
 * <p>Each column of {@code finalSchema} is located in the read layout by name via
 * {@code StructType#fieldIndex}.
 *
 * @param finalSchema schema whose columns (and order) the projection should produce
 * @param readSchema schema describing the rows as they were read
 * @return a projection from the read layout to the final layout
 */
private static UnsafeProjection projection(Schema finalSchema, Schema readSchema) {
  StructType readStruct = convert(readSchema);

  List<AttributeReference> readRefs = seqAsJavaListConverter(readStruct.toAttributes()).asJava();
  int readFieldCount = readStruct.fields().length;

  // input side: one attribute per column of the read schema
  List<Attribute> readAttrs = Lists.newArrayListWithExpectedSize(readFieldCount);
  readRefs.forEach(readRef -> readAttrs.add(readRef.toAttribute()));

  // output side: resolve every final column by name against the read layout
  List<org.apache.spark.sql.catalyst.expressions.Expression> finalExprs =
      Lists.newArrayListWithExpectedSize(readFieldCount);
  for (Types.NestedField column : finalSchema.columns()) {
    finalExprs.add(readRefs.get(readStruct.fieldIndex(column.name())));
  }

  return UnsafeProjection.create(
      asScalaBufferConverter(finalExprs).asScala().toSeq(),
      asScalaBufferConverter(readAttrs).asScala().toSeq());
}

Example 5

Source File: MLContextConversionUtil.java From systemds with Apache License 2.0

6 votes

/**
 * If the FrameFormat of the DataFrame has not been explicitly specified,
 * attempt to determine the proper FrameFormat.
 * <p>
 * The format is chosen solely by whether the schema contains a column named
 * {@code RDDConverterUtils.DF_ID_COLUMN}: {@code DF_WITH_INDEX} if it does,
 * {@code DF} otherwise. Nothing happens when {@code frameMetadata} is
 * {@code null} or already carries a format.
 *
 * @param dataFrame
 *            the Spark {@code DataFrame}
 * @param frameMetadata
 *            the frame metadata, if available; updated in place
 */
public static void determineFrameFormatIfNeeded(Dataset<Row> dataFrame, FrameMetadata frameMetadata) {
	// no metadata to update, or the caller already chose a format
	if (frameMetadata == null || frameMetadata.getFrameFormat() != null) {
		return;
	}

	StructType schema = dataFrame.schema();
	boolean hasID = false;
	try {
		schema.fieldIndex(RDDConverterUtils.DF_ID_COLUMN);
		hasID = true;
	} catch (IllegalArgumentException ignored) {
		// fieldIndex reports a missing column by throwing; here that simply
		// means the DataFrame has no ID column, so hasID stays false
	}

	frameMetadata.setFrameFormat(hasID ? FrameFormat.DF_WITH_INDEX : FrameFormat.DF);
}

Example 6

Source File: MLContextConversionUtil.java From systemds with Apache License 2.0

5 votes

/**
 * If the MatrixFormat of the DataFrame has not been explicitly specified,
 * attempt to determine the proper MatrixFormat.
 * <p>
 * The decision uses two facts from the schema: whether a column named
 * {@code RDDConverterUtils.DF_ID_COLUMN} exists, and whether the first data
 * column (position 1 when an ID column exists, position 0 otherwise) is a
 * {@code VectorUDT}. Nothing happens when {@code matrixMetadata} is
 * {@code null} or already carries a format.
 *
 * @param dataFrame
 *            the Spark {@code DataFrame}
 * @param matrixMetadata
 *            the matrix metadata, if available; updated in place
 * @throws MLContextException
 *             if the schema has no data column at the expected position
 */
public static void determineMatrixFormatIfNeeded(Dataset<Row> dataFrame, MatrixMetadata matrixMetadata) {
	// no metadata to update, or the caller already chose a format
	if (matrixMetadata == null || matrixMetadata.getMatrixFormat() != null) {
		return;
	}

	StructType schema = dataFrame.schema();
	boolean hasID = false;
	try {
		schema.fieldIndex(RDDConverterUtils.DF_ID_COLUMN);
		hasID = true;
	} catch (IllegalArgumentException ignored) {
		// fieldIndex reports a missing column by throwing; here that simply
		// means the DataFrame has no ID column, so hasID stays false
	}

	StructField[] fields = schema.fields();
	// NOTE(review): like the original, this assumes the ID column (if any)
	// sits at position 0 so the data starts at position 1 - confirm
	int firstDataColumn = hasID ? 1 : 0;
	if (fields.length <= firstDataColumn) {
		// previously an ArrayIndexOutOfBoundsException; the original's
		// "mf == null" check could never fire, so raise the intended error here
		throw new MLContextException("DataFrame format not recognized as an accepted SystemDS MatrixFormat");
	}

	boolean isVector = fields[firstDataColumn].dataType() instanceof VectorUDT;
	MatrixFormat mf;
	if (hasID) {
		mf = isVector ? MatrixFormat.DF_VECTOR_WITH_INDEX : MatrixFormat.DF_DOUBLES_WITH_INDEX;
	} else {
		mf = isVector ? MatrixFormat.DF_VECTOR : MatrixFormat.DF_DOUBLES;
	}
	matrixMetadata.setMatrixFormat(mf);
}

Example 7

Source File: SparkDataFile.java From iceberg with Apache License 2.0

5 votes

/**
 * Looks up the position of the field {@code name} in {@code sparkType}.
 *
 * @param name field name to locate
 * @param sparkType struct type to search
 * @return the field's index, or -1 when the field is {@code "partition"}, it is missing from
 *     {@code sparkType}, and {@code wrappedPartition} is empty
 * @throws IllegalArgumentException if the field is missing for any other reason
 */
private int fieldPosition(String name, StructType sparkType) {
  int position;
  try {
    position = sparkType.fieldIndex(name);
  } catch (IllegalArgumentException missingField) {
    // the partition field is absent for unpartitioned tables
    boolean absentPartition = name.equals("partition") && wrappedPartition.size() == 0;
    if (!absentPartition) {
      throw missingField;
    }
    position = -1;
  }
  return position;
}

Example 8

Source File: MLContextConversionUtil.java From systemds with Apache License 2.0

5 votes

/**
 * If the MatrixFormat of the DataFrame has not been explicitly specified,
 * attempt to determine the proper MatrixFormat.
 * <p>
 * The decision uses two facts from the schema: whether a column named
 * {@code RDDConverterUtils.DF_ID_COLUMN} exists, and whether the first data
 * column (position 1 when an ID column exists, position 0 otherwise) is a
 * {@code VectorUDT}. Nothing happens when {@code matrixMetadata} is
 * {@code null} or already carries a format.
 *
 * @param dataFrame
 *            the Spark {@code DataFrame}
 * @param matrixMetadata
 *            the matrix metadata, if available; updated in place
 * @throws MLContextException
 *             if the schema has no data column at the expected position
 */
public static void determineMatrixFormatIfNeeded(Dataset<Row> dataFrame, MatrixMetadata matrixMetadata) {
	// no metadata to update, or the caller already chose a format
	if (matrixMetadata == null || matrixMetadata.getMatrixFormat() != null) {
		return;
	}

	StructType schema = dataFrame.schema();
	boolean hasID = false;
	try {
		schema.fieldIndex(RDDConverterUtils.DF_ID_COLUMN);
		hasID = true;
	} catch (IllegalArgumentException ignored) {
		// fieldIndex reports a missing column by throwing; here that simply
		// means the DataFrame has no ID column, so hasID stays false
	}

	StructField[] fields = schema.fields();
	// NOTE(review): like the original, this assumes the ID column (if any)
	// sits at position 0 so the data starts at position 1 - confirm
	int firstDataColumn = hasID ? 1 : 0;
	if (fields.length <= firstDataColumn) {
		// previously an ArrayIndexOutOfBoundsException; the original's
		// "mf == null" check could never fire, so raise the intended error here
		throw new MLContextException("DataFrame format not recognized as an accepted SystemDS MatrixFormat");
	}

	boolean isVector = fields[firstDataColumn].dataType() instanceof VectorUDT;
	MatrixFormat mf;
	if (hasID) {
		mf = isVector ? MatrixFormat.DF_VECTOR_WITH_INDEX : MatrixFormat.DF_DOUBLES_WITH_INDEX;
	} else {
		mf = isVector ? MatrixFormat.DF_VECTOR : MatrixFormat.DF_DOUBLES;
	}
	matrixMetadata.setMatrixFormat(mf);
}