Java Code Examples for org.apache.spark.sql.catalyst.encoders.ExpressionEncoder

The following examples show how to use org.apache.spark.sql.catalyst.encoders.ExpressionEncoder. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: EncoderHelpers.java    License: Apache License 2.0
/**
 * Builds a Spark {@link Encoder} that delegates (de)serialization to the given Beam coder by
 * assembling a Catalyst {@code ExpressionEncoder}.
 *
 * <p>The serializer side is a single {@code EncodeUsingBeamCoder} expression reading the object
 * bound at ordinal 0; the deserializer side is a {@code DecodeUsingBeamCoder} expression reading
 * column 0 cast to {@code BinaryType}. The schema comes from {@code SchemaHelpers.binarySchema()}.
 *
 * @param coder the Beam coder to wrap; its encoded type descriptor supplies the raw class
 * @param <T> the element type handled by the coder
 * @return an encoder backed by {@code coder}
 */
public static <T> Encoder<T> fromBeamCoder(Coder<T> coder) {
  Class<? super T> rawType = coder.getEncodedTypeDescriptor().getRawType();
  ClassTag<T> tag = ClassTag$.MODULE$.apply(rawType);

  // Serializer: reads the (nullable) input object at ordinal 0 and runs it through the coder.
  BoundReference inputObject = new BoundReference(0, new ObjectType(rawType), true);
  Expression encodeExpr = new EncodeUsingBeamCoder<>(inputObject, coder);
  List<Expression> serializerExprs = Collections.singletonList(encodeExpr);

  // NOTE(review): the literal `false` is presumably ExpressionEncoder's "flat" flag — confirm
  // against the Spark version this is compiled with.
  return new ExpressionEncoder<>(
      SchemaHelpers.binarySchema(),
      false,
      JavaConversions.collectionAsScalaIterable(serializerExprs).toSeq(),
      new DecodeUsingBeamCoder<>(
          new Cast(new GetColumnByOrdinal(0, BinaryType), BinaryType), tag, coder),
      tag);
}
 
Example 2
/**
 * Writes every row produced by this data set's {@code iterator} to a Parquet file at
 * {@code location} and returns a one-row data set holding the number of records written.
 *
 * <p>The table schema is derived from the operation attached to {@code context}: a
 * {@code DMLWriteOperation} supplies its own column names and row definition, while an
 * {@code ExportOperation} takes column names from its source result-column descriptors and the
 * row definition from its left operation's left operation. Any other operation type is rejected.
 *
 * @param dsp not used by this implementation
 * @param partitionBy not used by this implementation
 * @param location target location handed to the Parquet record writer
 * @param compression compression setting handed to the Parquet record writer
 * @param context operation context; supplies the operation used to build the schema and tracks
 *     the written-record count
 * @return a {@code ControlDataSet} containing a single {@code ValueRow} whose column 1 is
 *     {@code context.getRecordsWritten()} as a {@code SQLLongint}
 * @throws RuntimeException wrapping any exception raised while building the schema or writing,
 *     including the {@code IllegalArgumentException} raised for an unsupported operation type
 */
@Override
public DataSet<ExecRow> writeParquetFile(DataSetProcessor dsp, int[] partitionBy, String location, String compression, OperationContext context) {

    try {
        // Generate the table schema: column names plus one value descriptor per column.
        String[] colNames;
        DataValueDescriptor[] dvds;
        if (context.getOperation() instanceof DMLWriteOperation) {
            dvds  = context.getOperation().getExecRowDefinition().getRowArray();
            colNames = ((DMLWriteOperation) context.getOperation()).getColumnNames();
        } else if (context.getOperation() instanceof ExportOperation) {
            dvds = context.getOperation().getLeftOperation().getLeftOperation().getExecRowDefinition().getRowArray();
            ExportOperation export = (ExportOperation) context.getOperation();
            // Fetch the descriptors once and reuse them for both sizing and naming.
            ResultColumnDescriptor[] descriptors = export.getSourceResultColumnDescriptors();
            colNames = new String[descriptors.length];
            for (int i = 0; i < descriptors.length; i++) {
                colNames[i] = descriptors[i].getName();
            }
        } else {
            throw new IllegalArgumentException("Unsupported operation type: " + context.getOperation());
        }
        StructField[] fields = new StructField[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            fields[i] = dvds[i].getStructField(colNames[i]);
        }
        StructType tableSchema = DataTypes.createStructType(fields);
        RecordWriter<Void, Object> rw = ParquetWriterService.getFactory().getParquetRecordWriter(location, compression, tableSchema);

        try {
            ExpressionEncoder<Row> encoder = RowEncoder.apply(tableSchema);
            while (iterator.hasNext()) {
                ValueRow vr = (ValueRow) iterator.next();
                // Count the record before handing it to the writer.
                context.recordWrite();

                rw.write(null, encoder.toRow(vr));
            }
        } finally {
            // Always release the writer, even when a write fails part-way through.
            rw.close(null);
        }
    } catch (Exception e) {
        // Wrapping an InterruptedException would otherwise lose the interrupt; restore the flag
        // so code further up the stack can still observe it.
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new RuntimeException(e);
    }

    // Report the number of written records as a single-row, single-column result.
    ValueRow valueRow = new ValueRow(1);
    valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
    return new ControlDataSet(Collections.singletonList(valueRow).iterator());
}