Java Code Examples for org.apache.spark.sql.Row#size()
The following examples show how to use org.apache.spark.sql.Row#size().
Each snippet is taken from an open-source project; its source file and license are noted above it.
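Before the project examples, here is a minimal, self-contained sketch of the idiom (class name and field values are illustrative, not from any of the projects below): Row#size() returns the number of fields in a Row, which makes it the natural loop bound for positional access via Row#get(int).

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

public class RowSizeDemo {
    public static void main(String[] args) {
        // RowFactory.create builds a local Row; no SparkSession is needed for this.
        Row row = RowFactory.create("alice", 42, 3.14);

        // size() reports the number of fields (3 here), so it bounds
        // positional access through get(i).
        for (int i = 0; i < row.size(); i++) {
            System.out.println(i + " -> " + row.get(i));
        }
    }
}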
Example 1
Source File: DataFrames.java From DataVec with Apache License 2.0
/**
 * Convert a given Row to a list of writables, given the specified Schema
 *
 * @param schema Schema for the data
 * @param row    Row of data
 */
public static List<Writable> rowToWritables(Schema schema, Row row) {
    List<Writable> ret = new ArrayList<>();
    for (int i = 0; i < row.size(); i++) {
        switch (schema.getType(i)) {
            case Double:
                ret.add(new DoubleWritable(row.getDouble(i)));
                break;
            case Float:
                ret.add(new FloatWritable(row.getFloat(i)));
                break;
            case Integer:
                ret.add(new IntWritable(row.getInt(i)));
                break;
            case Long:
                ret.add(new LongWritable(row.getLong(i)));
                break;
            case String:
                ret.add(new Text(row.getString(i)));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
    }
    return ret;
}
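For context, a hypothetical call site for this helper might look like the following (the schema and values are made up for illustration, and the usual DataVec and Spark imports are assumed). The column types must line up with the row's field types, since the method dispatches to the type-specific getters (getDouble, getInt, getString, ...).

Schema schema = new Schema.Builder()
        .addColumnDouble("price")
        .addColumnInteger("count")
        .addColumnString("label")
        .build();

// Field values match the declared column types, so the typed getters succeed.
Row row = RowFactory.create(9.99, 3, "book");
List<Writable> writables = DataFrames.rowToWritables(schema, row);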
Example 2
Source File: NestDeriver.java From envelope with Apache License 2.0
@Override
public Row call(Tuple2<Iterable<Row>, Iterable<Row>> cogrouped) throws Exception {
    // There should only be one 'into' record per key
    Row intoRow = cogrouped._1().iterator().next();
    Row[] fromRows = Iterables.toArray(cogrouped._2(), Row.class);

    int intoRowNumFields = intoRow.size();

    Object[] nestedValues = new Object[intoRowNumFields + 1];
    for (int i = 0; i < intoRowNumFields; i++) {
        nestedValues[i] = intoRow.get(i);
    }
    nestedValues[intoRowNumFields] = fromRows;

    Row nested = RowFactory.create(nestedValues);

    return nested;
}
Example 3
Source File: JavaRDDToDataset.java From mmtf-spark with Apache License 2.0
/**
 * Converts a JavaRDD<Row> to a Dataset<Row>. This method only
 * supports simple data types, and all values must be non-null.
 *
 * @param data     JavaRDD of Row objects
 * @param colNames names of the columns in a row
 * @return Dataset of Rows with the given column names
 */
public static Dataset<Row> getDataset(JavaRDD<Row> data, String... colNames) {
    // create the schema for the dataset
    Row row = data.first();
    int length = row.length();

    if (length != colNames.length) {
        throw new IllegalArgumentException("colNames length does not match row length");
    }

    StructField[] sf = new StructField[length];
    for (int i = 0; i < row.size(); i++) {
        Object o = row.get(i);

        // TODO add more types
        if (o instanceof String) {
            sf[i] = DataTypes.createStructField(colNames[i], DataTypes.StringType, false);
        } else if (o instanceof Integer) {
            sf[i] = DataTypes.createStructField(colNames[i], DataTypes.IntegerType, false);
        } else if (o instanceof Long) {
            sf[i] = DataTypes.createStructField(colNames[i], DataTypes.LongType, false);
        } else if (o instanceof Float) {
            sf[i] = DataTypes.createStructField(colNames[i], DataTypes.FloatType, false);
        } else if (o instanceof Double) {
            sf[i] = DataTypes.createStructField(colNames[i], DataTypes.DoubleType, false);
        } else if (o instanceof Boolean) {
            sf[i] = DataTypes.createStructField(colNames[i], DataTypes.BooleanType, false);
        } else {
            System.out.println("Data type not implemented yet");
        }
    }
    StructType schema = new StructType(sf);

    // convert JavaRDD to Dataset
    SparkSession spark = SparkSession.builder().getOrCreate();
    return spark.createDataFrame(data, schema);
}
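A sketch of how this helper might be invoked (column names and values are invented, and an existing SparkSession named spark plus the usual imports are assumed). The number of column names must match the row width, and every value must be non-null, because the schema is inferred from the first row with nullable set to false.

JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
        RowFactory.create("1ABC", 2021),
        RowFactory.create("4HHB", 1984)));

// Two column names for two-field rows; the schema is inferred from the first row.
Dataset<Row> ds = JavaRDDToDataset.getDataset(rdd, "structureId", "year");
ds.printSchema();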
Example 4
Source File: ToRecord.java From DataVec with Apache License 2.0
@Override
public List<Writable> call(Row v1) throws Exception {
    List<Writable> ret = new ArrayList<>();
    if (v1.size() != schema.numColumns())
        throw new IllegalArgumentException("Invalid number of columns for row " + v1.size()
                + " should have matched schema columns " + schema.numColumns());
    for (int i = 0; i < v1.size(); i++) {
        if (v1.get(i) == null)
            throw new IllegalStateException("Row item " + i + " is null");
        switch (schema.getType(i)) {
            case Double:
                ret.add(new DoubleWritable(v1.getDouble(i)));
                break;
            case Float:
                ret.add(new FloatWritable(v1.getFloat(i)));
                break;
            case Integer:
                ret.add(new IntWritable(v1.getInt(i)));
                break;
            case Long:
                ret.add(new LongWritable(v1.getLong(i)));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
    }
    return ret;
}
Example 5
Source File: LogOutput.java From envelope with Apache License 2.0
@Override
public void call(Row mutation) throws Exception {
    if (joiner == null) {
        joiner = Joiner.on(delimiter).useForNull("");
    }

    List<Object> values = Lists.newArrayList();
    for (int fieldIndex = 0; fieldIndex < mutation.size(); fieldIndex++) {
        values.add(mutation.get(fieldIndex));
    }
    String log = joiner.join(values);

    switch (logLevel) {
        case "TRACE":
            LOG.trace(log);
            break;
        case "DEBUG":
            LOG.debug(log);
            break;
        case "INFO":
            LOG.info(log);
            break;
        case "WARN":
            LOG.warn(log);
            break;
        case "ERROR":
            LOG.error(log);
            break;
        default:
            throw new RuntimeException("Invalid log level: " + logLevel);
    }
}
Example 6
Source File: Spark1Shims.java From zeppelin with Apache License 2.0
private List sparkRowToList(Row row) {
    List list = new ArrayList();
    for (int i = 0; i < row.size(); i++) {
        list.add(row.get(i));
    }
    return list;
}
Example 7
Source File: Spark3Shims.java From zeppelin with Apache License 2.0
private List sparkRowToList(Row row) {
    List list = new ArrayList();
    for (int i = 0; i < row.size(); i++) {
        list.add(row.get(i));
    }
    return list;
}
Example 8
Source File: Spark2Shims.java From zeppelin with Apache License 2.0
private List sparkRowToList(Row row) {
    List list = new ArrayList();
    for (int i = 0; i < row.size(); i++) {
        list.add(row.get(i));
    }
    return list;
}
Example 9
Source File: ToRecord.java From deeplearning4j with Apache License 2.0
@Override
public List<Writable> call(Row v1) throws Exception {
    List<Writable> ret = new ArrayList<>();
    if (v1.size() != schema.numColumns())
        throw new IllegalArgumentException("Invalid number of columns for row " + v1.size()
                + " should have matched schema columns " + schema.numColumns());
    for (int i = 0; i < v1.size(); i++) {
        if (v1.get(i) == null)
            throw new IllegalStateException("Row item " + i + " is null");
        switch (schema.getType(i)) {
            case Double:
                ret.add(new DoubleWritable(v1.getDouble(i)));
                break;
            case Float:
                ret.add(new FloatWritable(v1.getFloat(i)));
                break;
            case Integer:
                ret.add(new IntWritable(v1.getInt(i)));
                break;
            case Long:
                ret.add(new LongWritable(v1.getLong(i)));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
    }
    return ret;
}
Example 10
Source File: FrameRDDConverterUtils.java From systemds with Apache License 2.0
@Override
public Iterator<Tuple2<Long, FrameBlock>> call(Iterator<Tuple2<Row, Long>> arg0)
    throws Exception
{
    ArrayList<Tuple2<Long, FrameBlock>> ret = new ArrayList<>();

    long ix = -1;
    FrameBlock fb = null;
    Object[] tmprow = new Object[(int) _clen];

    while (arg0.hasNext()) {
        Tuple2<Row, Long> tmp = arg0.next();
        Row row = tmp._1();
        long rowix = tmp._2() + 1;

        if (fb == null || fb.getNumRows() == _maxRowsPerBlock) {
            if (fb != null)
                flushBlocksToList(ix, fb, ret);
            ix = rowix;
            fb = new FrameBlock(_schema, _colnames);
        }

        //process row data
        int off = _containsID ? 1 : 0;
        for (int i = off, pos = 0; i < row.size(); i++) {
            if (i - off == _colVect) {
                Vector vect = (Vector) row.get(i);
                for (int j = 0; j < vect.size(); j++)
                    tmprow[pos++] = vect.apply(j);
            }
            else {
                tmprow[pos] = UtilFunctions.objectToObject(_schema[pos], row.get(i));
                pos++;
            }
        }
        fb.appendRow(tmprow);
    }

    //flush last blocks
    flushBlocksToList(ix, fb, ret);

    return ret.iterator();
}
Example 11
Source File: Spark1Shims.java From zeppelin with Apache License 2.0
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
    if (obj instanceof DataFrame) {
        DataFrame df = (DataFrame) obj;
        String[] columns = df.columns();
        // DDL will empty DataFrame
        if (columns.length == 0) {
            return "";
        }

        // fetch maxResult+1 rows so that we can check whether it is larger than zeppelin.spark.maxResult
        List<Row> rows = df.takeAsList(maxResult + 1);
        String template = context.getLocalProperties().get("template");
        if (!StringUtils.isBlank(template)) {
            if (rows.size() >= 1) {
                return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
            } else {
                return "";
            }
        }

        StringBuilder msg = new StringBuilder();
        msg.append("\n%table ");
        msg.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
        msg.append("\n");
        boolean isLargerThanMaxResult = rows.size() > maxResult;
        if (isLargerThanMaxResult) {
            rows = rows.subList(0, maxResult);
        }
        for (Row row : rows) {
            for (int i = 0; i < row.size(); ++i) {
                msg.append(TableDataUtils.normalizeColumn(row.get(i)));
                if (i != row.size() - 1) {
                    msg.append("\t");
                }
            }
            msg.append("\n");
        }

        if (isLargerThanMaxResult) {
            msg.append("\n");
            msg.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
        }
        // append %text at the end, otherwise the following output will be put in table as well.
        msg.append("\n%text ");
        return msg.toString();
    } else {
        return obj.toString();
    }
}
Example 12
Source File: Spark3Shims.java From zeppelin with Apache License 2.0
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
    if (obj instanceof Dataset) {
        Dataset<Row> df = ((Dataset) obj).toDF();
        String[] columns = df.columns();
        // DDL will empty DataFrame
        if (columns.length == 0) {
            return "";
        }

        // fetch maxResult+1 rows so that we can check whether it is larger than zeppelin.spark.maxResult
        List<Row> rows = df.takeAsList(maxResult + 1);
        String template = context.getLocalProperties().get("template");
        if (!StringUtils.isBlank(template)) {
            if (rows.size() >= 1) {
                return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
            } else {
                return "";
            }
        }

        StringBuilder msg = new StringBuilder();
        msg.append("%table ");
        msg.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
        msg.append("\n");
        boolean isLargerThanMaxResult = rows.size() > maxResult;
        if (isLargerThanMaxResult) {
            rows = rows.subList(0, maxResult);
        }
        for (Row row : rows) {
            for (int i = 0; i < row.size(); ++i) {
                msg.append(TableDataUtils.normalizeColumn(row.get(i)));
                if (i != row.size() - 1) {
                    msg.append("\t");
                }
            }
            msg.append("\n");
        }

        if (isLargerThanMaxResult) {
            msg.append("\n");
            msg.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
        }
        // append %text at the end, otherwise the following output will be put in table as well.
        msg.append("\n%text ");
        return msg.toString();
    } else {
        return obj.toString();
    }
}
Example 13
Source File: Spark2Shims.java From zeppelin with Apache License 2.0
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
    if (obj instanceof Dataset) {
        Dataset<Row> df = ((Dataset) obj).toDF();
        String[] columns = df.columns();
        // DDL will empty DataFrame
        if (columns.length == 0) {
            return "";
        }

        // fetch maxResult+1 rows so that we can check whether it is larger than zeppelin.spark.maxResult
        List<Row> rows = df.takeAsList(maxResult + 1);
        String template = context.getLocalProperties().get("template");
        if (!StringUtils.isBlank(template)) {
            if (rows.size() >= 1) {
                return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
            } else {
                return "";
            }
        }

        StringBuilder msg = new StringBuilder();
        msg.append("\n%table ");
        msg.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
        msg.append("\n");
        boolean isLargerThanMaxResult = rows.size() > maxResult;
        if (isLargerThanMaxResult) {
            rows = rows.subList(0, maxResult);
        }
        for (Row row : rows) {
            for (int i = 0; i < row.size(); ++i) {
                msg.append(TableDataUtils.normalizeColumn(row.get(i)));
                if (i != row.size() - 1) {
                    msg.append("\t");
                }
            }
            msg.append("\n");
        }

        if (isLargerThanMaxResult) {
            msg.append("\n");
            msg.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
        }
        // append %text at the end, otherwise the following output will be put in table as well.
        msg.append("\n%text ");
        return msg.toString();
    } else {
        return obj.toString();
    }
}