Java Code Examples for org.apache.spark.sql.Row#size()

The following examples show how to use org.apache.spark.sql.Row#size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataFrames.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the list of writables for one row, choosing the writable
 * implementation for each position from the column type that the schema
 * reports for that position.
 *
 * @param schema Schema supplying the column type for every row position
 * @param row    Row whose values are converted, in positional order
 * @return one writable per row value, in the same order as the row
 * @throws IllegalStateException if the schema reports a column type other
 *                               than Double, Float, Integer, Long or String
 */
public static List<Writable> rowToWritables(Schema schema, Row row) {
    final List<Writable> writables = new ArrayList<>();
    final int numValues = row.size();
    int col = 0;
    while (col < numValues) {
        final Writable converted;
        switch (schema.getType(col)) {
            case Double:
                converted = new DoubleWritable(row.getDouble(col));
                break;
            case Float:
                converted = new FloatWritable(row.getFloat(col));
                break;
            case Integer:
                converted = new IntWritable(row.getInt(col));
                break;
            case Long:
                converted = new LongWritable(row.getLong(col));
                break;
            case String:
                converted = new Text(row.getString(col));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
        writables.add(converted);
        col++;
    }
    return writables;
}
 
Example 2
Source File: NestDeriver.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a row holding every field of the first 'into' row followed by one
 * extra trailing field that contains all 'from' rows as an array.
 *
 * @param cogrouped pair of ('into' rows, 'from' rows) for one key
 * @return the 'into' fields plus the array of 'from' rows as the last field
 */
@Override
public Row call(Tuple2<Iterable<Row>, Iterable<Row>> cogrouped) throws Exception {
  // Only the first 'into' record is read: there should be exactly one per key
  final Row target = cogrouped._1().iterator().next();
  final Row[] children = Iterables.toArray(cogrouped._2(), Row.class);

  final int fieldCount = target.size();
  final Object[] values = new Object[fieldCount + 1];

  int idx = 0;
  while (idx < fieldCount) {
    values[idx] = target.get(idx);
    idx++;
  }
  // The nested rows occupy the single slot after the copied fields
  values[fieldCount] = children;

  return RowFactory.create(values);
}
 
Example 3
Source File: JavaRDDToDataset.java    From mmtf-spark with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a {@code JavaRDD<Row>} to a {@code Dataset<Row>}. The schema is
 * derived from the runtime types of the values in the first row, so this
 * method only supports simple data types (String, Integer, Long, Float,
 * Double, Boolean) and all data need to be not null.
 * 
 * @param data JavaRDD of Row objects
 * @param colNames names of the columns in a row
 * @return a Dataset over {@code data} with one non-nullable column per entry
 *         in {@code colNames}
 * @throws IllegalArgumentException if the number of column names does not
 *         match the row length, or if a value in the first row is null or
 *         of a type that is not supported yet
 */
public static Dataset<Row> getDataset(JavaRDD<Row> data, String...colNames) {
	// create the schema for the dataset from the first row
	Row row = data.first();
	int length = row.length();
	
	if (length != colNames.length) {
		throw new IllegalArgumentException("colNames length does not match row length");
	}
	
	StructField[] sf = new StructField[length];
	
	for (int i = 0; i < length; i++) {
		Object o = row.get(i);

		// TODO add more types
		if (o instanceof String) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.StringType, false);
		} else if (o instanceof Integer) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.IntegerType, false);
		} else if (o instanceof Long) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.LongType, false);
		} else if (o instanceof Float) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.FloatType, false);
		} else if (o instanceof Double) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.DoubleType, false);
		} else if (o instanceof Boolean) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.BooleanType, false);
		} else {
			// Fail fast: leaving sf[i] unset would put a null field into the
			// schema and only surface later as an obscure error.
			String typeName = (o == null) ? "null" : o.getClass().getName();
			throw new IllegalArgumentException("Data type not implemented yet for column '"
					+ colNames[i] + "': " + typeName);
		}
	}
	StructType schema = new StructType(sf);

	// convert JavaRDD to Dataset
	SparkSession spark = SparkSession.builder().getOrCreate();
	return spark.createDataFrame(data, schema);
}
 
Example 4
Source File: ToRecord.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Converts one row into a list of writables, using the column types of the
 * {@code schema} field to pick the writable for each position.
 *
 * @param v1 row to convert
 * @return one writable per row value, in positional order
 * @throws IllegalArgumentException if the row size differs from the number of schema columns
 * @throws IllegalStateException    if a row value is null or the column type is not
 *                                  Double, Float, Integer or Long
 */
@Override
public List<Writable> call(Row v1) throws Exception {
    final List<Writable> record = new ArrayList<>();
    final int rowWidth = v1.size();
    final int schemaWidth = schema.numColumns();
    if (rowWidth != schemaWidth) {
        throw new IllegalArgumentException("Invalid number of columns for row " + rowWidth
                        + " should have matched schema columns " + schemaWidth);
    }

    for (int col = 0; col < rowWidth; col++) {
        // Null values are rejected before any typed getter is invoked
        if (v1.get(col) == null) {
            throw new IllegalStateException("Row item " + col + " is null");
        }

        final Writable value;
        switch (schema.getType(col)) {
            case Double:
                value = new DoubleWritable(v1.getDouble(col));
                break;
            case Float:
                value = new FloatWritable(v1.getFloat(col));
                break;
            case Integer:
                value = new IntWritable(v1.getInt(col));
                break;
            case Long:
                value = new LongWritable(v1.getLong(col));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
        record.add(value);
    }
    return record;
}
 
Example 5
Source File: LogOutput.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Joins all field values of the row with the configured delimiter and writes
 * the resulting line to the logger at the configured level.
 *
 * @param mutation row whose field values are logged
 * @throws RuntimeException if the configured log level is not one of
 *                          TRACE, DEBUG, INFO, WARN or ERROR
 */
@Override
public void call(Row mutation) throws Exception {
  // The joiner is created on first use; null values are rendered as empty strings
  if (joiner == null) {
    joiner = Joiner.on(delimiter).useForNull("");
  }

  final List<Object> fields = Lists.newArrayList();
  final int fieldCount = mutation.size();
  int idx = 0;
  while (idx < fieldCount) {
    fields.add(mutation.get(idx));
    idx++;
  }

  final String line = joiner.join(fields);

  switch (logLevel) {
    case "TRACE":
      LOG.trace(line);
      break;
    case "DEBUG":
      LOG.debug(line);
      break;
    case "INFO":
      LOG.info(line);
      break;
    case "WARN":
      LOG.warn(line);
      break;
    case "ERROR":
      LOG.error(line);
      break;
    default:
      throw new RuntimeException("Invalid log level: " + logLevel);
  }
}
 
Example 6
Source File: Spark1Shims.java    From zeppelin with Apache License 2.0 5 votes vote down vote up
/**
 * Copies every field of the given row, in positional order, into a new list.
 *
 * @param row row to copy
 * @return list holding the row's field values
 */
private List sparkRowToList(Row row) {
  List values = new ArrayList();
  final int fieldCount = row.size();
  int idx = 0;
  while (idx < fieldCount) {
    values.add(row.get(idx));
    idx++;
  }
  return values;
}
 
Example 7
Source File: Spark3Shims.java    From zeppelin with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the field values of a row into a freshly created list, keeping
 * the positional order of the row.
 *
 * @param row row whose fields are collected
 * @return list of the row's field values
 */
private List sparkRowToList(Row row) {
  List fields = new ArrayList();
  for (int pos = 0, width = row.size(); pos < width; pos++) {
    Object field = row.get(pos);
    fields.add(field);
  }
  return fields;
}
 
Example 8
Source File: Spark2Shims.java    From zeppelin with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new list containing each field of the row, first field first.
 *
 * @param row source row
 * @return list with one element per row field
 */
private List sparkRowToList(Row row) {
  List result = new ArrayList();
  final int numFields = row.size();
  int next = 0;
  while (next != numFields) {
    result.add(row.get(next++));
  }
  return result;
}
 
Example 9
Source File: ToRecord.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a row to writables: each non-null value is wrapped in the writable
 * matching the type the {@code schema} field reports for its column.
 *
 * @param v1 row to map
 * @return the writables for the row, in column order
 * @throws IllegalArgumentException if the row and the schema disagree on the number of columns
 * @throws IllegalStateException    if a value is null, or its column type is not
 *                                  Double, Float, Integer or Long
 */
@Override
public List<Writable> call(Row v1) throws Exception {
    List<Writable> writables = new ArrayList<>();
    final int numFields = v1.size();
    if (numFields != schema.numColumns()) {
        throw new IllegalArgumentException("Invalid number of columns for row " + numFields
                        + " should have matched schema columns " + schema.numColumns());
    }

    int idx = 0;
    while (idx < numFields) {
        // A null entry aborts the conversion before the typed getters run
        if (v1.get(idx) == null) {
            throw new IllegalStateException("Row item " + idx + " is null");
        }
        switch (schema.getType(idx)) {
            case Double: {
                writables.add(new DoubleWritable(v1.getDouble(idx)));
                break;
            }
            case Float: {
                writables.add(new FloatWritable(v1.getFloat(idx)));
                break;
            }
            case Integer: {
                writables.add(new IntWritable(v1.getInt(idx)));
                break;
            }
            case Long: {
                writables.add(new LongWritable(v1.getLong(idx)));
                break;
            }
            default:
                throw new IllegalStateException("Illegal type");
        }
        idx++;
    }
    return writables;
}
 
Example 10
Source File: FrameRDDConverterUtils.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Packs the incoming (row, index) pairs into frame blocks. A new block is
 * started for the first row and whenever the current block has reached
 * _maxRowsPerBlock rows; each block is handed to flushBlocksToList together
 * with the 1-based index of its first row.
 *
 * @param arg0 iterator over (row, 0-based row index) pairs
 * @return iterator over the (index, block) pairs collected by flushBlocksToList
 */
@Override
public Iterator<Tuple2<Long, FrameBlock>> call(Iterator<Tuple2<Row, Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<Long,FrameBlock>> out = new ArrayList<>();

	long blockIx = -1;
	FrameBlock block = null;
	//single buffer reused for every appended row
	Object[] buffer = new Object[(int)_clen];
	
	while( arg0.hasNext() )
	{
		Tuple2<Row,Long> entry = arg0.next();
		Row row = entry._1();
		long rowix = entry._2()+1;
		
		//hand over a full block before starting the next one
		if( block != null && block.getNumRows() == _maxRowsPerBlock ) {
			flushBlocksToList(blockIx, block, out);
			block = null;
		}
		if( block == null ) {
			blockIx = rowix;
			block = new FrameBlock(_schema, _colnames);
		}
		
		//copy row data, skipping the leading ID column if present
		int off = _containsID ? 1 : 0;
		int pos = 0;
		int i = off;
		while( i < row.size() ) {
			if( i-off != _colVect ) {
				buffer[pos] = UtilFunctions.objectToObject(
					_schema[pos], row.get(i));
				pos++;
			}
			else {
				//vector column: every vector entry becomes its own output cell
				Vector vect = (Vector) row.get(i);
				int len = vect.size();
				for( int j=0; j<len; j++ ) {
					buffer[pos] = vect.apply(j);
					pos++;
				}
			}
			i++;
		}
		block.appendRow(buffer);
	}

	//flush last blocks
	flushBlocksToList(blockIx, block, out);

	return out.iterator();
}
 
Example 11
Source File: Spark1Shims.java    From zeppelin with Apache License 2.0 4 votes vote down vote up
/**
 * Renders a DataFrame as a tab-separated %table message, or as HTML built by
 * SingleRowInterpreterResult when the paragraph has a non-blank "template"
 * local property. Any other object is rendered via its toString().
 *
 * @param obj       object to display
 * @param maxResult maximum number of rows included in the table
 * @param context   interpreter context supplying the local properties
 * @return the rendered output; empty if the DataFrame has no columns, or if
 *         a template is set but there are no rows
 */
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
  if (!(obj instanceof DataFrame)) {
    return obj.toString();
  }

  DataFrame df = (DataFrame) obj;
  String[] columns = df.columns();
  // DDL results in a DataFrame without columns: nothing to show
  if (columns.length == 0) {
    return "";
  }

  // one extra row is fetched so that exceeding zeppelin.spark.maxResult can be detected
  List<Row> rows = df.takeAsList(maxResult + 1);
  String template = context.getLocalProperties().get("template");
  if (!StringUtils.isBlank(template)) {
    if (rows.isEmpty()) {
      return "";
    }
    return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
  }

  StringBuilder table = new StringBuilder("\n%table ");
  table.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
  table.append("\n");

  final boolean truncated = rows.size() > maxResult;
  List<Row> shown = truncated ? rows.subList(0, maxResult) : rows;
  for (Row row : shown) {
    final int width = row.size();
    for (int col = 0; col < width; col++) {
      // the separator goes between cells, never after the last one
      if (col > 0) {
        table.append("\t");
      }
      table.append(TableDataUtils.normalizeColumn(row.get(col)));
    }
    table.append("\n");
  }

  if (truncated) {
    table.append("\n");
    table.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
  }
  // the trailing %text keeps the following output from being put in the table as well
  table.append("\n%text ");
  return table.toString();
}
 
Example 12
Source File: Spark3Shims.java    From zeppelin with Apache License 2.0 4 votes vote down vote up
/**
 * Renders a Dataset as a tab-separated %table message, or as HTML built by
 * SingleRowInterpreterResult when the paragraph has a non-blank "template"
 * local property. Any other object is rendered via its toString().
 *
 * @param obj       object to display
 * @param maxResult maximum number of rows included in the table
 * @param context   interpreter context supplying the local properties
 * @return the rendered output; empty if the Dataset has no columns, or if a
 *         template is set but there are no rows
 */
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
  if (!(obj instanceof Dataset)) {
    return obj.toString();
  }

  Dataset<Row> df = ((Dataset) obj).toDF();
  String[] columns = df.columns();
  // DDL results in a DataFrame without columns: nothing to show
  if (columns.length == 0) {
    return "";
  }

  // one extra row is fetched so that exceeding zeppelin.spark.maxResult can be detected
  List<Row> rows = df.takeAsList(maxResult + 1);
  String template = context.getLocalProperties().get("template");
  if (!StringUtils.isBlank(template)) {
    if (rows.isEmpty()) {
      return "";
    }
    return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
  }

  StringBuilder table = new StringBuilder("%table ");
  table.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
  table.append("\n");

  final boolean truncated = rows.size() > maxResult;
  List<Row> shown = truncated ? rows.subList(0, maxResult) : rows;
  for (Row row : shown) {
    final int lastCol = row.size() - 1;
    for (int col = 0; col <= lastCol; col++) {
      table.append(TableDataUtils.normalizeColumn(row.get(col)));
      // the separator goes between cells, never after the last one
      if (col < lastCol) {
        table.append("\t");
      }
    }
    table.append("\n");
  }

  if (truncated) {
    table.append("\n");
    table.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
  }
  // the trailing %text keeps the following output from being put in the table as well
  table.append("\n%text ");
  return table.toString();
}
 
Example 13
Source File: Spark2Shims.java    From zeppelin with Apache License 2.0 4 votes vote down vote up
/**
 * Renders a Dataset as a tab-separated %table message, or as HTML built by
 * SingleRowInterpreterResult when the paragraph has a non-blank "template"
 * local property. Any other object is rendered via its toString().
 *
 * @param obj       object to display
 * @param maxResult maximum number of rows included in the table
 * @param context   interpreter context supplying the local properties
 * @return the rendered output; empty if the Dataset has no columns, or if a
 *         template is set but there are no rows
 */
@Override
public String showDataFrame(Object obj, int maxResult, InterpreterContext context) {
  if (!(obj instanceof Dataset)) {
    return obj.toString();
  }

  Dataset<Row> df = ((Dataset) obj).toDF();
  String[] columns = df.columns();
  // DDL results in a DataFrame without columns: nothing to show
  if (columns.length == 0) {
    return "";
  }

  // one extra row is fetched so that exceeding zeppelin.spark.maxResult can be detected
  List<Row> rows = df.takeAsList(maxResult + 1);
  String template = context.getLocalProperties().get("template");
  if (!StringUtils.isBlank(template)) {
    if (rows.isEmpty()) {
      return "";
    }
    return new SingleRowInterpreterResult(sparkRowToList(rows.get(0)), template, context).toHtml();
  }

  StringBuilder table = new StringBuilder("\n%table ");
  table.append(StringUtils.join(TableDataUtils.normalizeColumns(columns), "\t"));
  table.append("\n");

  final boolean truncated = rows.size() > maxResult;
  if (truncated) {
    rows = rows.subList(0, maxResult);
  }
  for (Row row : rows) {
    final int width = row.size();
    int col = 0;
    while (col < width) {
      // the separator goes between cells, never after the last one
      if (col != 0) {
        table.append("\t");
      }
      table.append(TableDataUtils.normalizeColumn(row.get(col)));
      col++;
    }
    table.append("\n");
  }

  if (truncated) {
    table.append("\n");
    table.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"));
  }
  // the trailing %text keeps the following output from being put in the table as well
  table.append("\n%text ");
  return table.toString();
}
 
Example 14
Source File: FrameRDDConverterUtils.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Groups the incoming (row, index) pairs into frame blocks. The first row
 * opens a block, and a further block is opened each time the current one
 * holds _maxRowsPerBlock rows; finished blocks are passed to
 * flushBlocksToList along with the 1-based index of their first row.
 *
 * @param arg0 iterator over (row, 0-based row index) pairs
 * @return iterator over the (index, block) pairs collected by flushBlocksToList
 */
@Override
public Iterator<Tuple2<Long, FrameBlock>> call(Iterator<Tuple2<Row, Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<Long,FrameBlock>> blocks = new ArrayList<>();

	long firstRowIx = -1;
	FrameBlock current = null;
	//one scratch array is reused for all rows
	Object[] cells = new Object[(int)_clen];
	
	while( arg0.hasNext() )
	{
		Tuple2<Row,Long> pair = arg0.next();
		Row row = pair._1();
		long rowix = pair._2()+1;
		
		//flush a full block, then open a fresh one when none is active
		if( current != null && current.getNumRows() == _maxRowsPerBlock ) {
			flushBlocksToList(firstRowIx, current, blocks);
			current = null;
		}
		if( current == null ) {
			firstRowIx = rowix;
			current = new FrameBlock(_schema, _colnames);
		}
		
		//process row data; the ID column, if present, is skipped
		int off = _containsID ? 1 : 0;
		int pos = 0;
		for( int i=off; i<row.size(); i++ ) {
			boolean isVectorCol = (i-off == _colVect);
			if( !isVectorCol ) {
				cells[pos] = UtilFunctions.objectToObject(
					_schema[pos], row.get(i));
				pos++;
				continue;
			}
			//vector column: each vector entry is written to its own cell
			Vector vect = (Vector) row.get(i);
			int len = vect.size();
			for( int j=0; j<len; j++ ) {
				cells[pos] = vect.apply(j);
				pos++;
			}
		}
		current.appendRow(cells);
	}

	//flush last blocks
	flushBlocksToList(firstRowIx, current, blocks);

	return blocks.iterator();
}