Java Code Examples for org.apache.flink.types.Row#of()

The following examples show how to use org.apache.flink.types.Row#of(). They are drawn from open source projects; the source file, project, and license are noted above each example.
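Before turning to the project examples, here is a minimal, self-contained sketch (not taken from any project below; the RowOfSketch class name is just for illustration) showing the basic pattern: Row.of() builds a Row positionally from a varargs list, and fields are read back by index with getField().

import org.apache.flink.types.Row;

public class RowOfSketch {

    public static void main(String[] args) {
        // Row.of(...) creates a Row whose arity equals the number of arguments.
        // Fields are positional, may mix types, and may be null.
        Row row = Row.of("alice", 42L, null);

        System.out.println(row.getArity());   // 3
        System.out.println(row.getField(0));  // alice
        System.out.println(row.getField(2));  // null
    }
}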
Example 1
Source File: BatchSQLTestProgram.java    From flink with Apache License 2.0
@Override
public Row next() {
	if (!hasNext()) {
		throw new NoSuchElementException();
	}
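	// Emit (key, UTC event timestamp, payload); keys cycle through 0..numKeys-1,
	// and the timestamp advances by stepMs after each full pass over the keys.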
	Row row = Row.of(
		keyIndex,
		LocalDateTime.ofInstant(Instant.ofEpochMilli(ms + offsetMs), ZoneOffset.UTC),
		"Some payload...");
	++keyIndex;
	if (keyIndex >= numKeys) {
		keyIndex = 0;
		ms += stepMs;
	}
	return row;
}
 
Example 2
Source File: TableSummaryTest.java    From Alink with Apache License 2.0
private TableSummary testVisit() {
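    // Mixed-type rows; every column contains at least one null, exercising missing-value handling.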
    Row[] data =
        new Row[]{
            Row.of("a", 1L, 1, 2.0, true),
            Row.of(null, 2L, 2, -3.0, true),
            Row.of("c", null, null, 2.0, false),
            Row.of("a", 0L, 0, null, null),
        };

    int[] numberIdxs = new int[]{1, 2, 3};
    String[] selectedColNames = new String[]{"f_string", "f_long", "f_int", "f_double", "f_boolean"};
    TableSummarizer summarizer = new TableSummarizer(selectedColNames, numberIdxs, false);
    for (Row aData : data) {
        summarizer.visit(aData);
    }

    return summarizer.toSummary();
}
 
Example 3
Source File: CsvRowDeSerializationSchemaTest.java    From flink with Apache License 2.0
private <T> void testField(
		TypeInformation<T> fieldInfo,
		String csvValue,
		T value,
		Consumer<CsvRowSerializationSchema.Builder> serializationConfig,
		Consumer<CsvRowDeserializationSchema.Builder> deserializationConfig,
		String fieldDelimiter) throws Exception {
	final TypeInformation<Row> rowInfo = Types.ROW(Types.STRING, fieldInfo, Types.STRING);
	final String expectedCsv = "BEGIN" + fieldDelimiter + csvValue + fieldDelimiter + "END\n";
	final Row expectedRow = Row.of("BEGIN", value, "END");

	// serialization
	final CsvRowSerializationSchema.Builder serSchemaBuilder = new CsvRowSerializationSchema.Builder(rowInfo);
	serializationConfig.accept(serSchemaBuilder);
	final byte[] serializedRow = serialize(serSchemaBuilder, expectedRow);
	assertEquals(expectedCsv, new String(serializedRow));

	// deserialization
	final CsvRowDeserializationSchema.Builder deserSchemaBuilder = new CsvRowDeserializationSchema.Builder(rowInfo);
	deserializationConfig.accept(deserSchemaBuilder);
	final Row deserializedRow = deserialize(deserSchemaBuilder, expectedCsv);
	assertEquals(expectedRow, deserializedRow);
}
 
Example 4
Source File: DocHashCountVectorizerModelMapperTest.java    From Alink with Apache License 2.0
@Test
public void testWordCount() throws Exception {
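    // Model rows: the first holds the model meta JSON, the second the per-index term data.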
    Row[] rows = new Row[] {
        Row.of(0L, "{\"numFeatures\":\"20\",\"minTF\":\"1.0\",\"featureType\":\"\\\"WORD_COUNT\\\"\"}"),
        Row.of(1048576L, "{\"16\":0.4054651081081644,\"7\":0.0,\"13\":0.4054651081081644,\"14\":-0.5108256237659907,"
            + "\"15\":-0.2876820724517809}")
    };
    List<Row> model = Arrays.asList(rows);

    Params params = new Params()
        .set(DocHashCountVectorizerPredictParams.SELECTED_COL, "sentence");

    DocHashCountVectorizerModelMapper mapper = new DocHashCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals(mapper.map(Row.of("a b c d a a ")).getField(0),
        new SparseVector(20, new int[] {7, 13, 14, 15},
            new double[] {1.0, 1.0, 3.0, 1.0}));
}
 
Example 5
Source File: LibSvmSourceSinkTest.java    From Alink with Apache License 2.0
@Test
public void testLibSvmBatchSink() throws Exception {
    Row[] rows = new Row[]{
        Row.of(1, "0:1 1:1"),
        Row.of(-1, "1:1 3:1"),
    };

    String fn = path + "libsvm1.txt";
    MemSourceBatchOp source = new MemSourceBatchOp(rows, new String[]{"label", "features"});

    new LibSvmSinkBatchOp().setFilePath(fn)
        .setLabelCol("label").setVectorCol("features").setOverwriteSink(true).linkFrom(source);

    BatchOperator.execute();

    List<String> lines = Files.readAllLines(Paths.get(fn));
    Assert.assertEquals(lines.size(), 2);
}
 
Example 6
Source File: ClusterEvaluationUtilTest.java    From Alink with Apache License 2.0
@Test
public void getClusterStatisticsEuclideanTest() {
    Row[] rows0 = new Row[] {
        Row.of(0, "0,0,0"),
        Row.of(0, "0.1,0.1,0.1"),
        Row.of(0, "0.2,0.2,0.2")
    };

    ClusterMetricsSummary clusterMetricsSummary = ClusterEvaluationUtil.getClusterStatistics(Arrays.asList(rows0),
        new EuclideanDistance());

    Assert.assertEquals(clusterMetricsSummary.k, 1);
    //Tuple6<String, Integer, Double, Double, Double, DenseVector> t = clusterMetricsSummary.map.get(0);
    Assert.assertEquals(clusterMetricsSummary.clusterId.get(0), "0");
    Assert.assertEquals(clusterMetricsSummary.clusterCnt.get(0).intValue(), 3);
    Assert.assertEquals(clusterMetricsSummary.compactness.get(0), 0.115, 0.001);
    Assert.assertEquals(clusterMetricsSummary.distanceSquareSum.get(0), 0.06, 0.01);
    Assert.assertEquals(clusterMetricsSummary.vectorNormL2Sum.get(0), 0.15, 0.01);
    Assert.assertEquals(clusterMetricsSummary.meanVector.get(0), new DenseVector(new double[]{0.1, 0.1, 0.1}));
    Assert.assertEquals(clusterMetricsSummary.k, 1);
    Assert.assertEquals(clusterMetricsSummary.total, 3);
}
 
Example 7
Source File: CsvFormatterTest.java    From Alink with Apache License 2.0
@Test
public void testFormatter() throws Exception {
    TypeInformation[] types = new TypeInformation[]{Types.STRING, Types.DOUBLE, Types.LONG,
        Types.BOOLEAN, Types.SQL_TIMESTAMP};

    Row row = Row.of("string", 1.0, 1L, true, new java.sql.Timestamp(System.currentTimeMillis()));
    CsvFormatter formatter = new CsvFormatter(types, ",", '"');
    CsvParser parser = new CsvParser(types, ",", '"');
    String text = formatter.format(row);
    Row parsed = parser.parse(text).f1;

    Assert.assertEquals(parsed.getArity(), row.getArity());
    for (int i = 0; i < parsed.getArity(); i++) {
        Assert.assertEquals(parsed.getField(i), row.getField(i));
    }
}
 
Example 8
Source File: GenerateData.java    From Alink with Apache License 2.0
public static Table getMultiTypeStreamTable() {
    Row[] testArray =
        new Row[]{
            Row.of("a", 1L, 1, 2.0, true),
            Row.of(null, 2L, 2, -3.0, true),
            Row.of("c", null, null, 2.0, false),
            Row.of("a", 0L, 0, null, null),
        };

    String[] colNames = new String[]{"f_string", "f_long", "f_int", "f_double", "f_boolean"};

    return MLEnvironmentFactory.getDefault().createStreamTable(Arrays.asList(testArray), colNames);
}
 
Example 9
Source File: SummarizerBatchOpTest.java    From Alink with Apache License 2.0
@Test
public void test() {
    Row[] testArray =
        new Row[]{
            Row.of("a", 1L, 1, 2.0, true),
            Row.of(null, 2L, 2, -3.0, true),
            Row.of("c", null, null, 2.0, false),
            Row.of("a", 0L, 0, null, null),
        };

    String[] colNames = new String[]{"f_string", "f_long", "f_int", "f_double", "f_boolean"};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    SummarizerBatchOp summarizer = new SummarizerBatchOp()
        .setSelectedCols("f_double", "f_int");

    summarizer.linkFrom(source);

    TableSummary srt = summarizer.collectSummary();

    System.out.println(srt);

    Assert.assertEquals(srt.getColNames().length, 2);
    Assert.assertEquals(srt.count(), 4);
    Assert.assertEquals(srt.numMissingValue("f_double"), 1, 10e-4);
    Assert.assertEquals(srt.numValidValue("f_double"), 3, 10e-4);
    Assert.assertEquals(srt.max("f_double"), 2.0, 10e-4);
    Assert.assertEquals(srt.min("f_int"), 0.0, 10e-4);
    Assert.assertEquals(srt.mean("f_double"), 0.3333333333333333, 10e-4);
    Assert.assertEquals(srt.variance("f_double"), 8.333333333333334, 10e-4);
    Assert.assertEquals(srt.standardDeviation("f_double"), 2.886751345948129, 10e-4);
    Assert.assertEquals(srt.normL1("f_double"), 7.0, 10e-4);
    Assert.assertEquals(srt.normL2("f_double"), 4.123105625617661, 10e-4);
}
 
Example 10
Source File: VectorSummarizerBatchOpTest.java    From Alink with Apache License 2.0
@Test
public void test() {
    Row[] testArray =
        new Row[]{
            Row.of("1.0 2.0"),
            Row.of("-1.0 -3.0"),
            Row.of("4.0 2.0"),
        };

    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp()
        .setSelectedCol("vec");

    summarizer.linkFrom(source);

    BaseVectorSummary srt = summarizer.collectVectorSummary();

    System.out.println(srt);

    Assert.assertEquals(srt.vectorSize(), 2);
    Assert.assertEquals(srt.count(), 3);
    Assert.assertEquals(srt.max(0), 4.0, 10e-4);
    Assert.assertEquals(srt.min(0), -1.0, 10e-4);
    Assert.assertEquals(srt.mean(0), 1.3333333333333333, 10e-4);
    Assert.assertEquals(srt.variance(0), 6.333333333333334, 10e-4);
    Assert.assertEquals(srt.standardDeviation(0), 2.5166114784235836, 10e-4);
    Assert.assertEquals(srt.normL1(0), 6.0, 10e-4);
    Assert.assertEquals(srt.normL2(0), 4.242640687119285, 10e-4);
}
 
Example 11
Source File: EvaluationUtil.java    From Alink with Apache License 2.0
@Override
public Row map(BaseMetricsSummary baseMetricsSummary) throws Exception {
    BaseMetricsSummary metrics = baseMetricsSummary;
    BaseMetrics baseMetrics = metrics.toMetrics();
    Row row = baseMetrics.serialize();
    return Row.of(funtionName, row.getField(0));
}
 
Example 12
Source File: GenerateData.java    From Alink with Apache License 2.0
public static Table getDenseStream() {
    Row[] testArray =
        new Row[]{
            Row.of("1.0 2.0"),
            Row.of("-1.0 -3.0"),
            Row.of("4.0 2.0"),
            Row.of(""),
            Row.of(new Object[]{null})
        };

    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    return MLEnvironmentFactory.getDefault().createStreamTable(Arrays.asList(testArray), colNames);
}
 
Example 13
Source File: TableSummarizerTest.java    From Alink with Apache License 2.0
private Row[] geneData() {
    return
        new Row[]{
            Row.of("a", 1L, 1, 2.0, true),
            Row.of(null, 2L, 2, -3.0, true),
            Row.of("c", null, null, 2.0, false),
            Row.of("a", 0L, 0, null, null),
        };
}
 
Example 14
Source File: RandomForestTrainBatchOpTest.java    From Alink with Apache License 2.0
@Test
public void linkFrom7() throws Exception {
	Row[] testArray =
		new Row[] {
			Row.of(1, 2, 0.8),
			Row.of(1, 2, 0.7),
			Row.of(0, 3, 0.4),
			Row.of(0, 2, 0.4),
			Row.of(1, 3, 0.6),
			Row.of(4, 3, 0.2),
			Row.of(4, 4, 0.3)
		};

	String[] colNames = new String[] {"col0", "col1", "label"};

	MemSourceBatchOp memSourceBatchOp = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

	RandomForestTrainBatchOp rfOp = new RandomForestTrainBatchOp()
		.setLabelCol(colNames[2])
		.setFeatureCols(colNames[0], colNames[1])
		.setNumTrees(3)
		.setTreeType("partition")
		.setTreePartition("1,2")
		.setCategoricalCols(colNames[0], colNames[1]);

	rfOp.linkFrom(memSourceBatchOp).print();

	RandomForestPredictBatchOp predictBatchOp = new RandomForestPredictBatchOp()
		.setPredictionCol("pred_result");

	predictBatchOp.linkFrom(rfOp.linkFrom(memSourceBatchOp), memSourceBatchOp).print();
}
 
Example 15
Source File: FileSystemLookupFunction.java    From flink with Apache License 2.0
public void eval(Object... values) {
	Preconditions.checkArgument(values.length == lookupCols.length, "Number of values and lookup keys mismatch");
	checkCacheReload();
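	// Convert each lookup key to its external representation; the resulting Row is the cache probe key.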
	for (int i = 0; i < values.length; i++) {
		values[i] = converters[i].toExternal(values[i]);
	}
	Row probeKey = Row.of(values);
	List<RowData> matchedRows = cache.get(probeKey);
	if (matchedRows != null) {
		for (RowData matchedRow : matchedRows) {
			collect(matchedRow);
		}
	}
}
 
Example 16
Source File: VectorCorrelationBatchOpTest.java    From Alink with Apache License 2.0
@Test
public void test() {

    Row[] testArray =
        new Row[]{
            Row.of("1.0 2.0"),
            Row.of("-1.0 -3.0"),
            Row.of("4.0 2.0"),
        };

    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    VectorCorrelationBatchOp corr = new VectorCorrelationBatchOp()
        .setSelectedCol("vec")
        .setMethod("pearson");

    corr.linkFrom(source);

    CorrelationResult corrMat = corr.collectCorrelation();

    System.out.println(corrMat);

    Assert.assertArrayEquals(corrMat.getCorrelationMatrix().getArrayCopy1D(true),
        new double[] {1.0, 0.802955068546966,
            0.802955068546966, 1.0},
        10e-4
    );
}
 
Example 17
Source File: DocCountVectorizerModelMapperTest.java    From Alink with Apache License 2.0
@Test
public void testTFIDFType() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"minTF\":\"1.0\",\"featureType\":\"\\\"TF_IDF\\\"\"}"),
        Row.of(1048576L, "{\"f0\":\"i\",\"f1\":0.6931471805599453,\"f2\":6}"),
        Row.of(2097152L, "{\"f0\":\"e\",\"f1\":0.1823215567939546,\"f2\":2}"),
        Row.of(3145728L, "{\"f0\":\"a\",\"f1\":0.4054651081081644,\"f2\":0}"),
        Row.of(4194304L, "{\"f0\":\"b\",\"f1\":0.1823215567939546,\"f2\":1}"),
        Row.of(5242880L, "{\"f0\":\"c\",\"f1\":0.6931471805599453,\"f2\":7}"),
        Row.of(6291456L, "{\"f0\":\"h\",\"f1\":0.4054651081081644,\"f2\":3}"),
        Row.of(7340032L, "{\"f0\":\"d\",\"f1\":0.6931471805599453,\"f2\":4}"),
        Row.of(8388608L, "{\"f0\":\"j\",\"f1\":0.6931471805599453,\"f2\":5}"),
        Row.of(9437184L, "{\"f0\":\"g\",\"f1\":0.6931471805599453,\"f2\":8}"),
        Row.of(10485760L, "{\"f0\":\"n\",\"f1\":1.0986122886681098,\"f2\":9}"),
        Row.of(11534336L, "{\"f0\":\"f\",\"f1\":1.0986122886681098,\"f2\":10}")
    };
    List<Row> model = Arrays.asList(rows);

    Params params = new Params()
        .set(DocCountVectorizerPredictParams.SELECTED_COL, "sentence");

    DocCountVectorizerModelMapper mapper = new DocCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals(mapper.map(Row.of("a b c d e")).getField(0),
        new SparseVector(11, new int[] {0, 1, 2, 4, 7},
            new double[] {0.08109302162163289, 0.03646431135879092, 0.03646431135879092, 0.13862943611198905,
                0.13862943611198905}));
}
 
Example 18
Source File: GenerateData.java    From Alink with Apache License 2.0
public static Table getSparseBatch() {
    Row[] testArray =
        new Row[]{
            Row.of("0:1.0  1:2.0"),
            Row.of("0:-1.0  1:-3.0"),
            Row.of("0:4.0  1:2.0"),
            Row.of("")
        };

    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    return MLEnvironmentFactory.getDefault().createBatchTable(Arrays.asList(testArray), colNames);
}
 
Example 19
Source File: RandomForestTrainBatchOpTest.java    From Alink with Apache License 2.0
@Test
public void linkFromDecisionTreeModeParallel() throws Exception {
	Row[] testArray =
		new Row[] {
			Row.of(1, 2, 0.8),
			Row.of(1, 2, 0.7),
			Row.of(0, 3, 0.4),
			Row.of(0, 2, 0.4),
			Row.of(1, 3, 0.6),
			Row.of(4, 3, 0.2),
			Row.of(4, 4, 0.3)
		};

	String[] colNames = new String[] {"col0", "col1", "label"};

	MemSourceBatchOp memSourceBatchOp = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

	DecisionTreeRegTrainBatchOp decisionTreeRegTrainBatchOp = new DecisionTreeRegTrainBatchOp()
		.setLabelCol(colNames[2])
		.setFeatureCols(colNames[0], colNames[1])
		.setMinSamplesPerLeaf(1)
		.setMaxDepth(4)
		.setMaxMemoryInMB(1)
		.setCreateTreeMode("parallel");

	DecisionTreeRegPredictBatchOp decisionTreeRegPredictBatchOp = new DecisionTreeRegPredictBatchOp()
		.setPredictionCol("pred");

	EvalRegressionBatchOp eval = new EvalRegressionBatchOp()
		.setLabelCol(colNames[2])
		.setPredictionCol("pred");

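	// Train, predict on the training data, evaluate, and check the resulting RMSE.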
	Assert.assertEquals(
		new RegressionMetrics(
			decisionTreeRegPredictBatchOp
				.linkFrom(
					decisionTreeRegTrainBatchOp.linkFrom(memSourceBatchOp),
					memSourceBatchOp
				)
				.linkTo(eval)
				.collect()
				.get(0)
		).getRmse(),
		0.026726,
		1e-6);
}
 
Example 20
Source File: BaseMetrics.java    From Alink with Apache License 2.0
/**
 * Serialize all the params into string.
 */
public Row serialize() {
    return Row.of(this.params.toJson());
}