Java Code Examples for org.apache.flink.types.Row#of()

The following examples show how to use org.apache.flink.types.Row#of(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source Project: Alink   File: TableSummaryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Pushes four mixed-type rows (some cells are {@code null}) through a
 * {@code TableSummarizer} one row at a time and returns the resulting summary.
 *
 * @return the summary produced by the summarizer after it has visited every row
 */
private TableSummary testVisit() {
    final String[] selectedColNames = {"f_string", "f_long", "f_int", "f_double", "f_boolean"};
    // Positions 1..3 of selectedColNames, i.e. f_long, f_int and f_double.
    final int[] numberIdxs = {1, 2, 3};
    final TableSummarizer summarizer = new TableSummarizer(selectedColNames, numberIdxs, false);

    final Row[] rows = {
        Row.of("a", 1L, 1, 2.0, true),
        Row.of(null, 2L, 2, -3.0, true),
        Row.of("c", null, null, 2.0, false),
        Row.of("a", 0L, 0, null, null),
    };
    for (int i = 0; i < rows.length; i++) {
        summarizer.visit(rows[i]);
    }

    return summarizer.toSummary();
}
 
Example 2
/**
 * Round-trips one field value through the CSV row serialization and
 * deserialization schemas. The field under test is placed between two marker
 * string fields ("BEGIN" and "END").
 *
 * @param fieldInfo type information of the field under test
 * @param csvValue text the field is expected to have in the CSV line
 * @param value field value placed in the row that is serialized and expected back
 * @param serializationConfig callback applied to the serialization schema builder
 * @param deserializationConfig callback applied to the deserialization schema builder
 * @param fieldDelimiter delimiter expected between the fields of the CSV line
 * @throws Exception if serialization or deserialization fails
 */
private <T> void testField(
		TypeInformation<T> fieldInfo,
		String csvValue,
		T value,
		Consumer<CsvRowSerializationSchema.Builder> serializationConfig,
		Consumer<CsvRowDeserializationSchema.Builder> deserializationConfig,
		String fieldDelimiter) throws Exception {
	final TypeInformation<Row> rowInfo = Types.ROW(Types.STRING, fieldInfo, Types.STRING);
	final String expectedCsv = "BEGIN" + fieldDelimiter + csvValue + fieldDelimiter + "END\n";
	final Row expectedRow = Row.of("BEGIN", value, "END");

	// serialization
	final CsvRowSerializationSchema.Builder serSchemaBuilder = new CsvRowSerializationSchema.Builder(rowInfo);
	serializationConfig.accept(serSchemaBuilder);
	final byte[] serializedRow = serialize(serSchemaBuilder, expectedRow);
	// Decode with an explicit charset so the comparison does not depend on the
	// JVM's platform default. NOTE(review): UTF-8 is presumed to be what the
	// serialization schema emits — confirm.
	assertEquals(expectedCsv, new String(serializedRow, java.nio.charset.StandardCharsets.UTF_8));

	// deserialization
	final CsvRowDeserializationSchema.Builder deserSchemaBuilder = new CsvRowDeserializationSchema.Builder(rowInfo);
	deserializationConfig.accept(deserSchemaBuilder);
	final Row deserializedRow = deserialize(deserSchemaBuilder, expectedCsv);
	assertEquals(expectedRow, deserializedRow);
}
 
Example 3
/**
 * Loads a two-row model into a {@code DocHashCountVectorizerModelMapper} and
 * checks the vector produced for the sentence {@code "a b c d a a "}.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testWordCount() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"numFeatures\":\"20\",\"minTF\":\"1.0\",\"featureType\":\"\\\"WORD_COUNT\\\"\"}"),
        Row.of(1048576L, "{\"16\":0.4054651081081644,\"7\":0.0,\"13\":0.4054651081081644,\"14\":-0.5108256237659907,"
            + "\"15\":-0.2876820724517809}")
    };
    List<Row> model = Arrays.asList(rows);

    Params params = new Params()
        .set(DocHashCountVectorizerPredictParams.SELECTED_COL, "sentence");

    DocHashCountVectorizerModelMapper mapper = new DocHashCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // a failure message report the two values under the right labels.
    assertEquals(
        new SparseVector(20, new int[] {7, 13, 14, 15},
            new double[] {1.0, 1.0, 3.0, 1.0}),
        mapper.map(Row.of("a b c d a a ")).getField(0));
}
 
Example 4
Source Project: Alink   File: ClusterEvaluationUtilTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Computes cluster statistics with a Euclidean distance for three rows that all
 * carry cluster id 0, then checks the fields of the resulting summary.
 */
@Test
public void getClusterStatisticsEuclideanTest() {
    Row[] rows0 = new Row[] {
        Row.of(0, "0,0,0"),
        Row.of(0, "0.1,0.1,0.1"),
        Row.of(0, "0.2,0.2,0.2")
    };

    ClusterMetricsSummary clusterMetricsSummary = ClusterEvaluationUtil.getClusterStatistics(Arrays.asList(rows0),
        new EuclideanDistance());

    // Arguments follow JUnit's (expected, actual[, delta]) order so that failure
    // messages label the values correctly. The repeated check of k was dropped.
    Assert.assertEquals(1, clusterMetricsSummary.k);
    Assert.assertEquals("0", clusterMetricsSummary.clusterId.get(0));
    Assert.assertEquals(3, clusterMetricsSummary.clusterCnt.get(0).intValue());
    Assert.assertEquals(0.115, clusterMetricsSummary.compactness.get(0), 0.001);
    Assert.assertEquals(0.06, clusterMetricsSummary.distanceSquareSum.get(0), 0.01);
    Assert.assertEquals(0.15, clusterMetricsSummary.vectorNormL2Sum.get(0), 0.01);
    Assert.assertEquals(new DenseVector(new double[]{0.1, 0.1, 0.1}), clusterMetricsSummary.meanVector.get(0));
    Assert.assertEquals(3, clusterMetricsSummary.total);
}
 
Example 5
Source Project: Alink   File: CsvFormatterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Formats a five-field row to text with {@code CsvFormatter}, parses the text
 * back with {@code CsvParser}, and checks that arity and every field survive
 * the round trip.
 *
 * @throws Exception if formatting or parsing fails
 */
@Test
public void testFormatter() throws Exception {
    TypeInformation[] types = new TypeInformation[]{Types.STRING, Types.DOUBLE, Types.LONG,
        Types.BOOLEAN, Types.SQL_TIMESTAMP};

    Row row = Row.of("string", 1.0, 1L, true, new java.sql.Timestamp(System.currentTimeMillis()));
    CsvFormatter formatter = new CsvFormatter(types, ",", '"');
    CsvParser parser = new CsvParser(types, ",", '"');
    String text = formatter.format(row);
    Row parsed = parser.parse(text).f1;

    // The original row is the expected value and the parsed row the actual one;
    // JUnit takes them in (expected, actual) order.
    Assert.assertEquals(row.getArity(), parsed.getArity());
    for (int i = 0; i < parsed.getArity(); i++) {
        Assert.assertEquals(row.getField(i), parsed.getField(i));
    }
}
 
Example 6
Source Project: Alink   File: LibSvmSourceSinkTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes two labelled rows through {@code LibSvmSinkBatchOp} to
 * {@code path + "libsvm1.txt"} and checks that the file contains two lines.
 *
 * @throws Exception if the batch job or reading the file fails
 */
@Test
public void testLibSvmBatchSink() throws Exception {
    Row[] rows = new Row[]{
        Row.of(1, "0:1 1:1"),
        Row.of(-1, "1:1 3:1"),
    };

    String fn = path + "libsvm1.txt";
    MemSourceBatchOp source = new MemSourceBatchOp(rows, new String[]{"label", "features"});

    new LibSvmSinkBatchOp().setFilePath(fn)
        .setLabelCol("label").setVectorCol("features").setOverwriteSink(true).linkFrom(source);

    BatchOperator.execute();

    List<String> lines = Files.readAllLines(Paths.get(fn));
    // Expected value first, per JUnit's assertEquals(expected, actual) contract.
    Assert.assertEquals(2, lines.size());
}
 
Example 7
Source Project: flink   File: BatchSQLTestProgram.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the next row from the current key index, a UTC timestamp derived from
 * {@code ms + offsetMs}, and a fixed payload string, then advances the state:
 * the key index is incremented and, once it reaches {@code numKeys}, reset to 0
 * while {@code ms} moves forward by {@code stepMs}.
 *
 * @return the row built from the state as it was before advancing
 * @throws NoSuchElementException if {@code hasNext()} returns {@code false}
 */
@Override
public Row next() {
	if (!hasNext()) {
		throw new NoSuchElementException();
	}

	final Instant instant = Instant.ofEpochMilli(ms + offsetMs);
	final LocalDateTime rowTime = LocalDateTime.ofInstant(instant, ZoneOffset.UTC);
	final Row current = Row.of(keyIndex, rowTime, "Some payload...");

	// Advance to the next key; wrap around and step the clock after the last key.
	keyIndex++;
	if (keyIndex >= numKeys) {
		ms += stepMs;
		keyIndex = 0;
	}
	return current;
}
 
Example 8
Source Project: Alink   File: GenerateData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a batch table with a single column named "vec" holding four string
 * rows: three "index:value" style entries and one empty string.
 *
 * @return the table created by the default ML environment
 */
public static Table getSparseBatch() {
    final String[] colNames = {"vec"};

    return MLEnvironmentFactory.getDefault().createBatchTable(
        Arrays.asList(
            Row.of("0:1.0  1:2.0"),
            Row.of("0:-1.0  1:-3.0"),
            Row.of("0:4.0  1:2.0"),
            Row.of("")),
        colNames);
}
 
Example 9
Source Project: Alink   File: GenerateData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a stream table with five columns (string, long, int, double, boolean
 * according to the column names) from four rows, some of which contain
 * {@code null} cells.
 *
 * @return the table created by the default ML environment
 */
public static Table getMultiTypeStreamTable() {
    final String[] colNames = {"f_string", "f_long", "f_int", "f_double", "f_boolean"};

    return MLEnvironmentFactory.getDefault().createStreamTable(
        Arrays.asList(
            Row.of("a", 1L, 1, 2.0, true),
            Row.of(null, 2L, 2, -3.0, true),
            Row.of("c", null, null, 2.0, false),
            Row.of("a", 0L, 0, null, null)),
        colNames);
}
 
Example 10
/**
 * Loads a twelve-row model (one meta row with featureType TF_IDF plus eleven
 * word entries) into a {@code DocCountVectorizerModelMapper} and checks the
 * vector produced for the sentence {@code "a b c d e"}.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testTFIDFType() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"minTF\":\"1.0\",\"featureType\":\"\\\"TF_IDF\\\"\"}"),
        Row.of(1048576L, "{\"f0\":\"i\",\"f1\":0.6931471805599453,\"f2\":6}"),
        Row.of(2097152L, "{\"f0\":\"e\",\"f1\":0.1823215567939546,\"f2\":2}"),
        Row.of(3145728L, "{\"f0\":\"a\",\"f1\":0.4054651081081644,\"f2\":0}"),
        Row.of(4194304L, "{\"f0\":\"b\",\"f1\":0.1823215567939546,\"f2\":1}"),
        Row.of(5242880L, "{\"f0\":\"c\",\"f1\":0.6931471805599453,\"f2\":7}"),
        Row.of(6291456L, "{\"f0\":\"h\",\"f1\":0.4054651081081644,\"f2\":3}"),
        Row.of(7340032L, "{\"f0\":\"d\",\"f1\":0.6931471805599453,\"f2\":4}"),
        Row.of(8388608L, "{\"f0\":\"j\",\"f1\":0.6931471805599453,\"f2\":5}"),
        Row.of(9437184L, "{\"f0\":\"g\",\"f1\":0.6931471805599453,\"f2\":8}"),
        Row.of(10485760L, "{\"f0\":\"n\",\"f1\":1.0986122886681098,\"f2\":9}"),
        Row.of(11534336L, "{\"f0\":\"f\",\"f1\":1.0986122886681098,\"f2\":10}")
    };
    List<Row> model = Arrays.asList(rows);

    Params params = new Params()
        .set(DocCountVectorizerPredictParams.SELECTED_COL, "sentence");

    DocCountVectorizerModelMapper mapper = new DocCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // a failure message report the two values under the right labels.
    assertEquals(
        new SparseVector(11, new int[] {0, 1, 2, 4, 7},
            new double[] {0.08109302162163289, 0.03646431135879092, 0.03646431135879092, 0.13862943611198905,
                0.13862943611198905}),
        mapper.map(Row.of("a b c d e")).getField(0));
}
 
Example 11
/**
 * Runs {@code VectorCorrelationBatchOp} with the "pearson" method over three
 * two-element vectors stored in column "vec" and checks the flattened
 * correlation matrix within a tolerance of {@code 10e-4}.
 */
@Test
public void test() {

    Row[] testArray =
        new Row[]{
            Row.of("1.0 2.0"),
            Row.of("-1.0 -3.0"),
            Row.of("4.0 2.0"),
        };

    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    VectorCorrelationBatchOp corr = new VectorCorrelationBatchOp()
        .setSelectedCol("vec")
        .setMethod("pearson");

    corr.linkFrom(source);

    CorrelationResult corrMat = corr.collectCorrelation();

    System.out.println(corrMat);

    // Expected array first, per JUnit's assertArrayEquals(expecteds, actuals, delta).
    Assert.assertArrayEquals(
        new double[] {1.0, 0.802955068546966,
            0.802955068546966, 1.0},
        corrMat.getCorrelationMatrix().getArrayCopy1D(true),
        10e-4
    );
}
 
Example 12
Source Project: flink   File: FileSystemLookupFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the cached rows for the given key values and collects every match.
 *
 * <p>Each value is first passed through the converter at the same position;
 * the converted values form the {@code Row} used as the cache key. When the
 * cache has no entry for that key, nothing is collected.
 *
 * @param values lookup key values; their number must equal {@code lookupCols.length},
 *     otherwise the call is rejected by {@code Preconditions.checkArgument}
 */
public void eval(Object... values) {
	Preconditions.checkArgument(values.length == lookupCols.length, "Number of values and lookup keys mismatch");
	checkCacheReload();
	// Convert into a fresh array so an array passed in by the caller is not overwritten.
	final Object[] externalValues = new Object[values.length];
	for (int i = 0; i < values.length; i++) {
		externalValues[i] = converters[i].toExternal(values[i]);
	}
	Row probeKey = Row.of(externalValues);
	List<RowData> matchedRows = cache.get(probeKey);
	if (matchedRows != null) {
		for (RowData matchedRow : matchedRows) {
			collect(matchedRow);
		}
	}
}
 
Example 13
/**
 * Trains a random forest (3 trees, tree type "partition", tree partition "1,2")
 * on seven in-memory rows with both feature columns declared categorical,
 * prints the trained model, then trains again, predicts on the same data into
 * column "pred_result" and prints the predictions.
 *
 * @throws Exception if any of the batch operators fails
 */
@Test
public void linkFrom7() throws Exception {
	final String[] colNames = {"col0", "col1", "label"};
	final Row[] rows = {
		Row.of(1, 2, 0.8),
		Row.of(1, 2, 0.7),
		Row.of(0, 3, 0.4),
		Row.of(0, 2, 0.4),
		Row.of(1, 3, 0.6),
		Row.of(4, 3, 0.2),
		Row.of(4, 4, 0.3)
	};

	final MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(rows), colNames);

	final String firstFeature = colNames[0];
	final String secondFeature = colNames[1];
	final String labelCol = colNames[2];

	final RandomForestTrainBatchOp trainer = new RandomForestTrainBatchOp()
		.setLabelCol(labelCol)
		.setFeatureCols(firstFeature, secondFeature)
		.setNumTrees(3)
		.setTreeType("partition")
		.setTreePartition("1,2")
		.setCategoricalCols(firstFeature, secondFeature);

	trainer.linkFrom(source).print();

	final RandomForestPredictBatchOp predictor = new RandomForestPredictBatchOp()
		.setPredictionCol("pred_result");

	// The trainer is linked to the source a second time here, exactly as before.
	predictor.linkFrom(trainer.linkFrom(source), source).print();
}
 
Example 14
Source Project: Alink   File: TableSummarizerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the fixture rows: four rows of five fields each, with {@code null}
 * in some cells.
 *
 * @return a newly allocated array holding the four rows
 */
private Row[] geneData() {
    final Row[] rows = {
        Row.of("a", 1L, 1, 2.0, true),
        Row.of(null, 2L, 2, -3.0, true),
        Row.of("c", null, null, 2.0, false),
        Row.of("a", 0L, 0, null, null),
    };
    return rows;
}
 
Example 15
Source Project: Alink   File: GenerateData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a stream table with a single column named "vec" holding five rows:
 * three space-separated number strings, one empty string, and one row whose
 * only field is {@code null}.
 *
 * @return the table created by the default ML environment
 */
public static Table getDenseStream() {
    final String[] colNames = {"vec"};

    return MLEnvironmentFactory.getDefault().createStreamTable(
        Arrays.asList(
            Row.of("1.0 2.0"),
            Row.of("-1.0 -3.0"),
            Row.of("4.0 2.0"),
            Row.of(""),
            // Explicit Object[] so that the row has one null field.
            Row.of(new Object[]{null})),
        colNames);
}
 
Example 16
Source Project: Alink   File: EvaluationUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a metrics summary to its metrics, serializes them, and pairs the
 * first field of the serialized row with {@code funtionName}.
 *
 * @param baseMetricsSummary the summary to convert
 * @return a two-field row: {@code funtionName} and field 0 of the serialized metrics
 * @throws Exception declared by the overridden method
 */
@Override
public Row map(BaseMetricsSummary baseMetricsSummary) throws Exception {
    final Row serialized = baseMetricsSummary.toMetrics().serialize();
    return Row.of(funtionName, serialized.getField(0));
}
 
Example 17
/**
 * Runs {@code VectorSummarizerBatchOp} over three two-element vectors stored in
 * column "vec" and checks size, count and the statistics of element 0.
 */
@Test
public void test() {
    Row[] testArray =
        new Row[]{
            Row.of("1.0 2.0"),
            Row.of("-1.0 -3.0"),
            Row.of("4.0 2.0"),
        };

    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp()
        .setSelectedCol("vec");

    summarizer.linkFrom(source);

    BaseVectorSummary srt = summarizer.collectVectorSummary();

    System.out.println(srt);

    // Arguments follow JUnit's (expected, actual[, delta]) order so that failure
    // messages label the values correctly.
    Assert.assertEquals(2, srt.vectorSize());
    Assert.assertEquals(3, srt.count());
    Assert.assertEquals(4.0, srt.max(0), 10e-4);
    Assert.assertEquals(-1.0, srt.min(0), 10e-4);
    Assert.assertEquals(1.3333333333333333, srt.mean(0), 10e-4);
    Assert.assertEquals(6.333333333333334, srt.variance(0), 10e-4);
    Assert.assertEquals(2.5166114784235836, srt.standardDeviation(0), 10e-4);
    Assert.assertEquals(6.0, srt.normL1(0), 10e-4);
    Assert.assertEquals(4.242640687119285, srt.normL2(0), 10e-4);
}
 
Example 18
Source Project: Alink   File: SummarizerBatchOpTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code SummarizerBatchOp} over four mixed-type rows with columns
 * "f_double" and "f_int" selected, and checks the collected summary.
 */
@Test
public void test() {
    Row[] testArray =
        new Row[]{
            Row.of("a", 1L, 1, 2.0, true),
            Row.of(null, 2L, 2, -3.0, true),
            Row.of("c", null, null, 2.0, false),
            Row.of("a", 0L, 0, null, null),
        };

    String[] colNames = new String[]{"f_string", "f_long", "f_int", "f_double", "f_boolean"};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    SummarizerBatchOp summarizer = new SummarizerBatchOp()
        .setSelectedCols("f_double", "f_int");

    summarizer.linkFrom(source);

    TableSummary srt = summarizer.collectSummary();

    System.out.println(srt);

    // Arguments follow JUnit's (expected, actual[, delta]) order so that failure
    // messages label the values correctly.
    Assert.assertEquals(2, srt.getColNames().length);
    Assert.assertEquals(4, srt.count());
    Assert.assertEquals(1, srt.numMissingValue("f_double"), 10e-4);
    Assert.assertEquals(3, srt.numValidValue("f_double"), 10e-4);
    Assert.assertEquals(2.0, srt.max("f_double"), 10e-4);
    Assert.assertEquals(0.0, srt.min("f_int"), 10e-4);
    Assert.assertEquals(0.3333333333333333, srt.mean("f_double"), 10e-4);
    Assert.assertEquals(8.333333333333334, srt.variance("f_double"), 10e-4);
    Assert.assertEquals(2.886751345948129, srt.standardDeviation("f_double"), 10e-4);
    Assert.assertEquals(7.0, srt.normL1("f_double"), 10e-4);
    Assert.assertEquals(4.123105625617661, srt.normL2("f_double"), 10e-4);
}
 
Example 19
Source Project: Alink   File: BaseMetrics.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Serializes the params of this object.
 *
 * @return a single-field row whose field is the result of {@code params.toJson()}
 */
public Row serialize() {
    return Row.of(params.toJson());
}
 
Example 20
/**
 * Trains a regression decision tree with create-tree mode "parallel" on seven
 * in-memory rows, predicts on the same rows, evaluates the predictions against
 * the label column and checks the resulting RMSE.
 *
 * @throws Exception if any of the batch operators fails
 */
@Test
public void linkFromDecisionTreeModeParallel() throws Exception {
	Row[] testArray =
		new Row[] {
			Row.of(1, 2, 0.8),
			Row.of(1, 2, 0.7),
			Row.of(0, 3, 0.4),
			Row.of(0, 2, 0.4),
			Row.of(1, 3, 0.6),
			Row.of(4, 3, 0.2),
			Row.of(4, 4, 0.3)
		};

	String[] colNames = new String[] {"col0", "col1", "label"};

	MemSourceBatchOp memSourceBatchOp = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

	DecisionTreeRegTrainBatchOp decisionTreeRegTrainBatchOp = new DecisionTreeRegTrainBatchOp()
		.setLabelCol(colNames[2])
		.setFeatureCols(colNames[0], colNames[1])
		.setMinSamplesPerLeaf(1)
		.setMaxDepth(4)
		.setMaxMemoryInMB(1)
		.setCreateTreeMode("parallel");

	DecisionTreeRegPredictBatchOp decisionTreeRegPredictBatchOp = new DecisionTreeRegPredictBatchOp()
		.setPredictionCol("pred");

	EvalRegressionBatchOp eval = new EvalRegressionBatchOp()
		.setLabelCol(colNames[2])
		.setPredictionCol("pred");

	// Expected value first, per JUnit's assertEquals(expected, actual, delta) contract.
	Assert.assertEquals(
		0.026726,
		new RegressionMetrics(
			decisionTreeRegPredictBatchOp
				.linkFrom(
					decisionTreeRegTrainBatchOp.linkFrom(memSourceBatchOp),
					memSourceBatchOp
				)
				.linkTo(eval)
				.collect()
				.get(0)
		).getRmse(),
		1e-6);
}