Java Code Examples for org.apache.flink.api.common.typeinfo.Types#STRING

The following examples show how to use org.apache.flink.api.common.typeinfo.Types#STRING. They are drawn from open source projects; the originating project and source file are noted above each example.
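Before the project-specific examples, here is a minimal sketch of the basic pattern: Types.STRING is a shared TypeInformation constant that is combined with other constants to describe row schemas. The field names below are illustrative only.

// A minimal sketch (illustrative field names): describing a two-column row.
TypeInformation<?>[] fieldTypes = new TypeInformation<?>[]{Types.STRING, Types.LONG};
String[] fieldNames = new String[]{"name", "count"};
RowTypeInfo rowType = new RowTypeInfo(fieldTypes, fieldNames);
// rowType.toString() -> "Row(name: String, count: Long)"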
Example 1
Source File: VectorSlicerTest.java    From Alink with Apache License 2.0
AlgoOperator getData(boolean isBatch) {
	TableSchema schema = new TableSchema(
		new String[] {"id", "c0", "c1", "c2"},
		new TypeInformation <?>[] {Types.STRING, Types.STRING, Types.STRING, Types.STRING}
	);

	List <Row> rows = new ArrayList <>();

	rows.add(Row.of(new Object[] {"0", "$6$1:2.0 2:3.0 5:4.3", "3.0 2.0 3.0", "1 4 6 8"}));
	rows.add(Row.of(new Object[] {"1", "$8$1:2.0 2:3.0 7:4.3", "3.0 2.0 3.0", "1 4 6 8"}));
	rows.add(Row.of(new Object[] {"2", "$8$1:2.0 2:3.0 7:4.3", "2.0 3.0", "1 4 6 8"}));

	if (isBatch) {
		return new MemSourceBatchOp(rows, schema);
	} else {
		return new MemSourceStreamOp(rows, schema);
	}
}
 
Example 2
Source File: TableUtil.java    From Alink with Apache License 2.0
/**
 * Get the columns from featureCols that are included in <code>categoricalCols</code>, plus the columns whose
 * types are string or boolean.
 * <p>If <code>categoricalCols</code> is null, all string and boolean columns are returned.
 *
 * @param tableSchema     TableSchema of the input data.
 * @param featureCols     the columns to choose from.
 * @param categoricalCols the columns to include in the result regardless of their types; must be a subset
 *                        of featureCols.
 * @return the categorical columns.
 */
public static String[] getCategoricalCols(
    TableSchema tableSchema, String[] featureCols, String[] categoricalCols) {
    if (null == featureCols) {
        return categoricalCols;
    }
    List<String> categoricalList = null == categoricalCols ? null : Arrays.asList(categoricalCols);
    List<String> featureList = Arrays.asList(featureCols);
    if (null != categoricalCols && !featureList.containsAll(categoricalList)) {
        throw new IllegalArgumentException("CategoricalCols must be included in featureCols!");
    }

    TypeInformation[] featureColTypes = findColTypes(tableSchema, featureCols);
    List<String> res = new ArrayList<>();
    for (int i = 0; i < featureCols.length; i++) {
        boolean included = null != categoricalList && categoricalList.contains(featureCols[i]);
        if (included || Types.BOOLEAN == featureColTypes[i] || Types.STRING == featureColTypes[i]) {
            res.add(featureCols[i]);
        }
    }

    return res.toArray(new String[0]);
}
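A hedged usage sketch of the method above; the schema and column names are hypothetical, not from the project:

// Hypothetical schema: "city" is a string, "age" a long, "vip" a boolean.
TableSchema schema = new TableSchema(
    new String[]{"city", "age", "vip"},
    new TypeInformation<?>[]{Types.STRING, Types.LONG, Types.BOOLEAN});

// With categoricalCols == null, the string and boolean columns are selected.
String[] cats = TableUtil.getCategoricalCols(
    schema, new String[]{"city", "age", "vip"}, null);
// cats -> {"city", "vip"}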
 
Example 3
Source File: MinMaxScalerMapperTest.java    From Alink with Apache License 2.0
@Test
public void testMinMaxScaler() throws Exception {
    Row[] rows = new Row[]{
        Row.of(0L, "{\"min\":\"0.0\",\"max\":\"1.0\",\"selectedCols\":\"[\\\"f_long\\\",\\\"f_int\\\",\\\"f_double\\\"]\"}", null, null, null),
        Row.of(1048576L, "[0.0,0.0,-3.0]", null, null, null),
        Row.of(2097152L, "[2.0, 2.0, 2.0]", null, null, null)
    };


    List<Row> model = Arrays.asList(rows);

    TableSchema dataSchema = new TableSchema(
        new String[]{"f_string", "f_long", "f_int", "f_double", "f_boolean"},
        new TypeInformation<?>[]{Types.STRING, Types.LONG, Types.INT, Types.DOUBLE, Types.BOOLEAN}
    );
    Params params = new Params();

    MinMaxScalerModelMapper mapper = new MinMaxScalerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals((double) mapper.map(Row.of("d", 1L, 1, 2.0, true)).getField(1), 0.5, 10e-4);
    assertEquals((double) mapper.map(Row.of("a", 1L, 1, 2.0, true)).getField(2), 0.8, 10e-4);
    assertEquals((double) mapper.map(Row.of("a", 1L, 1, 2.0, true)).getField(3), 1.0, 10e-4);

}
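Note that modelSchema (here and in the later mapper tests) is a class-level fixture defined elsewhere in the test class; only the model rows are constructed inline.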
 
Example 4
Source File: TaxiFareTableSource.java    From flink-training-exercises with Apache License 2.0
/**
 * Specifies schema of the produced table.
 *
 * @return The schema of the produced table.
 */
@Override
public TypeInformation<Row> getReturnType() {

	TypeInformation<?>[] types = new TypeInformation[] {
			Types.LONG,
			Types.LONG,
			Types.LONG,
			Types.STRING,
			Types.FLOAT,
			Types.FLOAT,
			Types.FLOAT
	};

	String[] names = new String[]{
			"rideId",
			"taxiId",
			"driverId",
			"paymentType",
			"tip",
			"tolls",
			"totalFare"
	};

	return new RowTypeInfo(types, names);
}
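A short sketch of how downstream code can inspect the returned type; the tableSource variable is an assumed instance of the class above:

RowTypeInfo rowInfo = (RowTypeInfo) tableSource.getReturnType();
rowInfo.getArity();                   // 7
rowInfo.getTypeAt("paymentType");     // the Types.STRING entry
rowInfo.getFieldIndex("paymentType"); // 3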
 
Example 5
Source File: VectorStandardScalerMapperTest.java    From Alink with Apache License 2.0
@Test
public void testDense() throws Exception {
    Row[] rows = new Row[]{
        Row.of(0L, "{\"withMean\":\"true\",\"selectedCol\":\"\\\"vec\\\"\",\"withStd\":\"true\"}", null),
        Row.of(1048576L, "[1.3333333333333333,0.3333333333333333]", null),
        Row.of(2097152L, "[2.5166114784235836,2.886751345948129]", null)
    };

    List<Row> model = Arrays.asList(rows);

    TableSchema dataSchema = new TableSchema(
        new String[]{"vec"},
        new TypeInformation<?>[]{Types.STRING}
    );
    Params params = new Params();

    VectorStandardScalerModelMapper mapper = new VectorStandardScalerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals(mapper.map(Row.of(new DenseVector(new double[]{1.0, 2.0}))).getField(0),
            new DenseVector(new double[]{-0.13245323570650433, 0.5773502691896257}));
}
 
Example 6
Source File: LdaTrainBatchOp.java    From Alink with Apache License 2.0
/**
 * Save the word-topic model in the sideOutputs.
 */
private void saveWordTopicModelAndPerplexity(DataSet<Row> model, int numTopic,
                                             Boolean ifOnline) {
    DataSet<Row> wordTopicDataSet;
    if (ifOnline) {
        wordTopicDataSet = model.mapPartition(new BuildWordTopicModelOnline()).setParallelism(1);
    } else {
        wordTopicDataSet = model.mapPartition(new BuildWordTopicModelGibbs()).setParallelism(1);
    }
    String[] colNames = new String[numTopic + 1];
    TypeInformation[] colTypes = new TypeInformation[colNames.length];
    colNames[0] = "word";
    colTypes[0] = Types.STRING;
    for (int i = 0; i < numTopic; i++) {
        colNames[1 + i] = "topic_" + i;
        colTypes[1 + i] = Types.DOUBLE;
    }

    DataSet<Row> logPerplexity = model.mapPartition(new CalculatePerplexityAndLikelihood()).setParallelism(1);
    this.setSideOutputTables(new Table[] {
        DataSetConversionUtil.toTable(getMLEnvironmentId(), wordTopicDataSet, colNames, colTypes),
        DataSetConversionUtil.toTable(getMLEnvironmentId(),
            logPerplexity, new String[]{"logPerplexity", "logLikelihood"},
            new TypeInformation[]{Types.DOUBLE, Types.DOUBLE})
    });
}
 
Example 7
Source File: VectorInteractionMapperTest.java    From Alink with Apache License 2.0
@Test
public void testSparse() throws Exception {
	TableSchema schema = new TableSchema(new String[] {"c0", "c1"},
		new TypeInformation <?>[] {Types.STRING, Types.STRING});

	TableSchema outSchema = new TableSchema(new String[] {"c0", "out"},
		new TypeInformation <?>[] {Types.STRING, VectorTypes.VECTOR});

	Params params = new Params()
		.set(VectorInteractionParams.SELECTED_COLS, new String[] {"c0", "c1"})
		.set(VectorInteractionParams.OUTPUT_COL, "out")
		.set(VectorInteractionParams.RESERVED_COLS, new String[] {"c0"});

	VectorInteractionMapper mapper = new VectorInteractionMapper(schema, params);

	assertEquals(mapper.map(Row.of(new SparseVector(10, new int[]{0, 9}, new double[]{1.0, 4.0}),
			new SparseVector(10, new int[]{0, 9}, new double[]{1.0, 4.0}))).getField(1),
		new SparseVector(100, new int[]{0, 9, 90, 99}, new double[]{1.0, 4.0, 4.0, 16.0}));
	assertEquals(mapper.getOutputSchema(), outSchema);
}
 
Example 8
Source File: VectorImputerMapperTest.java    From Alink with Apache License 2.0
@Test
public void testMean() throws Exception {
    Row[] rows = new Row[]{
        Row.of(0L, "{\"selectedCol\":\"\\\"vec\\\"\",\"strategy\":\"\\\"mean\\\"\"}", null),
        Row.of(1048576L, "[1.3333333333333333,-0.3333333333333333]", null)
    };

    List<Row> model = Arrays.asList(rows);

    TableSchema dataSchema = new TableSchema(
        new String[]{"vec"},
        new TypeInformation<?>[]{Types.STRING}
    );
    Params params = new Params();

    VectorImputerModelMapper mapper = new VectorImputerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals(mapper.map(Row.of(new DenseVector(new double[]{1.0, Double.NaN}))).getField(0),
            new DenseVector(new double[]{1.0, -0.3333333333333333}));
}
 
Example 9
Source File: NaiveBayesTextModelMapperTest.java    From Alink with Apache License 2.0
@Test
public void testPredictReservedCol() throws Exception {
	TableSchema dataSchema = new TableSchema(
		new String[] {"vec"},
		new TypeInformation<?>[] {Types.STRING}
	);
	Params params = new Params()
		.set(NaiveBayesTextPredictParams.VECTOR_COL, "vec")
		.set(NaiveBayesTextPredictParams.PREDICTION_COL, "pred");

	NaiveBayesTextModelMapper mapper = new NaiveBayesTextModelMapper(modelSchema, dataSchema, params);
	mapper.loadModel(model);

	assertEquals(mapper.map(Row.of("1.0, 1.0, 0.0, 1.0")).getField(1), 1);
	assertEquals(mapper.getOutputSchema(), new TableSchema(new String[] {"vec", "pred"},
		new TypeInformation<?>[] {Types.STRING, Types.INT}));
}
 
Example 10
Source File: StopWordsRemoverMapperTest.java    From Alink with Apache License 2.0
@Test
public void testStopWords() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"sentence"}, new TypeInformation<?>[] {Types.STRING});

    Params params = new Params()
        .set(StopWordsRemoverParams.SELECTED_COL, "sentence")
        .set(StopWordsRemoverParams.STOP_WORDS, new String[]{"Test"});

    StopWordsRemoverMapper mapper = new StopWordsRemoverMapper(schema, params);
    mapper.open();

    assertEquals(mapper.map(Row.of("This is a unit test for filtering stopWords")).getField(0),
        "unit filtering stopWords");
    assertEquals(mapper.map(Row.of("Filter stopWords test")).getField(0),
        "Filter stopWords");
    assertEquals(mapper.map(Row.of("这 是 停用词 过滤 的 单元 测试")).getField(0), "停用词 过滤 单元 测试");
    assertEquals(mapper.getOutputSchema(), schema);
}
 
Example 11
Source File: OrcTableSourceTest.java    From flink with Apache License 2.0
private TypeInformation[] getNestedFieldTypes() {
	return new TypeInformation[]{
		Types.BOOLEAN, Types.BYTE, Types.SHORT, Types.INT, Types.LONG, Types.FLOAT, Types.DOUBLE,
		PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO, Types.STRING,
		Types.ROW_NAMED(
			new String[]{"list"},
			ObjectArrayTypeInfo.getInfoFor(
				Types.ROW_NAMED(
					new String[]{"int1", "string1"},
					Types.INT, Types.STRING
				)
			)
		),
		ObjectArrayTypeInfo.getInfoFor(
			Types.ROW_NAMED(
				new String[]{"int1", "string1"},
				Types.INT, Types.STRING
			)
		),
		new MapTypeInfo<>(
			Types.STRING,
			Types.ROW_NAMED(
				new String[]{"int1", "string1"},
				Types.INT, Types.STRING
			)
		)
	};
}
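For reference, Types.ROW_NAMED builds a RowTypeInfo with explicit field names, so the nesting above is visible in the type's string form. A small sketch:

TypeInformation<Row> inner = Types.ROW_NAMED(
    new String[]{"int1", "string1"}, Types.INT, Types.STRING);
// inner.toString() -> "Row(int1: Integer, string1: String)"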
 
Example 12
Source File: CustomTableSinkMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        blinkStreamEnv.setParallelism(1);
        EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

        String path = SQLExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();

        CsvTableSource csvTableSource = CsvTableSource.builder()
                .field("word", Types.STRING)
                .path(path)
                .build();
        blinkStreamTableEnv.registerTableSource("zhisheng", csvTableSource);

        RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"c", "word"}, new TypeInformation[]{Types.LONG, Types.STRING});
        // Alternatively:
//        RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(new String[]{"c", "word"}, new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
        blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

        Table wordWithCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS c, word FROM zhisheng GROUP BY word");

        wordWithCount.insertInto("sinkTable");
        blinkStreamTableEnv.execute("Blink Custom Table Sink");
    }
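The MyRetractStreamTableSink class is not shown above. A plausible minimal implementation against the legacy RetractStreamTableSink interface might look like the following; this is a sketch under assumptions, not the actual class from flink-learning:

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.table.sinks.RetractStreamTableSink;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.types.Row;

// Sketch only: prints retract-stream records; a real sink would write them out.
public class MyRetractStreamTableSink implements RetractStreamTableSink<Row> {

    private final String[] fieldNames;
    private final TypeInformation[] fieldTypes;

    public MyRetractStreamTableSink(String[] fieldNames, TypeInformation[] fieldTypes) {
        this.fieldNames = fieldNames;
        this.fieldTypes = fieldTypes;
    }

    @Override
    public TypeInformation<Row> getRecordType() {
        return new RowTypeInfo(fieldTypes, fieldNames);
    }

    @Override
    public String[] getFieldNames() {
        return fieldNames;
    }

    @Override
    public TypeInformation<?>[] getFieldTypes() {
        return fieldTypes;
    }

    @Override
    public TableSink<Tuple2<Boolean, Row>> configure(String[] fieldNames, TypeInformation<?>[] fieldTypes) {
        return this;
    }

    @Override
    public void emitDataStream(DataStream<Tuple2<Boolean, Row>> dataStream) {
        // The Boolean flag marks each record as an accumulate (true) or retract (false) message.
        dataStream.print();
    }
}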
 
Example 13
Source File: NGramMapperTest.java    From Alink with Apache License 2.0
@Test
public void testDefault() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"sentence"}, new TypeInformation<?>[] {Types.STRING});

    Params params = new Params()
        .set(NGramParams.SELECTED_COL, "sentence");

    NGramMapper mapper = new NGramMapper(schema, params);

    assertEquals(mapper.map(Row.of("This is a unit test for mapper")).getField(0),
        "This_is is_a a_unit unit_test test_for for_mapper");
    assertEquals(mapper.getOutputSchema(), schema);
}
 
Example 14
Source File: CsvParserTest.java    From Alink with Apache License 2.0
@Test
public void testLongFieldSeparator() throws Exception {
    CsvParser parser = new CsvParser(new TypeInformation[]{Types.STRING, Types.STRING, Types.STRING}, "____", '"');
    Assert.assertEquals(parser.parse("hello_____world____").f1.getField(0), "hello");
    Assert.assertEquals(parser.parse("hello_____world____").f1.getField(1), "_world");
    Assert.assertEquals(parser.parse("hello_____world____").f1.getField(2), null);
    Assert.assertEquals(parser.parse("\"hello_____world____\"").f1.getField(0), "hello_____world____");
    Assert.assertEquals(parser.parse("\"hello_____world____\"").f1.getField(1), null);
    Assert.assertEquals(parser.parse("\"hello_____world____\"").f1.getField(2), null);
}
 
Example 15
Source File: JavaUserDefinedAggFunctions.java    From flink with Apache License 2.0
@Override
public DataViewTestAccum createAccumulator() {
	DataViewTestAccum accum = new DataViewTestAccum();
	accum.map = new MapView<>(Types.STRING, Types.INT);
	accum.count = 0L;
	return accum;
}
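The accumulator POJO is defined elsewhere in the test class; a minimal sketch of its likely shape (an assumption, not the exact project code):

public static class DataViewTestAccum {
    // A state-backed map view keyed by Types.STRING with Types.INT values.
    public MapView<String, Integer> map;
    public long count;
}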
 
Example 16
Source File: CsvParserTest.java    From Alink with Apache License 2.0
@Test
public void testParser() throws Exception {
    CsvParser parser = new CsvParser(new TypeInformation[]{Types.STRING}, ",", '"');
    Assert.assertEquals(parser.parse("\"hello, world\"").f1.getField(0), "hello, world");
    Assert.assertEquals(parser.parse("").f1.getField(0), null);
    Assert.assertEquals(parser.parse("\"\"").f1.getField(0), "");
    Assert.assertEquals(parser.parse("\"\"\"\"\"\"").f1.getField(0), "\"\"");
}
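The last assertion illustrates quote escaping: within a quoted field, a doubled quote unescapes to a single literal quote, so the six-quote input parses to a two-character string of two quotes.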
 
Example 17
Source File: KMeansModelMapperTest.java    From Alink with Apache License 2.0
@Test
public void testDefault(){
    TableSchema dataSchema = new TableSchema(
        new String[] {"Y"}, new TypeInformation<?>[] {Types.STRING}
    );
    Params params = new Params()
        .set(KMeansPredictParams.PREDICTION_COL, "pred");

    KMeansModelMapper mapper = new KMeansModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals(mapper.map(Row.of("0 0 0")).getField(1), 1L);
    assertEquals(mapper.getOutputSchema(), new TableSchema(new String[] {"Y", "pred"},
        new TypeInformation<?>[] {Types.STRING, Types.LONG}));
}
 
Example 18
Source File: VectorMaxAbsScalerModelDataConverter.java    From Alink with Apache License 2.0
/**
 * Get the additional column types.
 */
@Override
protected TypeInformation[] initAdditionalColTypes() {
    return new TypeInformation[]{Types.STRING};
}
 
Example 19
Source File: JsonRowDeserializationSchema.java    From Flink-CEPplus with Apache License 2.0
private Object convert(JsonNode node, TypeInformation<?> info) {
	if (info == Types.VOID || node.isNull()) {
		return null;
	} else if (info == Types.BOOLEAN) {
		return node.asBoolean();
	} else if (info == Types.STRING) {
		return node.asText();
	} else if (info == Types.BIG_DEC) {
		return node.decimalValue();
	} else if (info == Types.BIG_INT) {
		return node.bigIntegerValue();
	} else if (info == Types.SQL_DATE) {
		return Date.valueOf(node.asText());
	} else if (info == Types.SQL_TIME) {
		// according to RFC 3339 every full-time must have a timezone;
		// until we have full timezone support, we only support UTC;
		// users can parse their time as string as a workaround
		final String time = node.asText();
		if (time.indexOf('Z') < 0 || time.indexOf('.') >= 0) {
			throw new IllegalStateException(
				"Invalid time format. Only a time in UTC timezone without milliseconds is supported yet. " +
					"Format: HH:mm:ss'Z'");
		}
		return Time.valueOf(time.substring(0, time.length() - 1));
	} else if (info == Types.SQL_TIMESTAMP) {
		// according to RFC 3339 every date-time must have a timezone;
		// until we have full timezone support, we only support UTC;
		// users can parse their time as string as a workaround
		final String timestamp = node.asText();
		if (timestamp.indexOf('Z') < 0) {
			throw new IllegalStateException(
				"Invalid timestamp format. Only a timestamp in UTC timezone is supported yet. " +
					"Format: yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
		}
		return Timestamp.valueOf(timestamp.substring(0, timestamp.length() - 1).replace('T', ' '));
	} else if (info instanceof RowTypeInfo) {
		return convertRow(node, (RowTypeInfo) info);
	} else if (info instanceof ObjectArrayTypeInfo) {
		return convertObjectArray(node, ((ObjectArrayTypeInfo) info).getComponentInfo());
	} else if (info instanceof BasicArrayTypeInfo) {
		return convertObjectArray(node, ((BasicArrayTypeInfo) info).getComponentInfo());
	} else if (info instanceof PrimitiveArrayTypeInfo &&
			((PrimitiveArrayTypeInfo) info).getComponentType() == Types.BYTE) {
		return convertByteArray(node);
	} else {
		// for types that were specified without JSON schema
		// e.g. POJOs
		try {
			return objectMapper.treeToValue(node, info.getTypeClass());
		} catch (JsonProcessingException e) {
			throw new IllegalStateException("Unsupported type information '" + info + "' for node: " + node);
		}
	}
}
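The == comparisons above are valid because the constants in Types are shared singleton instances; for example, Types.STRING is defined as BasicTypeInfo.STRING_TYPE_INFO. A quick sketch:

TypeInformation<String> info = Types.STRING;
boolean sameInstance = (info == Types.STRING);               // true: same static field
boolean equal = info.equals(BasicTypeInfo.STRING_TYPE_INFO); // true as well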
 
Example 20
Source File: HBaseTableFactoryTest.java    From flink with Apache License 2.0
@Test
public void testTableSinkFactory() {
	String[] columnNames = {ROWKEY, FAMILY1, FAMILY2, FAMILY3};
	TypeInformation<Row> f1 = Types.ROW_NAMED(new String[]{COL1, COL2}, Types.DOUBLE, Types.INT);
	TypeInformation<Row> f2 = Types.ROW_NAMED(new String[]{COL1, COL3}, Types.INT, Types.LONG);
	TypeInformation<Row> f3 = Types.ROW_NAMED(new String[]{COL2, COL3}, Types.BOOLEAN, Types.STRING);
	TypeInformation[] columnTypes = new TypeInformation[]{Types.STRING, f1, f2, f3};
	DescriptorProperties descriptorProperties = createDescriptor(columnNames, columnTypes);

	TableSink sink = TableFactoryService
		.find(HBaseTableFactory.class, descriptorProperties.asMap())
		.createTableSink(descriptorProperties.asMap());

	Assert.assertTrue(sink instanceof HBaseUpsertTableSink);

	HBaseTableSchema hbaseSchema = ((HBaseUpsertTableSink) sink).getHBaseTableSchema();
	Assert.assertEquals(0, hbaseSchema.getRowKeyIndex());
	Assert.assertEquals(Optional.of(Types.STRING), hbaseSchema.getRowKeyTypeInfo());

	Assert.assertArrayEquals(new String[]{"f1", "f2", "f3"}, hbaseSchema.getFamilyNames());
	Assert.assertArrayEquals(new String[]{"c1", "c2"}, hbaseSchema.getQualifierNames("f1"));
	Assert.assertArrayEquals(new String[]{"c1", "c3"}, hbaseSchema.getQualifierNames("f2"));
	Assert.assertArrayEquals(new String[]{"c2", "c3"}, hbaseSchema.getQualifierNames("f3"));

	Assert.assertArrayEquals(new TypeInformation[]{Types.DOUBLE, Types.INT}, hbaseSchema.getQualifierTypes("f1"));
	Assert.assertArrayEquals(new TypeInformation[]{Types.INT, Types.LONG}, hbaseSchema.getQualifierTypes("f2"));
	Assert.assertArrayEquals(new TypeInformation[]{Types.BOOLEAN, Types.STRING}, hbaseSchema.getQualifierTypes("f3"));

	HBaseOptions expectedHBaseOptions = HBaseOptions.builder()
		.setTableName("testHBastTable")
		.setZkQuorum("localhost:2181")
		.setZkNodeParent("/flink")
		.build();
	HBaseOptions actualHBaseOptions = ((HBaseUpsertTableSink) sink).getHBaseOptions();
	Assert.assertEquals(expectedHBaseOptions, actualHBaseOptions);

	HBaseWriteOptions expectedWriteOptions = HBaseWriteOptions.builder()
		.setBufferFlushMaxRows(1000)
		.setBufferFlushIntervalMillis(10 * 1000)
		.setBufferFlushMaxSizeInBytes(10 * 1024 * 1024)
		.build();
	HBaseWriteOptions actualWriteOptions = ((HBaseUpsertTableSink) sink).getWriteOptions();
	Assert.assertEquals(expectedWriteOptions, actualWriteOptions);
}