Java Code Examples for org.apache.flink.table.api.Types#STRING
The following examples show how to use
org.apache.flink.table.api.Types#STRING.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JoinTest.java From sylph with Apache License 2.0 | 6 votes |
@Before public void init() { StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(); execEnv.setParallelism(4); execEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); tableEnv = (StreamTableEnvironmentImpl) StreamTableEnvironment.create(execEnv); tableEnv.registerFunction("from_unixtime", new TimeUtil.FromUnixTime()); //---create stream source TypeInformation[] fieldTypes = {Types.STRING(), Types.STRING(), Types.LONG()}; String[] fieldNames = {"topic", "user_id", "time"}; RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes, fieldNames); DataStream<Row> dataSource = execEnv.fromCollection(new ArrayList<>(), rowTypeInfo); tableEnv.registerTableSource("tb1", new SylphTableSource(rowTypeInfo, dataSource)); tableEnv.registerTableSource("tb0", new SylphTableSource(rowTypeInfo, dataSource)); final AntlrSqlParser sqlParser = new AntlrSqlParser(); this.dimTable = (CreateTable) sqlParser.createStatement( "create batch table users(id string, name string, city string) with(type = '" + JoinOperator.class.getName() + "')"); }
Example 2
Source File: Word2VecTest.java From Alink with Apache License 2.0 | 6 votes |
/** Trains Word2Vec on a tiny pre-tokenized corpus and checks the transformed row count matches the input. */
@Test
public void train() throws Exception {
    // Two-column input: a document id and a space-tokenized sentence.
    TableSchema inputSchema = new TableSchema(
        new String[] {"docid", "content"},
        new TypeInformation <?>[] {Types.LONG(), Types.STRING()});

    List<Row> corpus = new ArrayList<>();
    corpus.add(Row.of(0L, "老王 是 我们 团队 里 最胖 的"));
    corpus.add(Row.of(1L, "老黄 是 第二 胖 的"));
    corpus.add(Row.of(2L, "胖"));
    corpus.add(Row.of(3L, "胖 胖 胖"));
    MemSourceBatchOp input = new MemSourceBatchOp(corpus, inputSchema);

    // minCount = 1 so every token survives despite the tiny corpus.
    Word2Vec word2Vec = new Word2Vec();
    word2Vec.setSelectedCol("content");
    word2Vec.setOutputCol("output");
    word2Vec.setMinCount(1);

    List<Row> transformed = word2Vec.fit(input).transform(input).collect();
    Assert.assertEquals(corpus.size(), transformed.size());
}
Example 3
Source File: FeatureHasherMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/** Hashing with the default dimension (262144) and no reserved columns: checks hashed vectors and output schema. */
@Test
public void test1() throws Exception {
    TableSchema dataSchema = new TableSchema(
        new String[] {"double", "bool", "number", "str"},
        new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING()});

    // All four columns hashed; reserved columns empty, so only "output" remains.
    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] {"double", "bool", "number", "str"})
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.RESERVED_COLS, new String[] {});

    FeatureHasherMapper mapper = new FeatureHasherMapper(dataSchema, params);

    SparseVector expectedFirst = new SparseVector(262144,
        new int[]{62393, 85133, 120275, 214318}, new double[]{1.0, 1.0, 1.0, 1.1});
    assertEquals(mapper.map(Row.of(1.1, true, "2", "A")).getField(0), expectedFirst);

    SparseVector expectedSecond = new SparseVector(262144,
        new int[]{76287, 85133, 120275, 214318}, new double[]{1.0, 1.0, 1.0, 2.1});
    assertEquals(mapper.map(Row.of(2.1, true, "1", "A")).getField(0), expectedSecond);

    assertEquals(mapper.getOutputSchema(),
        new TableSchema(new String[] {"output"}, new TypeInformation<?>[] {VectorTypes.VECTOR}));
}
Example 4
Source File: FeatureHasherMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/** Hashing into 10 buckets with all input columns kept: the vector lands in field 4, after the originals. */
@Test
public void test2() throws Exception {
    TableSchema dataSchema = new TableSchema(
        new String[] {"double", "bool", "number", "str"},
        new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING()});

    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] {"double", "bool", "number", "str"})
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.NUM_FEATURES, 10);

    FeatureHasherMapper mapper = new FeatureHasherMapper(dataSchema, params);

    // Collisions in the tiny 10-bucket space explain the summed value 2.0 below.
    SparseVector expectedFirst = new SparseVector(10,
        new int[]{5, 8, 9}, new double[]{2.0, 1.1, 1.0});
    assertEquals(mapper.map(Row.of(1.1, true, "2", "A")).getField(4), expectedFirst);

    SparseVector expectedSecond = new SparseVector(10,
        new int[]{1, 5, 6, 8}, new double[]{1.0, 1.0, 1.0, 2.1});
    assertEquals(mapper.map(Row.of(2.1, true, "1", "B")).getField(4), expectedSecond);

    // Default reservation keeps all input columns plus the hashed output column.
    assertEquals(mapper.getOutputSchema(),
        new TableSchema(
            new String[] {"double", "bool", "number", "str", "output"},
            new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING(), VectorTypes.VECTOR}));
}
Example 5
Source File: FeatureHasherMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/** Like test2, but the "double" column is treated as categorical, changing how its value is hashed. */
@Test
public void test3() throws Exception {
    TableSchema dataSchema = new TableSchema(
        new String[] {"double", "bool", "number", "str"},
        new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING()});

    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] {"double", "bool", "number", "str"})
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.NUM_FEATURES, 10)
        .set(FeatureHasherParams.CATEGORICAL_COLS, new String[] {"double"});

    FeatureHasherMapper mapper = new FeatureHasherMapper(dataSchema, params);

    SparseVector expectedFirst = new SparseVector(10,
        new int[]{0, 5, 9}, new double[]{1.0, 2.0, 1.0});
    assertEquals(mapper.map(Row.of(1.1, true, "2", "A")).getField(4), expectedFirst);

    SparseVector expectedSecond = new SparseVector(10,
        new int[]{1, 5, 6}, new double[]{2.0, 1.0, 1.0});
    assertEquals(mapper.map(Row.of(2.1, true, "1", "B")).getField(4), expectedSecond);
}
Example 6
Source File: DCTMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/** Round-trips several dense vectors through DCT and inverse DCT and checks near-identity. */
@Test
public void test() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"vec"}, new TypeInformation <?>[] {Types.STRING()});
    DCTMapper forward = new DCTMapper(schema, new Params().set(DCTParams.SELECTED_COL, "vec"));
    DCTMapper inverse = new DCTMapper(schema,
        new Params().set(DCTParams.SELECTED_COL, "vec").set(DCTParams.INVERSE, true));

    // Vectors of differing lengths and magnitudes, including a badly-scaled one.
    String[] samples = new String[] {
        "1.0 2.0 3.0 4.0 5.0",
        "1.0 2.0 1.0 2.0",
        "1.0 100000.0 -5000.0 0.1 0.0000005"
    };

    for (String sample : samples) {
        String roundTrip = (String) inverse.map(forward.map(Row.of(sample))).getField(0);
        // DCT followed by its inverse should reproduce the input up to FP error.
        double residual = VectorUtil.parseDense(roundTrip).minus(VectorUtil.parseDense(sample)).normL1();
        assertTrue(residual < 1e-10);
    }
}
Example 7
Source File: SegmentMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/** Segmentation with a user dictionary: "低风险" must stay as a single token. */
@Test
public void test2() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"sentence"}, new TypeInformation <?>[] {Types.STRING()});
    String[] userDict = new String[] {"低风险"};
    Params params = new Params()
        .set(SegmentParams.SELECTED_COL, "sentence")
        .set(SegmentParams.USER_DEFINED_DICT, userDict);

    SegmentMapper segmenter = new SegmentMapper(schema, params);
    segmenter.open();

    Object segmented = segmenter.map(Row.of("我们辅助用户简单快速低成本低风险的实现系统权限安全管理")).getField(0);
    assertEquals(segmented, "我们 辅助 用户 简单 快速 低成本 低风险 的 实现 系统 权限 安全 管理");
    // Input and output schemas are identical: the column is replaced in place.
    assertEquals(segmenter.getOutputSchema(), schema);
}
Example 8
Source File: PcaModelMapper.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Builds a PCA prediction mapper: reads the transform type and the output
 * column configuration from {@code params} and prepares the schema helper.
 */
public PcaModelMapper(TableSchema modelSchema, TableSchema dataSchema, Params params) {
    super(modelSchema, dataSchema, params);
    transformType = this.params.get(PcaPredictParams.TRANSFORM_TYPE);
    // Columns carried through unchanged alongside the prediction column.
    String[] reservedCols = this.params.get(PcaPredictParams.RESERVED_COLS);
    String predictionCol = this.params.get(PcaPredictParams.PREDICTION_COL);
    // The prediction is emitted as a string column.
    this.outputColsHelper = new OutputColsHelper(dataSchema, predictionCol, Types.STRING(), reservedCols);
}
Example 9
Source File: DCTMapperTest.java From Alink with Apache License 2.0 | 5 votes |
/** Round-trips seeded-random 31-dimensional vectors through DCT and inverse DCT. */
@Test
public void test2() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"vec"}, new TypeInformation <?>[] {Types.STRING()});
    DCTMapper forward = new DCTMapper(schema, new Params().set(DCTParams.SELECTED_COL, "vec"));
    DCTMapper inverse = new DCTMapper(schema,
        new Params().set(DCTParams.SELECTED_COL, "vec").set(DCTParams.INVERSE, true));

    // Fixed seed keeps the test deterministic.
    Random rng = new Random(1234);
    final int sampleCount = 10;
    final int dimension = 31;

    Row[] samples = new Row[sampleCount];
    for (int i = 0; i < sampleCount; i++) {
        double[] values = new double[dimension];
        for (int j = 0; j < dimension; j++) {
            // Integer-valued doubles in [-256, 255].
            values[j] = ((int) (rng.nextDouble() * 512) - 256) * 1.0;
        }
        samples[i] = Row.of(VectorUtil.toString(new DenseVector(values)));
    }

    for (Row sample : samples) {
        String roundTrip = (String) inverse.map(forward.map(sample)).getField(0);
        assertTrue(
            VectorUtil.parseDense(roundTrip)
                .minus(VectorUtil.parseDense((String) sample.getField(0)))
                .normL1() < 1e-10);
    }
}
Example 10
Source File: SegmentMapperTest.java From Alink with Apache License 2.0 | 5 votes |
/** Default segmentation (no user dictionary): "低风险" splits into "低 风险". */
@Test
public void test1() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"sentence"}, new TypeInformation <?>[] {Types.STRING()});
    Params params = new Params().set(SegmentParams.SELECTED_COL, "sentence");

    SegmentMapper segmenter = new SegmentMapper(schema, params);
    segmenter.open();

    assertEquals(
        segmenter.map(Row.of("我们辅助用户简单快速低成本低风险的实现系统权限安全管理")).getField(0),
        "我们 辅助 用户 简单 快速 低成本 低 风险 的 实现 系统 权限 安全 管理");
    // The segmenter replaces the column in place, so the schema is unchanged.
    assertEquals(segmenter.getOutputSchema(), schema);
}
Example 11
Source File: FlinkSqlTextBusiness.java From PoseidonX with Apache License 2.0 | 4 votes |
/**
 * Maps textual type names (e.g. "string", "int", "timestamp") to Flink
 * {@link TypeInformation} instances, position by position.
 *
 * @param dataTypeStrArray the type names to convert
 * @return a TypeInformation array parallel to the input
 * @throws FlinkSqlException if a type name is null or not recognized
 */
private static TypeInformation[] convertStringToType(String[] dataTypeStrArray) throws FlinkSqlException {
    TypeInformation[] dataTypeArray = new TypeInformation[dataTypeStrArray.length];
    for (int i = 0; i < dataTypeStrArray.length; i++) {
        String type = dataTypeStrArray[i];
        // Guard: a null entry must report the same error as the original
        // equals-chain did ("类型错误[null]") instead of throwing an NPE from switch.
        if (type == null) {
            throw new FlinkSqlException("类型错误[null]");
        }
        switch (type) {
            case "string":    dataTypeArray[i] = Types.STRING(); break;
            case "short":     dataTypeArray[i] = Types.SHORT(); break;
            case "int":       dataTypeArray[i] = Types.INT(); break;
            case "long":      dataTypeArray[i] = Types.LONG(); break;
            case "date":      dataTypeArray[i] = Types.SQL_DATE(); break;
            case "timestamp": dataTypeArray[i] = Types.SQL_TIMESTAMP(); break;
            case "float":     dataTypeArray[i] = Types.FLOAT(); break;
            case "double":    dataTypeArray[i] = Types.DOUBLE(); break;
            case "byte":      dataTypeArray[i] = Types.BYTE(); break;
            default:
                // Bug fix: report the offending type name. The original message
                // concatenated dataTypeArray[i], which is always null at this point.
                throw new FlinkSqlException("类型错误[" + type + "]");
        }
    }
    return dataTypeArray;
}
Example 12
Source File: CodeGenFlinkTable.java From df_data_service with Apache License 2.0 | 4 votes |
// Demonstrates compiling generated Java source at runtime and applying it to a
// Flink Table: builds a small DynamicRunner class as a string, compiles it with
// CompilerUtils.CACHED_COMPILER, and uses it to transform a CSV-backed table.
public static void main(String args[]) {
    // Code fragments that get spliced into the generated class bodies below.
    String transform = "flatMap(new FlinkUDF.LineSplitter()).groupBy(0).sum(1).print();\n";
    String transform2 = "select(\"name\");\n";
    // Shared header for both generated classes (package + imports).
    String header = "package dynamic;\n" +
        "import org.apache.flink.api.table.Table;\n" +
        "import com.datafibers.util.*;\n";
    // Generated class #1: runs a DataSet transform (not used below — see javaCode2).
    String javaCode = header +
        "public class FlinkScript implements DynamicRunner {\n" +
        "@Override \n" +
        " public void runTransform(DataSet<String> ds) {\n" +
        "try {" +
        "ds."+ transform +
        "} catch (Exception e) {" +
        "};" +
        "}}";
    // Generated class #2: applies a Table-API projection and returns the result.
    String javaCode2 = header +
        "public class FlinkScript implements DynamicRunner {\n" +
        "@Override \n" +
        " public Table transTableObj(Table tbl) {\n" +
        "try {" +
        "return tbl."+ transform2 +
        "} catch (Exception e) {" +
        "};" +
        "return null;}}";
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    // NOTE(review): hard-coded local paths — this example only runs on the author's machine.
    CsvTableSource csvTableSource = new CsvTableSource(
        "/Users/will/Downloads/file.csv",
        new String[] { "name", "id", "score", "comments" },
        new TypeInformation<?>[] {
            Types.STRING(), Types.STRING(), Types.STRING(), Types.STRING()
        }); // lenient
    tableEnv.registerTableSource("mycsv", csvTableSource);
    TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|");
    Table ingest = tableEnv.scan("mycsv");
    try {
        String className = "dynamic.FlinkScript";
        // Compile the generated source and instantiate it through the DynamicRunner interface.
        Class aClass = CompilerUtils.CACHED_COMPILER.loadFromJava(className, javaCode2);
        DynamicRunner runner = (DynamicRunner) aClass.newInstance();
        //runner.runTransform(ds);
        Table result = runner.transTableObj(ingest);
        // write the result Table to the TableSink
        result.writeToSink(sink);
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 13
Source File: WordCountStream.java From df_data_service with Apache License 2.0 | 4 votes |
public static void main(String args[]) { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); // Create a DataStream from a list of elements //DataStream<Integer> ds = env.fromElements(1, 2, 3, 4, 5); CsvTableSource csvTableSource = new CsvTableSource( "/Users/will/Downloads/file.csv", new String[] { "name", "id", "score", "comments" }, new TypeInformation<?>[] { Types.STRING(), Types.STRING(), Types.STRING(), Types.STRING() }); // lenient tableEnv.registerTableSource("mycsv", csvTableSource); TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|"); //tableEnv.registerDataStream("tbl", ds, "a"); //Table ingest = tableEnv.fromDataStream(ds, "name"); Table in = tableEnv.scan("mycsv"); //Table in = tableEnv.ingest("tbl"); //Table in = tableEnv.fromDataStream(ds, "a"); Table result = in.select("name"); result.writeToSink(sink); try { env.execute(); } catch (Exception e) { } System.out.print("DONE"); }