org.apache.flink.types.Row Java Examples

The following examples show how to use org.apache.flink.types.Row. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
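Before the project examples, here is a minimal, self-contained sketch of the core Row API itself (creating a row, reading and writing fields). The class name and field values are illustrative only, and the exact toString() output format varies between Flink versions.

import org.apache.flink.types.Row;

public class RowQuickStart {
    public static void main(String[] args) {
        // Create a row with three positional fields.
        Row row = Row.of("alice", 42, 3.14);

        // Fields are read by position; getField returns Object, so cast as needed.
        String name = (String) row.getField(0);
        int count = (int) row.getField(1);

        // Rows are mutable: overwrite a field in place.
        row.setField(2, 2.71);

        // getArity() returns the number of fields.
        System.out.println(name + ", " + count + ", arity=" + row.getArity() + ", row=" + row);
    }
}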
Example #1
Source File: EuclideanDistanceTest.java    From Alink with Apache License 2.0
@Test
public void testCalDistanceVecMatrix(){
    FastDistanceMatrixData matrixData = initMatrixData();
    FastDistanceVectorData vectorData = distance.prepareVectorData(Row.of(0, sparseVector1), 1);

    double[] predict = distance.calc(matrixData, vectorData).getData();
    double[] expect = new double[]{5.47, 8.38};
    for(int i = 0; i < expect.length; i++){
        Assert.assertEquals(expect[i], predict[i], 0.01);
    }

    predict = distance.calc(vectorData, matrixData).getData();
    for(int i = 0; i < expect.length; i++){
        Assert.assertEquals(expect[i], predict[i], 0.01);
    }
}
 
Example #2
Source File: KafkaSourceTest.java    From sylph with Apache License 2.0
@Test
public void createSource()
        throws Exception
{
    StreamTableEnvironment tableEnv = getTableEnv();
    String sql = "create input table tb1(\n" +
            "    _topic varchar,\n" +
            "    _message varchar\n" +
            ") with (\n" +
            "    type = '" + KafkaSource.class.getName() + "',\n" +
            "    kafka_topic = 'N603_A_1,N603_A_2,N603_A_3,N603_A_4,N603_A_5,N603_A_7',\n" +
            "    \"auto.offset.reset\" = latest,\n" +
            "    kafka_broker = 'localhost:9092',\n" +
            "    kafka_group_id = 'streamload1'\n" +
            ")";

    StreamSqlBuilder streamSqlBuilder = new StreamSqlBuilder(tableEnv, ConnectorStore.getDefault(), sqlParser);
    streamSqlBuilder.buildStreamBySql(sql);

    Table kafka = tableEnv.sqlQuery("select * from tb1");
    tableEnv.toAppendStream(kafka, Row.class).print();

    Assert.assertNotNull(((StreamTableEnvironmentImpl) tableEnv).execEnv().getStreamGraph().getJobGraph());
}
 
Example #3
Source File: Elasticsearch6UpsertTableSinkFactoryTest.java    From flink with Apache License 2.0
public TestElasticsearch6UpsertTableSink(
		boolean isAppendOnly,
		TableSchema schema,
		List<Host> hosts,
		String index,
		String docType,
		String keyDelimiter,
		String keyNullLiteral,
		SerializationSchema<Row> serializationSchema,
		XContentType contentType,
		ActionRequestFailureHandler failureHandler,
		Map<SinkOption, String> sinkOptions) {

	super(
		isAppendOnly,
		schema,
		hosts,
		index,
		docType,
		keyDelimiter,
		keyNullLiteral,
		serializationSchema,
		contentType,
		failureHandler,
		sinkOptions);
}
 
Example #4
Source File: OrcBatchReader.java    From flink with Apache License 2.0
private static void readNonNullBytesColumnAsString(Object[] vals, int fieldIdx, BytesColumnVector bytes, int childCount) {
	if (bytes.isRepeating) { // fill complete column with first value
		String repeatingValue = readString(bytes.vector[0], bytes.start[0], bytes.length[0]);
		fillColumnWithRepeatingValue(vals, fieldIdx, repeatingValue, childCount);
	} else {
		if (fieldIdx == -1) { // set as an object
			for (int i = 0; i < childCount; i++) {
				vals[i] = readString(bytes.vector[i], bytes.start[i], bytes.length[i]);
			}
		} else { // set as a field of Row
			Row[] rows = (Row[]) vals;
			for (int i = 0; i < childCount; i++) {
				rows[i].setField(fieldIdx, readString(bytes.vector[i], bytes.start[i], bytes.length[i]));
			}
		}
	}
}
 
Example #5
Source File: BaseRandomForestTrainBatchOp.java    From Alink with Apache License 2.0
private DataSet<Row> parallelTrain(BatchOperator<?> in) {
	BatchOperator<?> quantileModel = Preprocessing.generateQuantileDiscretizerModel(in, getParams());

	DataSet<Row> trainingDataSet = Preprocessing
		.castToQuantile(in, quantileModel, getParams())
		.getDataSet()
		// check null value in training dataset and throw exception when there are null values.
		.map(new CheckNullValue(in.getColNames()));

	final Params meta = getParams().clone();

	return new IterativeComQueue().setMaxIter(Integer.MAX_VALUE)
		.initWithPartitionedData("treeInput", trainingDataSet)
		.initWithBroadcastData("quantileModel", quantileModel.getDataSet())
		.initWithBroadcastData("stringIndexerModel", stringIndexerModel.getDataSet())
		.initWithBroadcastData("labels", labels)
		.add(new TreeInitObj(meta))
		.add(new TreeStat())
		.add(new AllReduce("allReduce", "allReduceCnt"))
		.add(new TreeSplit())
		.setCompareCriterionOfNode0(new Criterion())
		.closeWith(new SerializeModelCompleteResultFunction(meta))
		.exec();
}
 
Example #6
Source File: VectorChiSqSelectorBatchOpTest.java    From Alink with Apache License 2.0
@Test
public void testDense() {

    Row[] testArray =
        new Row[]{
            Row.of("1.0 2.0 4.0", "a"),
            Row.of("-1.0 -3.0 4.0", "a"),
            Row.of("4.0 2.0 3.0", "b"),
            Row.of("3.4 5.1 5.0", "b")
        };

    String[] colNames = new String[]{"vec", "label"};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    VectorChiSqSelectorBatchOp selector = new VectorChiSqSelectorBatchOp()
        .setSelectedCol("vec")
        .setLabelCol("label")
        .setNumTopFeatures(2);

    selector.linkFrom(source);

    int[] selectedIndices = selector.collectResult();
    assertArrayEquals(selectedIndices, new int[] {2, 0});
}
 
Example #7
Source File: Kafka011TableSourceSinkFactoryTest.java    From flink with Apache License 2.0
@Override
protected KafkaTableSourceBase getExpectedKafkaTableSource(
		TableSchema schema,
		Optional<String> proctimeAttribute,
		List<RowtimeAttributeDescriptor> rowtimeAttributeDescriptors,
		Map<String, String> fieldMapping,
		String topic,
		Properties properties,
		DeserializationSchema<Row> deserializationSchema,
		StartupMode startupMode,
		Map<KafkaTopicPartition, Long> specificStartupOffsets,
		long startupTimestampMillis) {

	return new Kafka011TableSource(
		schema,
		proctimeAttribute,
		rowtimeAttributeDescriptors,
		Optional.of(fieldMapping),
		topic,
		properties,
		deserializationSchema,
		startupMode,
		specificStartupOffsets,
		startupTimestampMillis
	);
}
 
Example #8
Source File: StopWordsRemoverMapperTest.java    From Alink with Apache License 2.0
@Test
public void testCaseSensitive() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"sentence"}, new TypeInformation<?>[] {Types.STRING});

    Params params = new Params()
        .set(StopWordsRemoverParams.SELECTED_COL, "sentence")
        .set(StopWordsRemoverParams.CASE_SENSITIVE, true)
        .set(StopWordsRemoverParams.STOP_WORDS, new String[]{"Test"});

    StopWordsRemoverMapper mapper = new StopWordsRemoverMapper(schema, params);
    mapper.open();

    assertEquals(mapper.map(Row.of("This is a unit test for filtering stopWords")).getField(0),
        "This unit test filtering stopWords");
    assertEquals(mapper.map(Row.of("Filter stopWords test")).getField(0),
        "Filter stopWords test");
    assertEquals(mapper.getOutputSchema(), schema);
}
 
Example #9
Source File: ModelMapBatchOp.java    From Alink with Apache License 2.0
@Override
public T linkFrom(BatchOperator<?>... inputs) {
	checkOpSize(2, inputs);

	try {
		BroadcastVariableModelSource modelSource = new BroadcastVariableModelSource(BROADCAST_MODEL_TABLE_NAME);
		ModelMapper mapper = this.mapperBuilder.apply(
				inputs[0].getSchema(),
				inputs[1].getSchema(),
				this.getParams());
		DataSet<Row> modelRows = inputs[0].getDataSet().rebalance();
		DataSet<Row> resultRows = inputs[1].getDataSet()
				.map(new ModelMapperAdapter(mapper, modelSource))
				.withBroadcastSet(modelRows, BROADCAST_MODEL_TABLE_NAME);

		TableSchema outputSchema = mapper.getOutputSchema();
		this.setOutput(resultRows, outputSchema);
		return (T) this;
	} catch (Exception ex) {
		throw new RuntimeException(ex);
	}
}
 
Example #10
Source File: ImputerMapperTest.java    From Alink with Apache License 2.0
@Test
public void testMean() throws Exception {
    Row[] rows = new Row[]{
        Row.of(0L, "{\"selectedCols\":\"[\\\"f_double\\\",\\\"f_long\\\",\\\"f_int\\\"]\",\"strategy\":\"\\\"mean\\\"\"}", null, null, null),
        Row.of(1048576L, "[0.3333333333333333,1.0,1.0]", null, null, null)
    };

    List<Row> model = Arrays.asList(rows);

    TableSchema dataSchema = new TableSchema(
        new String[]{"f_string", "f_long", "f_int", "f_double", "f_boolean"},
        new TypeInformation<?>[]{Types.STRING, Types.LONG, Types.INT, Types.DOUBLE, Types.BOOLEAN}
    );
    Params params = new Params();

    ImputerModelMapper mapper = new ImputerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals(mapper.map(Row.of("a", null, null, null, true)).getField(1), 1L);
    assertEquals(mapper.map(Row.of("a", null, null, null, true)).getField(2), 1);
    assertEquals((double) mapper.map(Row.of("a", null, null, null, true)).getField(3), 0.333333333, 10e-4);
}
 
Example #11
Source File: ManHattanDistanceTest.java    From Alink with Apache License 2.0
@Test
public void testCalDistanceVecMatrix() {
    FastDistanceMatrixData matrixData = initMatrixData();
    FastDistanceVectorData vectorData = distance.prepareVectorData(Row.of(0, sparseVector1), 1);

    double[] predict = distance.calc(matrixData, vectorData).getData();
    double[] expect = new double[] {10.5, 16.5};
    for (int i = 0; i < expect.length; i++) {
        Assert.assertEquals(expect[i], predict[i], 0.01);
    }

    predict = distance.calc(vectorData, matrixData).getData();
    for (int i = 0; i < expect.length; i++) {
        Assert.assertEquals(expect[i], predict[i], 0.01);
    }
}
 
Example #12
Source File: RowComparator.java    From flink with Apache License 2.0
@Override
public int hash(Row record) {
	int code = 0;
	int i = 0;

	try {
		for (; i < keyPositions.length; i++) {
			code *= TupleComparatorBase.HASH_SALT[i & 0x1F];
			Object element = record.getField(keyPositions[i]); // element can be null
			code += comparators[i].hash(element);
		}
	} catch (IndexOutOfBoundsException e) {
		throw new KeyFieldOutOfBoundsException(keyPositions[i]);
	}

	return code;
}
 
Example #13
Source File: KafkaTableSourceBase.java    From flink with Apache License 2.0
/**
 * Creates a generic Kafka {@link StreamTableSource}.
 *
 * @param schema                      Schema of the produced table.
 * @param proctimeAttribute           Field name of the processing time attribute.
 * @param rowtimeAttributeDescriptors Descriptor for a rowtime attribute
 * @param fieldMapping                Mapping for the fields of the table schema to
 *                                    fields of the physical returned type.
 * @param topic                       Kafka topic to consume.
 * @param properties                  Properties for the Kafka consumer.
 * @param deserializationSchema       Deserialization schema for decoding records from Kafka.
 * @param startupMode                 Startup mode for the contained consumer.
 * @param specificStartupOffsets      Specific startup offsets; only relevant when startup
 *                                    mode is {@link StartupMode#SPECIFIC_OFFSETS}.
 * @param startupTimestampMillis	  Startup timestamp for offsets; only relevant when startup
 *                                    mode is {@link StartupMode#TIMESTAMP}.
 */
protected KafkaTableSourceBase(
		TableSchema schema,
		Optional<String> proctimeAttribute,
		List<RowtimeAttributeDescriptor> rowtimeAttributeDescriptors,
		Optional<Map<String, String>> fieldMapping,
		String topic,
		Properties properties,
		DeserializationSchema<Row> deserializationSchema,
		StartupMode startupMode,
		Map<KafkaTopicPartition, Long> specificStartupOffsets,
		long startupTimestampMillis) {
	this.schema = TableSchemaUtils.checkNoGeneratedColumns(schema);
	this.proctimeAttribute = validateProctimeAttribute(proctimeAttribute);
	this.rowtimeAttributeDescriptors = validateRowtimeAttributeDescriptors(rowtimeAttributeDescriptors);
	this.fieldMapping = fieldMapping;
	this.topic = Preconditions.checkNotNull(topic, "Topic must not be null.");
	this.properties = Preconditions.checkNotNull(properties, "Properties must not be null.");
	this.deserializationSchema = Preconditions.checkNotNull(
		deserializationSchema, "Deserialization schema must not be null.");
	this.startupMode = Preconditions.checkNotNull(startupMode, "Startup mode must not be null.");
	this.specificStartupOffsets = Preconditions.checkNotNull(
		specificStartupOffsets, "Specific offsets must not be null.");
	this.startupTimestampMillis = startupTimestampMillis;
}
 
Example #14
Source File: MaxAbsScalerModelMapper.java    From Alink with Apache License 2.0
/**
 * Map operation method.
 *
 * @param row the input Row type data.
 * @return one Row type data.
 * @throws Exception This method may throw exceptions. Throwing
 *                   an exception will cause the operation to fail.
 */
@Override
public Row map(Row row) throws Exception {
    if (null == row) {
        return null;
    }
    Row r = new Row(selectedColIndices.length);
    for (int i = 0; i < this.selectedColIndices.length; i++) {
        Object obj = row.getField(this.selectedColIndices[i]);
        if (null != obj) {
            double d;
            if (obj instanceof Number) {
                d = ((Number) obj).doubleValue();
            } else {
                d = Double.parseDouble(obj.toString());
            }
            r.setField(i, ScalerUtil.maxAbsScaler(this.maxAbs[i], d));
        }
    }
    return this.predictResultColsHelper.getResultRow(row, r);
}
 
Example #15
Source File: JavaSqlITCase.java    From flink with Apache License 2.0
@Test
public void testFilter() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
	StreamITCase.clear();

	DataStream<Tuple5<Integer, Long, Integer, String, Long>> ds = JavaStreamTestData.get5TupleDataStream(env);
	tableEnv.registerDataStream("MyTable", ds, "a, b, c, d, e");

	String sqlQuery = "SELECT a, b, e FROM MyTable WHERE c < 4";
	Table result = tableEnv.sqlQuery(sqlQuery);

	DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<Row>());
	env.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,1,1");
	expected.add("2,2,2");
	expected.add("2,3,1");
	expected.add("3,4,2");

	StreamITCase.compareWithList(expected);
}
 
Example #16
Source File: OrcBatchReader.java    From flink with Apache License 2.0
private static void readNonNullDecimalColumn(Object[] vals, int fieldIdx, DecimalColumnVector vector, int childCount) {

	if (vector.isRepeating) { // fill complete column with first value
		fillColumnWithRepeatingValue(vals, fieldIdx, readBigDecimal(vector.vector[0]), childCount);
	} else {
		if (fieldIdx == -1) { // set as an object
			for (int i = 0; i < childCount; i++) {
				vals[i] = readBigDecimal(vector.vector[i]);
			}
		} else { // set as a field of Row
			Row[] rows = (Row[]) vals;
			for (int i = 0; i < childCount; i++) {
				rows[i].setField(fieldIdx, readBigDecimal(vector.vector[i]));
			}
		}
	}
}
 
Example #17
Source File: ModelConverterUtils.java    From Alink with Apache License 2.0
/**
 * Extract from a collection of rows the model meta and model data.
 *
 * @param rows Model rows.
 * @return A tuple of model meta and serialized model data.
 */
static Tuple2<Params, Iterable<String>> extractModelMetaAndData(List<Row> rows) {
    Integer[] order = orderModelRows(rows);

    // extract meta
    List<String> metaSegments = new ArrayList<>();
    for (int i = 0; i < order.length; i++) {
        long id = (Long) rows.get(order[i]).getField(0);
        int currStringId = getStringIndex(id);
        if (currStringId == 0) {
            metaSegments.add((String) rows.get(order[i]).getField(1));
        } else {
            break;
        }
    }
    String metaStr = mergeString(metaSegments);

    return Tuple2.of(Params.fromJson(metaStr), new StringDataIterable(rows, order));
}
 
Example #18
Source File: FlinkTransFrom.java    From sylph with Apache License 2.0
@Override
public void flatMap(Row row, Collector<Row> collector)
        throws Exception
{
    ideal.sylph.etl.Collector<Record> rowCollector = new ideal.sylph.etl.Collector<Record>()
    {
        @Override
        public void collect(Record record)
        {
            collector.collect(FlinkRecord.parserRow(record));
        }

        @Override
        public void close()
        {
            collector.close();
        }
    };
    realTimeTransForm.process(new FlinkRecord(row, typeInformation), rowCollector);
}
 
Example #19
Source File: SelectMapperTest.java    From Alink with Apache License 2.0
@Test
public void testHashFunctions() throws Exception {
    TableSchema dataSchema = TableSchema.builder().fields(
        new String[] {"id", "name"},
        new DataType[] {DataTypes.INT(), DataTypes.STRING()}).build();
    Params params = new Params();
    params.set(HasClause.CLAUSE,
        "id, MD5(name), SHA1(name), SHA224(name), SHA256(name), SHA384(name), SHA512(name), SHA2(name, 512)"
    );
    SelectMapper selectMapper = new SelectMapper(dataSchema, params);
    selectMapper.open();
    Row expected = Row.of(1, "e41225f8921fffcead7a35a3ddabdeeb", "ff13f5e89c51b0b9af963d080ef0899c7a169080",
        "66f30b83556e5b5b18559273e292cc64fff896dc1b9375f54c7f2b21",
        "62d9e539628b195b8df54c6b8fb6242fb0ba8da6aa793f7a482bdf723dd3edb5",
        "43b359d46d9c98d66a74be2e3ce99f9bbcc9195885af3aaf1ade323eb5eba45a51ec9b579fe0708bde6d2267a540d135",
        "3a08526868871f1d5f4efdf2f1229d65802818772a054a4a8cd272183275d53db5e40730d68af3dcdd8bfcd95bc1e97167947692e3c7b8d0dbd59cedb4aa650a",
        "3a08526868871f1d5f4efdf2f1229d65802818772a054a4a8cd272183275d53db5e40730d68af3dcdd8bfcd95bc1e97167947692e3c7b8d0dbd59cedb4aa650a");
    Row output = selectMapper.map(Row.of(1, "'abc'"));
    try {
        assertEquals(expected, output);
    } finally {
        selectMapper.close();
    }
}
 
Example #20
Source File: TableExampleWordCount.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String path = TableExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();
    blinkStreamTableEnv
            .connect(new FileSystem().path(path))
            .withFormat(new OldCsv().field("word", Types.STRING).lineDelimiter("\n"))
            .withSchema(new Schema().field("word", Types.STRING))
            .inAppendMode()
            .registerTableSource("FlieSourceTable");

    Table wordWithCount = blinkStreamTableEnv.scan("FlieSourceTable")
            .groupBy("word")
            .select("word,count(word) as _count");
    blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class).print();

    // The printed results contain an extra true/false field, which may be surprising at first.
    // The sink first retracts and then inserts: false marks the deletion of the previous record,
    // true marks the insertion of the new one (see the sketch after this example for consuming these flags directly).

    blinkStreamTableEnv.execute("Blink Stream SQL Job");
}
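
The retraction flags described in the comments above can also be consumed programmatically instead of just printed. The following sketch is illustrative and not part of the original example (class and method names are made up): it keeps only the insert (true) messages of a retract stream and unwraps the Row payload.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.types.Row;

public class RetractStreamHandling {
    // 'retractStream' would come from toRetractStream(table, Row.class):
    // each element is a Tuple2<Boolean, Row> where f0 == false retracts the
    // previous result and f0 == true inserts the updated one.
    public static DataStream<Row> insertsOnly(DataStream<Tuple2<Boolean, Row>> retractStream) {
        return retractStream
                .filter(change -> change.f0)   // drop retraction (false) messages
                .map(change -> change.f1)      // unwrap the Row payload
                .returns(Row.class);           // declare the lambda's result type explicitly
    }
}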
 
Example #21
Source File: GenerateData.java    From Alink with Apache License 2.0
public static Table getStreamTable() {
    Row[] testArray =
        new Row[]{
            Row.of(1.0, 2.0),
            Row.of(-1.0, -3.0),
            Row.of(4.0, 2.0),
            Row.of(null, null),
        };

    String[] colNames = new String[]{"f0", "f1"};

    return MLEnvironmentFactory.getDefault().createStreamTable(Arrays.asList(testArray), colNames);
}
 
Example #22
Source File: AvroRowDeSerializationSchemaTest.java    From flink with Apache License 2.0
@Test
public void testGenericSerializeDeserialize() throws IOException {
	final Tuple3<GenericRecord, Row, Schema> testData = AvroTestUtils.getGenericTestData();

	final AvroRowSerializationSchema serializationSchema = new AvroRowSerializationSchema(testData.f2.toString());
	final AvroRowDeserializationSchema deserializationSchema = new AvroRowDeserializationSchema(testData.f2.toString());

	final byte[] bytes = serializationSchema.serialize(testData.f1);
	final Row actual = deserializationSchema.deserialize(bytes);

	assertEquals(testData.f1, actual);
}
 
Example #23
Source File: StringParsersTest.java    From Alink with Apache License 2.0
@Test
public void testKvParser() throws Exception {
    String kvStr = "f1=1,f2=2.0,f3=false,f4=val,f5=2018-09-10,f6=14:22:20,f7=2018-09-10 14:22:20";
    String schemaStr = "f1 bigint, f2 double, f3 boolean, f4 string, f5 date, f6 time, f7 timestamp";

    TableSchema schema = CsvUtil.schemaStr2Schema(schemaStr);
    StringParsers.KvParser parser = new StringParsers.KvParser(schema.getFieldNames(), schema.getFieldTypes(), ",", "=");
    Tuple2<Boolean, Row> parsed = parser.parse(kvStr);
    Assert.assertTrue(parsed.f0);
    Assert.assertEquals(parsed.f1.getArity(), 7);
}
 
Example #24
Source File: JsonRowDeserializationSchema.java    From Flink-CEPplus with Apache License 2.0
@Override
public Row deserialize(byte[] message) throws IOException {
	try {
		final JsonNode root = objectMapper.readTree(message);
		return convertRow(root, (RowTypeInfo) typeInfo);
	} catch (Throwable t) {
		throw new IOException("Failed to deserialize JSON object.", t);
	}
}
 
Example #25
Source File: ArrowPythonScalarFunctionRunnerTest.java    From flink with Apache License 2.0
private AbstractArrowPythonScalarFunctionRunner<Row> createPassThroughArrowPythonScalarFunctionRunner(
	FnDataReceiver<byte[]> receiver,
	PythonFunctionInfo[] pythonFunctionInfos,
	RowType inputType,
	RowType outputType,
	int maxArrowBatchSize,
	JobBundleFactory jobBundleFactory) {

	final PythonEnvironmentManager environmentManager = createTestEnvironmentManager();

	return new PassThroughArrowPythonScalarFunctionRunner<Row>(
		"testPythonRunner",
		receiver,
		pythonFunctionInfos,
		environmentManager,
		inputType,
		outputType,
		maxArrowBatchSize,
		Collections.emptyMap(),
		jobBundleFactory,
		PythonTestUtils.createMockFlinkMetricContainer()) {
		@Override
		public ArrowWriter<Row> createArrowWriter() {
			return ArrowUtils.createRowArrowWriter(root, getInputType());
		}
	};
}
 
Example #26
Source File: RowCsvInputFormatTest.java    From flink with Apache License 2.0
@Test
public void ignoreSingleCharPrefixComments() throws Exception {
	String fileContent =
		"#description of the data\n" +
			"#successive commented line\n" +
			"this is|1|2.0|\n" +
			"a test|3|4.0|\n" +
			"#next|5|6.0|\n";

	FileInputSplit split = createTempFile(fileContent);

	TypeInformation[] fieldTypes = new TypeInformation[]{
		BasicTypeInfo.STRING_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.DOUBLE_TYPE_INFO};

	RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
	format.setCommentPrefix("#");
	format.configure(new Configuration());
	format.open(split);

	Row result = new Row(3);

	result = format.nextRecord(result);
	assertNotNull(result);
	assertEquals("this is", result.getField(0));
	assertEquals(1, result.getField(1));
	assertEquals(2.0, result.getField(2));

	result = format.nextRecord(result);
	assertNotNull(result);
	assertEquals("a test", result.getField(0));
	assertEquals(3, result.getField(1));
	assertEquals(4.0, result.getField(2));

	result = format.nextRecord(result);
	assertNull(result);
}
 
Example #27
Source File: OrcTableSourceITCase.java    From flink with Apache License 2.0
@Test
public void testFullScan() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);

	OrcTableSource orc = OrcTableSource.builder()
		.path(getPath(TEST_FILE_FLAT))
		.forOrcSchema(TEST_SCHEMA_FLAT)
		.build();
	tEnv.registerTableSource("OrcTable", orc);

	String query =
		"SELECT COUNT(*), " +
			"MIN(_col0), MAX(_col0), " +
			"MIN(_col1), MAX(_col1), " +
			"MIN(_col2), MAX(_col2), " +
			"MIN(_col3), MAX(_col3), " +
			"MIN(_col4), MAX(_col4), " +
			"MIN(_col5), MAX(_col5), " +
			"MIN(_col6), MAX(_col6), " +
			"MIN(_col7), MAX(_col7), " +
			"MIN(_col8), MAX(_col8) " +
		"FROM OrcTable";
	Table t = tEnv.sqlQuery(query);

	DataSet<Row> dataSet = tEnv.toDataSet(t, Row.class);
	List<Row> result = dataSet.collect();

	assertEquals(1, result.size());
	assertEquals(
		"1920800,1,1920800,F,M,D,W,2 yr Degree,Unknown,500,10000,Good,Unknown,0,6,0,6,0,6",
		result.get(0).toString());
}
 
Example #28
Source File: GroupingSetsITCase.java    From flink with Apache License 2.0
private void compareSql(String query1, String query2) throws Exception {

		// Function to map row to string
		MapFunction<Row, String> mapFunction = new MapFunction<Row, String>() {

			@Override
			public String map(Row value) throws Exception {
				return value == null ? "null" : value.toString();
			}
		};

		// Execute first query and store results
		Table resultTable1 = tableEnv.sqlQuery(query1);
		DataSet<Row> resultDataSet1 = tableEnv.toDataSet(resultTable1, Row.class);
		List<String> results1 = resultDataSet1.map(mapFunction).collect();

		// Execute second query and store results
		Table resultTable2 = tableEnv.sqlQuery(query2);
		DataSet<Row> resultDataSet2 = tableEnv.toDataSet(resultTable2, Row.class);
		List<String> results2 = resultDataSet2.map(mapFunction).collect();

		// Compare results
		TestBaseUtils.compareResultCollections(results1, results2, new Comparator<String>() {

			@Override
			public int compare(String o1, String o2) {
				return o2 == null ? o1 == null ? 0 : 1 : o1.compareTo(o2);
			}
		});
	}
 
Example #29
Source File: JdbcUpsertTableSinkITCase.java    From flink with Apache License 2.0
@Test
public void testBatchSink() throws Exception {
	EnvironmentSettings bsSettings = EnvironmentSettings.newInstance()
			.useBlinkPlanner().inBatchMode().build();
	TableEnvironment tEnv = TableEnvironment.create(bsSettings);

	tEnv.executeSql(
		"CREATE TABLE USER_RESULT(" +
			"NAME VARCHAR," +
			"SCORE BIGINT" +
			") WITH ( " +
			"'connector.type' = 'jdbc'," +
			"'connector.url'='" + DB_URL + "'," +
			"'connector.table' = '" + OUTPUT_TABLE3 + "'" +
			")");

	TableResult tableResult  = tEnv.executeSql("INSERT INTO USER_RESULT\n" +
			"SELECT user_name, score " +
			"FROM (VALUES (1, 'Bob'), (22, 'Tom'), (42, 'Kim'), " +
			"(42, 'Kim'), (1, 'Bob')) " +
			"AS UserCountTable(score, user_name)");
	// wait to finish
	tableResult.getJobClient().get().getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get();

	check(new Row[] {
			Row.of("Bob", 1),
			Row.of("Tom", 22),
			Row.of("Kim", 42),
			Row.of("Kim", 42),
			Row.of("Bob", 1)
	}, DB_URL, OUTPUT_TABLE3, new String[]{"NAME", "SCORE"});
}
 
Example #30
Source File: StatisticsHelper.java    From Alink with Apache License 2.0
@Override
public Row map(Row in) throws Exception {
    //table cols and reserved cols, table cols will be transform to double type.
    Row out = new Row(selectedColIndices.length + reservedColIndices.length);
    for (int i = 0; i < this.selectedColIndices.length; ++i) {
        out.setField(i, ((Number) in.getField(this.selectedColIndices[i])).doubleValue());
    }
    for (int i = 0; i < reservedColIndices.length; i++) {
        out.setField(i + selectedColIndices.length, in.getField(reservedColIndices[i]));
    }
    return out;
}