org.apache.flink.api.common.serialization.Encoder Java Examples
The following examples show how to use org.apache.flink.api.common.serialization.Encoder.
You can go to the original project or source file by following the links above each example.
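Before the project examples, a minimal sketch of the interface itself may help. Encoder exposes a single encode(element, stream) method that writes one record to the open part-file stream of a row-format StreamingFileSink. The class below is illustrative (the name LineEncoder is not from any of the examples); it simply writes each element as a UTF-8 line, and a lambda, as used in several examples further down, works just as well.

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.flink.api.common.serialization.Encoder;

    // Illustrative row-format encoder: writes each record as one UTF-8 text line.
    public class LineEncoder<T> implements Encoder<T> {

        @Override
        public void encode(T element, OutputStream stream) throws IOException {
            stream.write(element.toString().getBytes(StandardCharsets.UTF_8));
            stream.write('\n');
        }
    }

An encoder like this is typically passed to StreamingFileSink.forRowFormat(path, encoder), as the builder-based examples below show.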
Example #1
Source File: TestUtils.java (from flink, Apache License 2.0)

static <ID> OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createCustomizedRescalingTestSink(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, ID> bucketer,
        final Encoder<Tuple2<String, Integer>> writer,
        final RollingPolicy<Tuple2<String, Integer>, ID> rollingPolicy,
        final BucketFactory<Tuple2<String, Integer>, ID> bucketFactory) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forRowFormat(new Path(outDir.toURI()), writer)
            .withNewBucketAssignerAndPolicy(bucketer, rollingPolicy)
            .withBucketCheckInterval(bucketCheckInterval)
            .withBucketFactory(bucketFactory)
            .build();

    return new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #2
Source File: TestUtils.java (from flink, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final Encoder<Tuple2<String, Integer>> writer,
        final RollingPolicy<Tuple2<String, Integer>, String> rollingPolicy,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forRowFormat(new Path(outDir.toURI()), writer)
            .withBucketAssigner(bucketer)
            .withRollingPolicy(rollingPolicy)
            .withBucketCheckInterval(bucketCheckInterval)
            .withBucketFactory(bucketFactory)
            .build();

    return new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #3
Source File: StreamingFileSink.java (from flink, Apache License 2.0)

protected RowFormatBuilder(
        Path basePath,
        Encoder<IN> encoder,
        BucketAssigner<IN, BucketID> assigner,
        RollingPolicy<IN, BucketID> policy,
        long bucketCheckInterval,
        BucketFactory<IN, BucketID> bucketFactory,
        OutputFileConfig outputFileConfig) {
    this.basePath = Preconditions.checkNotNull(basePath);
    this.encoder = Preconditions.checkNotNull(encoder);
    this.bucketAssigner = Preconditions.checkNotNull(assigner);
    this.rollingPolicy = Preconditions.checkNotNull(policy);
    this.bucketCheckInterval = bucketCheckInterval;
    this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
    this.outputFileConfig = Preconditions.checkNotNull(outputFileConfig);
}
Example #4
Source File: TestUtils.java (from Flink-CEPplus, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createCustomRescalingTestSink(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final Encoder<Tuple2<String, Integer>> writer,
        final RollingPolicy<Tuple2<String, Integer>, String> rollingPolicy,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forRowFormat(new Path(outDir.toURI()), writer)
            .withBucketAssigner(bucketer)
            .withRollingPolicy(rollingPolicy)
            .withBucketCheckInterval(bucketCheckInterval)
            .withBucketFactory(bucketFactory)
            .build();

    return new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #5
Source File: TestUtils.java (from flink, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createCustomRescalingTestSink(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final Encoder<Tuple2<String, Integer>> writer,
        final RollingPolicy<Tuple2<String, Integer>, String> rollingPolicy,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forRowFormat(new Path(outDir.toURI()), writer)
            .withBucketAssigner(bucketer)
            .withRollingPolicy(rollingPolicy)
            .withBucketCheckInterval(bucketCheckInterval)
            .withBucketFactory(bucketFactory)
            .build();

    return new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #6
Source File: StreamingFileSink.java (from flink, Apache License 2.0)

private RowFormatBuilder(
        Path basePath,
        Encoder<IN> encoder,
        BucketAssigner<IN, BucketID> assigner,
        RollingPolicy<IN, BucketID> policy,
        long bucketCheckInterval,
        BucketFactory<IN, BucketID> bucketFactory,
        String partFilePrefix,
        String partFileSuffix) {
    this.basePath = Preconditions.checkNotNull(basePath);
    this.encoder = Preconditions.checkNotNull(encoder);
    this.bucketAssigner = Preconditions.checkNotNull(assigner);
    this.rollingPolicy = Preconditions.checkNotNull(policy);
    this.bucketCheckInterval = bucketCheckInterval;
    this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
    this.partFilePrefix = Preconditions.checkNotNull(partFilePrefix);
    this.partFileSuffix = Preconditions.checkNotNull(partFileSuffix);
}
Example #7
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

private static OutputFormat<RowData> createEncoderOutputFormat(
        Encoder<RowData> encoder,
        Path path) {
    return new OutputFormat<RowData>() {

        private static final long serialVersionUID = 1L;

        private transient FSDataOutputStream output;

        @Override
        public void configure(Configuration parameters) {
        }

        @Override
        public void open(int taskNumber, int numTasks) throws IOException {
            this.output = path.getFileSystem()
                    .create(path, FileSystem.WriteMode.OVERWRITE);
        }

        @Override
        public void writeRecord(RowData record) throws IOException {
            encoder.encode(record, output);
        }

        @Override
        public void close() throws IOException {
            this.output.flush();
            this.output.close();
        }
    };
}
Example #8
Source File: RowWisePartWriter.java (from flink, Apache License 2.0)

RowWisePartWriter(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream currentPartStream,
        final Encoder<IN> encoder,
        final long creationTime) {
    super(bucketId, currentPartStream, creationTime);
    this.encoder = Preconditions.checkNotNull(encoder);
}
Example #9
Source File: CsvFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<Encoder<RowData>> createEncoder(WriterContext context) {
    ReadableConfig options = context.getFormatOptions();
    validateFormatOptions(options);

    CsvRowDataSerializationSchema.Builder builder = new CsvRowDataSerializationSchema.Builder(
            context.getFormatRowType());

    options.getOptional(FIELD_DELIMITER).map(s -> s.charAt(0))
            .ifPresent(builder::setFieldDelimiter);
    options.getOptional(LINE_DELIMITER)
            .ifPresent(builder::setLineDelimiter);
    if (options.get(DISABLE_QUOTE_CHARACTER)) {
        builder.disableQuoteCharacter();
    } else {
        options.getOptional(QUOTE_CHARACTER).map(s -> s.charAt(0)).ifPresent(builder::setQuoteCharacter);
    }
    options.getOptional(ARRAY_ELEMENT_DELIMITER)
            .ifPresent(builder::setArrayElementDelimiter);
    options.getOptional(ESCAPE_CHARACTER).map(s -> s.charAt(0))
            .ifPresent(builder::setEscapeCharacter);
    options.getOptional(NULL_LITERAL)
            .ifPresent(builder::setNullLiteral);

    final CsvRowDataSerializationSchema serializationSchema = builder.build();

    return Optional.of((record, stream) -> stream.write(serializationSchema.serialize(record)));
}
Example #10
Source File: TestCsvFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<Encoder<RowData>> createEncoder(WriterContext context) {
    if (useBulkWriter(context)) {
        return Optional.empty();
    }

    DataType[] types = context.getFormatFieldTypes();
    return Optional.of((rowData, stream) -> {
        writeCsvToStream(types, rowData, stream);
    });
}
Example #11
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

private Object createWriter() {
    FileSystemFormatFactory formatFactory = createFormatFactory(properties);
    Configuration conf = new Configuration();
    properties.forEach(conf::setString);

    FileSystemFormatFactory.WriterContext context = new FileSystemFormatFactory.WriterContext() {

        @Override
        public TableSchema getSchema() {
            return schema;
        }

        @Override
        public ReadableConfig getFormatOptions() {
            return new DelegatingConfiguration(conf, formatFactory.factoryIdentifier() + ".");
        }

        @Override
        public List<String> getPartitionKeys() {
            return partitionKeys;
        }
    };

    Optional<Encoder<RowData>> encoder = formatFactory.createEncoder(context);
    Optional<BulkWriter.Factory<RowData>> bulk = formatFactory.createBulkWriterFactory(context);

    if (encoder.isPresent()) {
        return encoder.get();
    } else if (bulk.isPresent()) {
        return bulk.get();
    } else {
        throw new TableException(
                formatFactory + " format should implement at least one Encoder or BulkWriter");
    }
}
Example #12
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

@SuppressWarnings("unchecked")
private OutputFormatFactory<RowData> createOutputFormatFactory() {
    Object writer = createWriter();
    return writer instanceof Encoder ?
            path -> createEncoderOutputFormat((Encoder<RowData>) writer, path) :
            path -> createBulkWriterOutputFormat((BulkWriter.Factory<RowData>) writer, path);
}
Example #13
Source File: TestUtils.java (from flink, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
        File outDir,
        int totalParallelism,
        int taskIdx,
        long inactivityInterval,
        long partMaxSize) throws Exception {

    final RollingPolicy<Tuple2<String, Integer>, String> rollingPolicy =
            DefaultRollingPolicy
                    .create()
                    .withMaxPartSize(partMaxSize)
                    .withRolloverInterval(inactivityInterval)
                    .withInactivityInterval(inactivityInterval)
                    .build();

    final BucketAssigner<Tuple2<String, Integer>, String> bucketer = new TupleToStringBucketer();

    final Encoder<Tuple2<String, Integer>> encoder = (element, stream) -> {
        stream.write((element.f0 + '@' + element.f1).getBytes(StandardCharsets.UTF_8));
        stream.write('\n');
    };

    return createCustomRescalingTestSink(
            outDir,
            totalParallelism,
            taskIdx,
            10L,
            bucketer,
            encoder,
            rollingPolicy,
            new DefaultBucketFactoryImpl<>());
}
Example #14
Source File: RowWisePartWriter.java (from Flink-CEPplus, Apache License 2.0)

private RowWisePartWriter(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream currentPartStream,
        final Encoder<IN> encoder,
        final long creationTime) {
    super(bucketId, currentPartStream, creationTime);
    this.encoder = Preconditions.checkNotNull(encoder);
}
Example #15
Source File: RowWisePartWriter.java (from flink, Apache License 2.0)

private RowWisePartWriter(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream currentPartStream,
        final Encoder<IN> encoder,
        final long creationTime) {
    super(bucketId, currentPartStream, creationTime);
    this.encoder = Preconditions.checkNotNull(encoder);
}
Example #16
Source File: TestUtils.java (from Flink-CEPplus, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
        File outDir,
        int totalParallelism,
        int taskIdx,
        long inactivityInterval,
        long partMaxSize) throws Exception {

    final RollingPolicy<Tuple2<String, Integer>, String> rollingPolicy =
            DefaultRollingPolicy
                    .create()
                    .withMaxPartSize(partMaxSize)
                    .withRolloverInterval(inactivityInterval)
                    .withInactivityInterval(inactivityInterval)
                    .build();

    final BucketAssigner<Tuple2<String, Integer>, String> bucketer = new TupleToStringBucketer();

    final Encoder<Tuple2<String, Integer>> encoder = (element, stream) -> {
        stream.write((element.f0 + '@' + element.f1).getBytes(StandardCharsets.UTF_8));
        stream.write('\n');
    };

    return createCustomRescalingTestSink(
            outDir,
            totalParallelism,
            taskIdx,
            10L,
            bucketer,
            encoder,
            rollingPolicy,
            new DefaultBucketFactoryImpl<>());
}
Example #17
Source File: StreamingFileSink.java (from Flink-CEPplus, Apache License 2.0)

private RowFormatBuilder(
        Path basePath,
        Encoder<IN> encoder,
        BucketAssigner<IN, BucketID> assigner,
        RollingPolicy<IN, BucketID> policy,
        long bucketCheckInterval,
        BucketFactory<IN, BucketID> bucketFactory) {
    this.basePath = Preconditions.checkNotNull(basePath);
    this.encoder = Preconditions.checkNotNull(encoder);
    this.bucketAssigner = Preconditions.checkNotNull(assigner);
    this.rollingPolicy = Preconditions.checkNotNull(policy);
    this.bucketCheckInterval = bucketCheckInterval;
    this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
}
Example #18
Source File: JsonFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<Encoder<RowData>> createEncoder(WriterContext context) {
    return Optional.of(new JsonRowDataEncoder(new JsonRowDataSerializationSchema(
            context.getFormatRowType(),
            JsonOptions.getTimestampFormat(context.getFormatOptions()))));
}
Example #19
Source File: RowWisePartWriter.java (from Flink-CEPplus, Apache License 2.0)

Factory(Encoder<IN> encoder) {
    this.encoder = encoder;
}
Example #20
Source File: StreamingFileSink.java (from flink, Apache License 2.0)

private DefaultRowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, String> bucketAssigner) {
    super(basePath, encoder, bucketAssigner);
}
Example #21
Source File: StreamingFileSink.java (from flink, Apache License 2.0)

protected RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
    this(basePath, encoder, bucketAssigner, DefaultRollingPolicy.builder().build(),
            DEFAULT_BUCKET_CHECK_INTERVAL, new DefaultBucketFactoryImpl<>(), OutputFileConfig.builder().build());
}
Example #22
Source File: RowWiseBucketWriter.java (from flink, Apache License 2.0)

RowWiseBucketWriter(final RecoverableWriter recoverableWriter, final Encoder<IN> encoder) {
    super(recoverableWriter);
    this.encoder = encoder;
}
Example #23
Source File: StreamingFileSink.java (from Flink-CEPplus, Apache License 2.0)

RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
    this(basePath, encoder, bucketAssigner, DefaultRollingPolicy.create().build(),
            60L * 1000L, new DefaultBucketFactoryImpl<>());
}
Example #24
Source File: StreamSQLTestProgram.java (from flink, Apache License 2.0)

public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    String planner = params.get("planner", "blink");

    final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance();
    builder.inStreamingMode();

    if (planner.equals("old")) {
        builder.useOldPlanner();
    } else if (planner.equals("blink")) {
        builder.useBlinkPlanner();
    }

    final EnvironmentSettings settings = builder.build();

    final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
            3,
            Time.of(10, TimeUnit.SECONDS)
    ));
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

    ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0));
    ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

    int overWindowSizeSeconds = 1;
    int tumbleWindowSizeSeconds = 10;

    String overQuery = String.format(
            "SELECT " +
            " key, " +
            " rowtime, " +
            " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
            "FROM table1",
            overWindowSizeSeconds);

    String tumbleQuery = String.format(
            "SELECT " +
            " key, " +
            " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
            " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
            " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
            "FROM (%s) " +
            "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
            "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
            tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds);

    String joinQuery = String.format(
            "SELECT " +
            " t1.key, " +
            " t2.rowtime AS rowtime, " +
            " t2.correct," +
            " t2.wStart " +
            "FROM table2 t1, (%s) t2 " +
            "WHERE " +
            " t1.key = t2.key AND " +
            " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
            tumbleQuery, tumbleWindowSizeSeconds);

    String finalAgg = String.format(
            "SELECT " +
            " SUM(correct) AS correct, " +
            " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
            "FROM (%s) " +
            "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
            joinQuery);

    // get Table for SQL query
    Table result = tEnv.sqlQuery(finalAgg);
    // convert Table into append-only DataStream
    DataStream<Row> resultStream =
            tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

    final StreamingFileSink<Row> sink = StreamingFileSink
            .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
                PrintStream out = new PrintStream(stream);
                out.println(element.toString());
            })
            .withBucketAssigner(new KeyBucketAssigner())
            .withRollingPolicy(OnCheckpointRollingPolicy.build())
            .build();

    resultStream
            // inject a KillMapper that forwards all records but terminates the first execution attempt
            .map(new KillMapper()).setParallelism(1)
            // add sink function
            .addSink(sink).setParallelism(1);

    sEnv.execute();
}
Example #25
Source File: OrcFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<Encoder<RowData>> createEncoder(WriterContext context) {
    return Optional.empty();
}
Example #26
Source File: ParquetFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<Encoder<RowData>> createEncoder(WriterContext context) {
    return Optional.empty();
}
Example #27
Source File: StreamingFileSink.java (from flink, Apache License 2.0)

RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
    this(basePath, encoder, bucketAssigner, DefaultRollingPolicy.create().build(),
            60L * 1000L, new DefaultBucketFactoryImpl<>(),
            PartFileConfig.DEFAULT_PART_PREFIX, PartFileConfig.DEFAULT_PART_SUFFIX);
}
Example #28
Source File: AvroFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<Encoder<RowData>> createEncoder(WriterContext context) {
    return Optional.empty();
}
Example #29
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

private ProjectionEncoder(Encoder<RowData> encoder, RowDataPartitionComputer computer) {
    this.encoder = encoder;
    this.computer = computer;
}
Example #30
Source File: StreamSQLTestProgram.java (from Flink-CEPplus, Apache License 2.0)

public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
            3,
            Time.of(10, TimeUnit.SECONDS)
    ));
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);

    tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
    tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

    int overWindowSizeSeconds = 1;
    int tumbleWindowSizeSeconds = 10;

    String overQuery = String.format(
            "SELECT " +
            " key, " +
            " rowtime, " +
            " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
            "FROM table1",
            overWindowSizeSeconds);

    String tumbleQuery = String.format(
            "SELECT " +
            " key, " +
            " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
            " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
            " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
            "FROM (%s) " +
            "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
            "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
            tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds);

    String joinQuery = String.format(
            "SELECT " +
            " t1.key, " +
            " t2.rowtime AS rowtime, " +
            " t2.correct," +
            " t2.wStart " +
            "FROM table2 t1, (%s) t2 " +
            "WHERE " +
            " t1.key = t2.key AND " +
            " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
            tumbleQuery, tumbleWindowSizeSeconds);

    String finalAgg = String.format(
            "SELECT " +
            " SUM(correct) AS correct, " +
            " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
            "FROM (%s) " +
            "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
            joinQuery);

    // get Table for SQL query
    Table result = tEnv.sqlQuery(finalAgg);
    // convert Table into append-only DataStream
    DataStream<Row> resultStream =
            tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

    final StreamingFileSink<Row> sink = StreamingFileSink
            .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
                PrintStream out = new PrintStream(stream);
                out.println(element.toString());
            })
            .withBucketAssigner(new KeyBucketAssigner())
            .withRollingPolicy(OnCheckpointRollingPolicy.build())
            .build();

    resultStream
            // inject a KillMapper that forwards all records but terminates the first execution attempt
            .map(new KillMapper()).setParallelism(1)
            // add sink function
            .addSink(sink).setParallelism(1);

    sEnv.execute();
}