org.apache.flink.core.fs.FileInputSplit Java Examples
The following examples show how to use org.apache.flink.core.fs.FileInputSplit.
The project and source file that each example was taken from are noted above its code.
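Most examples below share one life cycle: configure the input format, let it enumerate FileInputSplits, open a split, and pull records until the end of the split is reached. The following is a minimal, runnable sketch of that pattern (the class name and the path /tmp/data.txt are placeholders, not taken from any example below):

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

public class FileInputSplitUsageSketch {
	public static void main(String[] args) throws Exception {
		TextInputFormat format = new TextInputFormat(new Path("/tmp/data.txt"));
		format.configure(new Configuration());

		// Enumerate splits; the argument is a minimum hint, not an exact count.
		FileInputSplit[] splits = format.createInputSplits(1);

		for (FileInputSplit split : splits) {
			format.open(split); // positions the stream at the start of the split
			String record;
			while (!format.reachedEnd() && (record = format.nextRecord(null)) != null) {
				System.out.println(record); // consume one record at a time
			}
			format.close();
		}
	}
}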
Example #1
Source File: DelimitedInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDelimiterOnBufferBoundary() throws IOException {
	String[] records = new String[]{"1234567890<DEL?NO!>1234567890", "1234567890<DEL?NO!>1234567890", "<DEL?NO!>"};
	String delimiter = "<DELIM>";
	String fileContent = StringUtils.join(records, delimiter);

	final FileInputSplit split = createTempFile(fileContent);
	final Configuration parameters = new Configuration();

	format.setBufferSize(12);
	format.setDelimiter(delimiter);
	format.configure(parameters);
	format.open(split);

	for (String record : records) {
		String value = format.nextRecord(null);
		assertEquals(record, value);
	}

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());

	format.close();
}
Example #2
Source File: ParquetRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadRowFromNestedRecord() throws IOException {
	Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> nested = TestUtil.getNestedRecordTestData();
	Path path = TestUtil.createTempParquetFile(tempRoot.newFolder(), TestUtil.NESTED_SCHEMA, Collections.singletonList(nested.f1));
	MessageType nestedType = SCHEMA_CONVERTER.convert(TestUtil.NESTED_SCHEMA);

	ParquetRowInputFormat inputFormat = new ParquetRowInputFormat(path, nestedType);
	inputFormat.setRuntimeContext(TestUtil.getMockRuntimeContext());

	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	inputFormat.open(splits[0]);

	Row row = inputFormat.nextRecord(null);
	assertNotNull(row);
	assertEquals(7, row.getArity());

	assertEquals(nested.f2.getField(0), row.getField(0));
	assertEquals(nested.f2.getField(1), row.getField(1));
	assertArrayEquals((Long[]) nested.f2.getField(3), (Long[]) row.getField(3));
	assertArrayEquals((String[]) nested.f2.getField(4), (String[]) row.getField(4));
	assertEquals(nested.f2.getField(5), row.getField(5));
	assertArrayEquals((Row[]) nested.f2.getField(6), (Row[]) row.getField(6));
}
Example #3
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testReadDecimalTypeFile() throws IOException {
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();
	rowOrcInputFormat.open(splits[0]);

	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);

	// validate first row
	assertNotNull(row);
	assertEquals(1, row.getArity());
	assertEquals(BigDecimal.valueOf(-1000.5d), row.getField(0));

	// check correct number of rows
	long cnt = 1;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	assertEquals(6000, cnt);
}
Example #4
Source File: EnumerateNestedFilesTest.java From flink with Apache License 2.0
/**
 * Test without nested directory and recursive.file.enumeration = true.
 */
@Test
public void testNoNestedDirectoryTrue() {
	try {
		String filePath = TestFileUtils.createTempFile("foo");
		this.format.setFilePath(new Path(filePath));
		this.config.setBoolean("recursive.file.enumeration", true);
		format.configure(this.config);

		FileInputSplit[] splits = format.createInputSplits(1);
		Assert.assertEquals(1, splits.length);
	} catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail(ex.getMessage());
	}
}
Example #5
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind map and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap()).setParallelism(DEFAULT_PARALLELISM + 1)
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
Example #6
Source File: CsvInputFormatTest.java From flink with Apache License 2.0
@Test
public void testPojoTypeWithMappingInfoAndPartialField() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
	wrt.write("123,3.123,AAA,BBB\n");
	wrt.write("456,1.123,BBB,AAA\n");
	wrt.close();

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[]{"field1", "field4"}, new boolean[]{true, false, false, true});

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	inputFormat.open(splits[0]);

	PojoItem item = new PojoItem();
	inputFormat.nextRecord(item);

	assertEquals(123, item.field1);
	assertEquals("BBB", item.field4);
}
Example #7
Source File: CsvInputFormatTest.java From flink with Apache License 2.0
@Test
public void testPojoType() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
	wrt.write("123,AAA,3.123,BBB\n");
	wrt.write("456,BBB,1.123,AAA\n");
	wrt.close();

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo);

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	inputFormat.open(splits[0]);

	validatePojoItem(inputFormat);
}
Example #8
Source File: AvroInputFormat.java From Flink-CEPplus with Apache License 2.0
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}

	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
Example #9
Source File: AvroInputFormat.java From Flink-CEPplus with Apache License 2.0
@Override
public void reopen(FileInputSplit split, Tuple2<Long, Long> state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	try {
		this.open(split);
	} finally {
		if (state.f0 != -1) {
			lastSync = state.f0;
			recordsReadSinceLastSync = state.f1;
		}
	}

	if (lastSync != -1) {
		// open and read until the record we were before
		// the checkpoint and discard the values
		dataFileReader.seek(lastSync);
		for (int i = 0; i < recordsReadSinceLastSync; i++) {
			dataFileReader.next(null);
		}
	}
}
Example #10
Source File: ParquetMapInputFormatTest.java From flink with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testProjectedReadMapFromNestedRecord() throws IOException {
	Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> nested = TestUtil.getNestedRecordTestData();
	Path path = TestUtil.createTempParquetFile(tempRoot.getRoot(), TestUtil.NESTED_SCHEMA, Collections.singletonList(nested.f1));
	MessageType nestedType = SCHEMA_CONVERTER.convert(TestUtil.NESTED_SCHEMA);
	ParquetMapInputFormat inputFormat = new ParquetMapInputFormat(path, nestedType);

	inputFormat.selectFields(Collections.singletonList("nestedMap").toArray(new String[0]));
	inputFormat.setRuntimeContext(TestUtil.getMockRuntimeContext());

	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	inputFormat.open(splits[0]);

	Map map = inputFormat.nextRecord(null);
	assertNotNull(map);
	assertEquals(1, map.size());

	Map<String, String> mapItem = (Map<String, String>) ((Map) map.get("nestedMap")).get("mapItem");
	assertEquals(2, mapItem.size());
	assertEquals("map", mapItem.get("type"));
	assertEquals("hashMap", mapItem.get("value"));
}
Example #11
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind reduce.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindReduce() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.reduce(new LastReduce())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
Example #12
Source File: GenericCsvInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadTooShortInputLenient() throws IOException {
	try {
		final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
		final FileInputSplit split = createTempFile(fileContent);

		final Configuration parameters = new Configuration();
		format.setFieldDelimiter("|");
		format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
		format.setLenient(true);

		format.configure(parameters);
		format.open(split);

		Value[] values = createIntValues(5);

		assertNotNull(format.nextRecord(values)); // line okay
		assertNull(format.nextRecord(values));    // line too short
		assertNotNull(format.nextRecord(values)); // line okay
	} catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
	}
}
Example #13
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadWithoutTrailingDelimiter() throws IOException {
	// 2. test case
	final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2";
	final FileInputSplit split = createTempFile(myString);

	final Configuration parameters = new Configuration();
	// default delimiter = '\n'

	format.configure(parameters);
	format.open(split);

	String first = format.nextRecord(null);
	String second = format.nextRecord(null);
	assertNotNull(first);
	assertNotNull(second);

	assertEquals("my key|my val$$$my key2", first);
	assertEquals("$$ctd.$$|my value2", second);

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
Example #14
Source File: GraphCreationWithCsvITCase.java From flink with Apache License 2.0
@Test
public void testCreateWithOnlyEdgesCsvFile() throws Exception {
	/*
	 * Test with one CSV file containing edge data.
	 * Also tests the configuration method ignoreFirstLineEdges().
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final String fileContent2 = "header\n1,2,ot\n" +
			"3,2,tt\n" +
			"3,1,to\n";
	final FileInputSplit split2 = createTempFile(fileContent2);
	Graph<Long, NullValue, String> graph = Graph.fromCsvReader(split2.getPath().toString(), env)
			.ignoreFirstLineEdges()
			.ignoreCommentsVertices("hi")
			.edgeTypes(Long.class, String.class);

	List<Triplet<Long, NullValue, String>> result = graph.getTriplets().collect();
	expectedResult = "1,2,(null),(null),ot\n" + "3,2,(null),(null),tt\n" + "3,1,(null),(null),to\n";
	compareResultAsTuples(result, expectedResult);
}
Example #15
Source File: CsvInputFormatTest.java From flink with Apache License 2.0
@Test
public void testPojoTypeWithMappingInformation() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
	wrt.write("123,3.123,AAA,BBB\n");
	wrt.write("456,1.123,BBB,AAA\n");
	wrt.close();

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[]{"field1", "field3", "field2", "field4"});

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	inputFormat.open(splits[0]);

	validatePojoItem(inputFormat);
}
Example #16
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadWithTrailingDelimiter() throws IOException {
	// 2. test case
	final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2\n";
	final FileInputSplit split = createTempFile(myString);

	final Configuration parameters = new Configuration();
	// default delimiter = '\n'

	format.configure(parameters);
	format.open(split);

	String first = format.nextRecord(null);
	String second = format.nextRecord(null);
	assertNotNull(first);
	assertNotNull(second);

	assertEquals("my key|my val$$$my key2", first);
	assertEquals("$$ctd.$$|my value2", second);

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
Example #17
Source File: EnumerateNestedFilesTest.java From flink with Apache License 2.0
/**
 * Test with one nested directory and recursive.file.enumeration = true.
 */
@Test
public void testOneNestedDirectoryTrue() {
	try {
		String firstLevelDir = TestFileUtils.randomFileName();
		String secondLevelDir = TestFileUtils.randomFileName();

		File insideNestedDir = tempFolder.newFolder(firstLevelDir, secondLevelDir);
		File nestedDir = insideNestedDir.getParentFile();

		// create a file in the first-level and two files in the nested dir
		TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), "paella");
		TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "kalamari");
		TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "fideua");

		this.format.setFilePath(new Path(nestedDir.toURI().toString()));
		this.config.setBoolean("recursive.file.enumeration", true);
		format.configure(this.config);

		FileInputSplit[] splits = format.createInputSplits(1);
		Assert.assertEquals(3, splits.length);
	} catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail(ex.getMessage());
	}
}
Example #18
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.rebalance()
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
Example #19
Source File: FlinkBitcoinDataSourceTest.java From hadoopcryptoledger with Apache License 2.0
@Test
public void parseBitcoinRawBlock() throws HadoopCryptoLedgerConfigurationException, IOException {
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "genesis.blk";
	String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
	Path file = new Path(fileNameBlock);
	FileInputSplit blockInputSplit = new FileInputSplit(0, file, 0, -1, null);

	BitcoinRawBlockFlinkInputFormat inputFormat = new BitcoinRawBlockFlinkInputFormat(1024 * 1024, "F9BEB4D9", false);
	inputFormat.open(blockInputSplit);
	assertFalse(inputFormat.reachedEnd(), "End not reached");

	BytesWritable reuse = new BytesWritable();
	BytesWritable nextBlock = inputFormat.nextRecord(reuse);
	assertNotNull(nextBlock, "First Block returned");
	assertEquals(293, nextBlock.getLength(), "First Block must have size of 293");

	nextBlock = inputFormat.nextRecord(reuse);
	assertNull(nextBlock, "No further block");
	assertTrue(inputFormat.reachedEnd(), "End reached");
}
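Note that this test constructs its FileInputSplit by hand instead of calling createInputSplits(). Judging from this call and the createTempFile helper in Example #25, the constructor arguments are the split number, the file path, the start offset, the length in bytes (here -1, apparently meaning "read to the end of the file"), and an optional array of hosts for locality. A minimal hand-construction sketch with a hypothetical path:

import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

// One split covering the first 1024 bytes of a hypothetical local file;
// null hosts means no locality information is available.
FileInputSplit split = new FileInputSplit(0, new Path("file:///tmp/blocks.dat"), 0, 1024, null);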
Example #20
Source File: PrimitiveInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testIntegerInput() throws IOException {
	try {
		final String fileContent = "111|222|";
		final FileInputSplit split = createInputSplit(fileContent);

		final PrimitiveInputFormat<Integer> format = new PrimitiveInputFormat<Integer>(PATH, "|", Integer.class);
		format.configure(new Configuration());
		format.open(split);

		Integer result = null;

		result = format.nextRecord(result);
		assertEquals(Integer.valueOf(111), result);

		result = format.nextRecord(result);
		assertEquals(Integer.valueOf(222), result);

		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	} catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
	}
}
Example #21
Source File: DelimitedInputFormat.java From Flink-CEPplus with Apache License 2.0
/**
 * Opens the given input split. This method opens the input stream to the specified file,
 * allocates read buffers, and positions the stream at the correct position, making sure
 * that any partial record at the beginning is skipped.
 *
 * @param split The input split to open.
 *
 * @see org.apache.flink.api.common.io.FileInputFormat#open(org.apache.flink.core.fs.FileInputSplit)
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);
	initBuffers();

	this.offset = splitStart;
	if (this.splitStart != 0) {
		this.stream.seek(offset);
		readLine();

		// if the first partial record already pushes the stream over
		// the limit of our split, then no record starts within this split
		if (this.overLimit) {
			this.end = true;
		}
	} else {
		fillBuffer(0);
	}
}
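This skipping logic is what makes parallel reads of a single file consistent: a record that crosses a split boundary is emitted by the split in which it begins, and the next split discards its leading fragment before producing records. A short sketch of that behavior, assuming a local newline-delimited file and an arbitrary cut at byte 100 (the path, offsets, and class name are illustrative, not from the Flink sources):

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

public class SplitBoundarySketch {
	public static void main(String[] args) throws Exception {
		Path path = new Path("file:///tmp/lines.txt");
		long fileLen = path.getFileSystem().getFileStatus(path).getLen();

		TextInputFormat format = new TextInputFormat(path);
		format.configure(new Configuration());

		// Cut the file at byte 100: a line straddling the cut is read fully by
		// split 0, while split 1 skips the partial line at its start.
		FileInputSplit[] splits = new FileInputSplit[]{
				new FileInputSplit(0, path, 0, 100, null),
				new FileInputSplit(1, path, 100, fileLen - 100, null)
		};

		for (FileInputSplit split : splits) {
			format.open(split);
			String record;
			while (!format.reachedEnd() && (record = format.nextRecord(null)) != null) {
				System.out.println(split.getSplitNumber() + ": " + record);
			}
			format.close();
		}
	}
}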
Example #22
Source File: GenericCsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testLongLongLong() {
	try {
		final String fileContent = "1,2,3\n3,2,1";
		final FileInputSplit split = createTempFile(fileContent);

		final Configuration parameters = new Configuration();
		format.setFieldDelimiter(",");
		format.setFieldTypesGeneric(LongValue.class, LongValue.class, LongValue.class);
		format.configure(parameters);
		format.open(split);

		Value[] values = createLongValues(3);

		values = format.nextRecord(values);
		assertNotNull(values);
		assertEquals(1L, ((LongValue) values[0]).getValue());
		assertEquals(2L, ((LongValue) values[1]).getValue());
		assertEquals(3L, ((LongValue) values[2]).getValue());

		values = format.nextRecord(values);
		assertNotNull(values);
		assertEquals(3L, ((LongValue) values[0]).getValue());
		assertEquals(2L, ((LongValue) values[1]).getValue());
		assertEquals(1L, ((LongValue) values[2]).getValue());

		assertNull(format.nextRecord(values));
		assertTrue(format.reachedEnd());
	} catch (Exception ex) {
		System.err.println(ex.getMessage());
		ex.printStackTrace();
		fail("Test erroneous");
	}
}
Example #23
Source File: TsFileInputFormat.java From incubator-iotdb with Apache License 2.0
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);
	if (config != null) {
		TSFileConfigUtil.setGlobalTSFileConfig(config);
	}
	TsFileInput in;
	try {
		if (currentSplit.getPath().getFileSystem().isDistributedFS()) {
			// HDFS
			in = new HDFSInput(new org.apache.hadoop.fs.Path(new URI(currentSplit.getPath().getPath())), hadoopConf);
		} else {
			// Local File System
			in = new LocalTsFileInput(Paths.get(currentSplit.getPath().toUri()));
		}
	} catch (URISyntaxException e) {
		throw new FlinkRuntimeException(e);
	}
	TsFileSequenceReader reader = new TsFileSequenceReader(in);
	readTsFile = new ReadOnlyTsFile(reader);
	queryDataSet = readTsFile.query(
		// The query method call will change the content of the param query expression,
		// the original query expression should not be passed to the query method as it may
		// be used several times.
		QueryExpression.create(expression.getSelectedSeries(), expression.getExpression()),
		currentSplit.getStart(),
		currentSplit.getStart() + currentSplit.getLength());
}
Example #24
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadWithProjection() throws IOException {
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	rowOrcInputFormat.selectFields(7, 0, 10, 8);

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();
	rowOrcInputFormat.open(splits[0]);

	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);

	// validate first row
	assertNotNull(row);
	assertEquals(4, row.getArity());
	// check binary
	assertArrayEquals(new byte[]{0, 1, 2, 3, 4}, (byte[]) row.getField(0));
	// check boolean
	assertEquals(false, row.getField(1));
	// check list
	assertTrue(row.getField(2) instanceof Object[]);
	Object[] list1 = (Object[]) row.getField(2);
	assertEquals(2, list1.length);
	assertEquals(Row.of(3, "good"), list1[0]);
	assertEquals(Row.of(4, "bad"), list1[1]);
	// check string
	assertEquals("hi", row.getField(3));

	// check that there is a second row with four fields
	assertFalse(rowOrcInputFormat.reachedEnd());
	row = rowOrcInputFormat.nextRecord(null);
	assertNotNull(row);
	assertEquals(4, row.getArity());
	assertTrue(rowOrcInputFormat.reachedEnd());
}
Example #25
Source File: RowCsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0
private static FileInputSplit createTempFile(String content) throws IOException {
	File tempFile = File.createTempFile("test_contents", "tmp");
	tempFile.deleteOnExit();

	OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8);
	wrt.write(content);
	wrt.close();

	return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[]{"localhost"});
}
Example #26
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Not(
			// decimal pred
			new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	assertEquals("(not leaf-0)", sarg.getExpression().toString());
	assertEquals(1, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
Example #27
Source File: RowCsvInputFormatTest.java From flink with Apache License 2.0
@Test
public void ignoreMultiCharPrefixComments() throws Exception {
	String fileContent = "//description of the data\n" +
			"//successive commented line\n" +
			"this is|1|2.0|\n" +
			"a test|3|4.0|\n" +
			"//next|5|6.0|\n";

	FileInputSplit split = createTempFile(fileContent);

	TypeInformation[] fieldTypes = new TypeInformation[]{
		BasicTypeInfo.STRING_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.DOUBLE_TYPE_INFO};

	RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
	format.setCommentPrefix("//");
	format.configure(new Configuration());
	format.open(split);

	Row result = new Row(3);
	result = format.nextRecord(result);
	assertNotNull(result);
	assertEquals("this is", result.getField(0));
	assertEquals(1, result.getField(1));
	assertEquals(2.0, result.getField(2));

	result = format.nextRecord(result);
	assertNotNull(result);
	assertEquals("a test", result.getField(0));
	assertEquals(3, result.getField(1));
	assertEquals(4.0, result.getField(2));

	result = format.nextRecord(result);
	assertNull(result);
}