Java Code Examples for org.apache.flink.core.fs.FileSystem

The following examples show how to use org.apache.flink.core.fs.FileSystem. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Alink   Source File: CsvSinkBatchOp.java    License: Apache License 2.0 7 votes vote down vote up
/**
 * Writes the rows of the given operator as text lines to the configured file path.
 *
 * <p>Every row is first passed through {@code CsvUtil.FormatCsvFunc}; the first field of the
 * resulting row is then emitted as the output line. The sink runs with a parallelism equal to
 * the configured number of files.
 *
 * @param in the upstream operator whose data set is written out
 * @return this operator
 */
@Override
public CsvSinkBatchOp sinkFrom(BatchOperator in) {
    final String targetPath = getFilePath();
    final String delimiter = getFieldDelimiter();
    final int sinkParallelism = getNumFiles();
    final TypeInformation[] columnTypes = in.getColTypes();
    final Character quote = getQuoteChar();

    // overwrite only when explicitly requested, otherwise refuse to clobber existing output
    final FileSystem.WriteMode writeMode = getOverwriteSink()
        ? FileSystem.WriteMode.OVERWRITE
        : FileSystem.WriteMode.NO_OVERWRITE;

    // projects the formatted row onto its first field
    final MapFunction<Row, String> firstFieldAsString = new MapFunction<Row, String>() {
        @Override
        public String map(Row value) throws Exception {
            return (String) value.getField(0);
        }
    };

    DataSet<String> csvLines = ((DataSet<Row>) in.getDataSet())
        .map(new CsvUtil.FormatCsvFunc(columnTypes, delimiter, quote))
        .map(firstFieldAsString);

    csvLines.writeAsText(targetPath, writeMode).name("csv_sink").setParallelism(sinkParallelism);
    return this;
}
 
Example 2
Source Project: Flink-CEPplus   Source File: HadoopMapFunctionITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code ConfigurableMapper} wrapped in a {@code HadoopMapFunction}, configured with
 * {@code my.filterPrefix = Hello}, and compares the written output with the expected lines.
 */
@Test
public void testConfigurableMapper() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final JobConf mapperConf = new JobConf();
	mapperConf.set("my.filterPrefix", "Hello");

	final DataSet<Tuple2<IntWritable, Text>> input = HadoopTestData.getKVPairDataSet(env);
	final HadoopMapFunction<IntWritable, Text, IntWritable, Text> mapper =
			new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), mapperConf);
	final DataSet<Tuple2<IntWritable, Text>> filtered = input.flatMap(mapper);

	final String outputUri = tempFolder.newFile().toURI().toString();
	filtered.writeAsText(outputUri, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	final String expectedLines =
			"(2,Hello)\n"
			+ "(3,Hello world)\n"
			+ "(4,Hello world, how are you?)\n";

	compareResultsByLinesInMemory(expectedLines, outputUri);
}
 
Example 3
Source Project: Flink-CEPplus   Source File: AvroOutputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the same data once uncompressed and once with the SNAPPY codec and checks that the
 * uncompressed file is larger.
 *
 * <p>The temporary files are removed in a {@code finally} block so that a failing write or a
 * failing assertion does not leave them behind.
 */
@Test
public void testCompression() throws Exception {
	// given
	final Path outputPath = new Path(File.createTempFile("avro-output-file", "avro").getAbsolutePath());
	Path compressedOutputPath = null;

	try {
		final AvroOutputFormat<User> outputFormat = new AvroOutputFormat<>(outputPath, User.class);
		outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);

		compressedOutputPath = new Path(File.createTempFile("avro-output-file", "compressed.avro").getAbsolutePath());
		final AvroOutputFormat<User> compressedOutputFormat = new AvroOutputFormat<>(compressedOutputPath, User.class);
		compressedOutputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
		compressedOutputFormat.setCodec(AvroOutputFormat.Codec.SNAPPY);

		// when
		output(outputFormat);
		output(compressedOutputFormat);

		// then
		assertTrue(fileSize(outputPath) > fileSize(compressedOutputPath));
	} finally {
		// cleanup, also on failure
		final FileSystem fs = FileSystem.getLocalFileSystem();
		fs.delete(outputPath, false);
		if (compressedOutputPath != null) {
			// the second temp file only exists if its creation was reached
			fs.delete(compressedOutputPath, false);
		}
	}
}
 
Example 4
/**
 * Verifies that {@code delete} is invoked for the stream's file on the (spied) file system
 * when closing the stream and obtaining its result fails with an {@link IOException}.
 */
@Test
public void testCleanupWhenFailingCloseAndGetHandle() throws IOException {
	final String fileName = "test_name";
	final Path folder = new Path(tmp.newFolder().toURI());
	final Path expectedFile = new Path(folder, fileName);

	// every stream handed out by this file system is a FailingCloseStream
	final FileSystem spiedFs = spy(new TestFs((path) -> new FailingCloseStream(new File(path.getPath()))));

	final FSDataOutputStream out = createTestStream(spiedFs, folder, fileName);
	out.write(new byte[] {1, 2, 3, 4, 5});

	boolean closeFailed = false;
	try {
		closeAndGetResult(out);
	}
	catch (IOException ignored) {
		// expected exception
		closeFailed = true;
	}
	if (!closeFailed) {
		fail("Expected IOException");
	}

	verify(spiedFs).delete(expectedFile, false);
}
 
Example 5
Source Project: flink   Source File: BinaryInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Collects the file statuses for all configured input paths.
 *
 * <p>A path that denotes a directory contributes its directly contained non-directory
 * entries; any other path contributes its own status.
 *
 * @return the statuses of all files found
 * @throws IOException forwarded from the file system calls
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem fileSystem = inputPath.getFileSystem();
		final FileStatus status = fileSystem.getFileStatus(inputPath);

		if (!status.isDir()) {
			// plain file: take it as is
			collected.add(status);
			continue;
		}

		// directory: only direct children are considered, nested directories are skipped
		for (FileStatus child : fileSystem.listStatus(inputPath)) {
			if (!child.isDir()) {
				collected.add(child);
			}
		}
	}
	return collected;
}
 
Example 6
Source Project: flink   Source File: FileReadFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads lines from the file named by {@code value.f0}, starting at offset {@code value.f1},
 * and emits each line to the collector.
 *
 * <p>Reading stops at end of file, or once the stream position has passed {@code value.f2};
 * a value of {@code -1} for {@code value.f2} disables that limit.
 *
 * @param value (file URI, start offset, end position or -1)
 * @param out collector receiving the lines
 * @throws Exception if the URI is invalid or the file cannot be opened or read
 */
@Override
public void flatMap(Tuple3<String, Long, Long> value, Collector<String> out) throws Exception {
	// try-with-resources releases the stream even when seek() or reading fails
	try (FSDataInputStream stream = FileSystem.get(new URI(value.f0)).open(new Path(value.f0))) {
		stream.seek(value.f1);

		// the reader only wraps the stream, so closing the stream is sufficient
		// NOTE(review): InputStreamReader uses the platform default charset here - confirm this is intended
		BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
		String line;

		while ((line = reader.readLine()) != null && (value.f2 == -1L || stream.getPos() <= value.f2)) {
			out.collect(line);
		}
	}
}
 
Example 7
Source Project: gelly-streaming   Source File: TestGetDegrees.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test getInDegrees() with the sample graph: the in-degree stream is written as CSV and
 * compared line by line with the expected output.
 */
@Test
public void testGetInDegrees() throws Exception {
	final String outputPath = getTempDirPath("result");
	final String expectedDegrees =
			"1,1\n"
			+ "2,1\n"
			+ "3,1\n"
			+ "3,2\n"
			+ "4,1\n"
			+ "5,1\n"
			+ "5,2\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final GraphStream<Long, NullValue, Long> sampleGraph =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	sampleGraph.getInDegrees().writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory(expectedDegrees, outputPath);
}
 
Example 8
Source Project: Flink-CEPplus   Source File: CsvOutputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that writing a record containing a {@code null} field fails with a
 * {@link RuntimeException} when the output format is used without further null-handling setup.
 */
@Test
public void testNullDisallowOnDefault() throws Exception {
	final CsvOutputFormat<Tuple3<String, String, Integer>> format = new CsvOutputFormat<>(new Path(path));
	try {
		format.setWriteMode(FileSystem.WriteMode.OVERWRITE);
		format.setOutputDirectoryMode(FileOutputFormat.OutputDirectoryMode.PARONLY);
		format.open(0, 1);

		final Tuple3<String, String, Integer> recordWithNull = new Tuple3<String, String, Integer>("One", null, 8);
		try {
			format.writeRecord(recordWithNull);
			fail("should fail with an exception");
		} catch (RuntimeException expected) {
			// expected
		}
	}
	finally {
		// always release the format, whether or not the write failed as expected
		format.close();
	}
}
 
Example 9
Source Project: Flink-CEPplus   Source File: TypeSerializerFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates, configures and opens a {@code TypeSerializerOutputFormat} that writes to the given
 * path in {@code OVERWRITE} mode.
 *
 * @param path the output file path
 * @param configuration the configuration to apply; a fresh {@code Configuration} is used when {@code null}
 * @return the opened output format
 * @throws IOException forwarded from opening the format
 */
@Override
protected BinaryOutputFormat<Tuple2<Integer, String>> createOutputFormat(String path, Configuration configuration) throws IOException {
	final TypeSerializerOutputFormat<Tuple2<Integer, String>> format = new TypeSerializerOutputFormat<>();

	format.setSerializer(serializer);
	format.setOutputFilePath(new Path(path));
	format.setWriteMode(FileSystem.WriteMode.OVERWRITE);

	// fall back to an empty configuration when the caller supplies none
	final Configuration effectiveConfiguration;
	if (configuration == null) {
		effectiveConfiguration = new Configuration();
	} else {
		effectiveConfiguration = configuration;
	}

	format.configure(effectiveConfiguration);
	format.open(0, 1);

	return format;
}
 
Example 10
Source Project: flink   Source File: ContinuousFileMonitoringFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Lists the eligible files under the monitored path, forwards their splits to the given
 * context grouped by modification time, and advances {@code globalModificationTime}.
 *
 * <p>Asserts that the calling thread holds {@code checkpointLock}.
 *
 * @param fs the file system used to list the files
 * @param context the source context that receives the splits
 * @throws IOException forwarded from listing files or creating splits
 */
private void monitorDirAndForwardSplits(FileSystem fs,
										SourceContext<TimestampedFileInputSplit> context) throws IOException {
	assert (Thread.holdsLock(checkpointLock));

	final Map<Path, FileStatus> candidates = listEligibleFiles(fs, new Path(path));
	final Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = getInputSplitsSortedByModTime(candidates);

	for (Map.Entry<Long, List<TimestampedFileInputSplit>> group : splitsByModTime.entrySet()) {
		final long groupModTime = group.getKey();
		final List<TimestampedFileInputSplit> groupSplits = group.getValue();

		for (TimestampedFileInputSplit current : groupSplits) {
			LOG.info("Forwarding split: " + current);
			context.collect(current);
		}

		// the global modification time never moves backwards
		globalModificationTime = Math.max(globalModificationTime, groupModTime);
	}
}
 
Example 11
Source Project: Flink-CEPplus   Source File: FileUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Recursively adds a file or directory to the given zip stream, using entry names relative
 * to {@code rootDir}.
 *
 * <p>Only the leading {@code rootDir + '/'} prefix is stripped from the path. A plain
 * {@code String.replace} would also remove later occurrences of that sequence inside the
 * path and thereby corrupt the entry name.
 *
 * @param fileOrDirectory the file or directory to add
 * @param fs the file system used to inspect and open {@code fileOrDirectory}
 * @param rootDir the directory that entry names are made relative to
 * @param out the zip stream receiving the entries
 * @throws IOException forwarded from the file system or the zip stream
 */
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
	final String fullPath = fileOrDirectory.getPath();
	final String rootPrefix = rootDir.getPath() + '/';
	// strip the prefix once, at the start only; paths outside the root are kept unchanged
	final String relativePath = fullPath.startsWith(rootPrefix)
			? fullPath.substring(rootPrefix.length())
			: fullPath;

	if (fs.getFileStatus(fileOrDirectory).isDir()) {
		// a trailing slash marks the entry as a directory
		out.putNextEntry(new ZipEntry(relativePath + '/'));
		for (FileStatus containedFile : fs.listStatus(fileOrDirectory)) {
			addToZip(containedFile.getPath(), fs, rootDir, out);
		}
	} else {
		ZipEntry entry = new ZipEntry(relativePath);
		out.putNextEntry(entry);

		try (FSDataInputStream in = fs.open(fileOrDirectory)) {
			// 'false': the zip stream must stay open for the following entries
			IOUtils.copyBytes(in, out, false);
		}
		out.closeEntry();
	}
}
 
Example 12
Source Project: Flink-CEPplus   Source File: BucketStateSerializerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a {@code BucketState} without an in-progress file and without committable
 * files, reads it back and checks that the recovered state matches.
 */
@Test
public void testSerializationEmpty() throws IOException {
	final File folder = tempFolder.newFolder();
	final FileSystem fileSystem = FileSystem.get(folder.toURI());
	final RecoverableWriter recoverableWriter = fileSystem.createRecoverableWriter();

	final Path bucketPath = new Path(folder.getPath(), "test");

	// no in-progress file (null) and no pending committables (empty map)
	final BucketState<String> original =
			new BucketState<>("test", bucketPath, Long.MAX_VALUE, null, new HashMap<>());

	final SimpleVersionedSerializer<BucketState<String>> stateSerializer =
			new BucketStateSerializer<>(
					recoverableWriter.getResumeRecoverableSerializer(),
					recoverableWriter.getCommitRecoverableSerializer(),
					SimpleVersionedStringSerializer.INSTANCE
			);

	final byte[] serialized = SimpleVersionedSerialization.writeVersionAndSerialize(stateSerializer, original);
	final BucketState<String> restored = SimpleVersionedSerialization.readVersionAndDeSerialize(stateSerializer, serialized);

	Assert.assertEquals(bucketPath, restored.getBucketPath());
	Assert.assertNull(restored.getInProgressResumableFile());
	Assert.assertTrue(restored.getCommittableFilesPerCheckpoint().isEmpty());
}
 
Example 13
Source Project: flink   Source File: FsNegativeRunningJobsRegistry.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a registry that keeps its files in the given working directory on the given
 * FileSystem.
 *
 * <p>To surface setup/configuration problems early, construction creates a probe file
 * named {@code .registry_test} in the working directory and deletes it again.
 *
 * @param fileSystem The FileSystem to use for the marker files.
 * @param workingDirectory The working directory for files to track the job status.
 *
 * @throws IOException Thrown, if the probe file cannot be created or deleted.
 */
public FsNegativeRunningJobsRegistry(FileSystem fileSystem, Path workingDirectory) throws IOException {
	this.fileSystem = checkNotNull(fileSystem, "fileSystem");
	this.basePath = checkNotNull(workingDirectory, "workingDirectory");

	// write probe: fail fast if the directory is not usable
	final Path probeFile = new Path(workingDirectory, ".registry_test");
	try {
		createFile(probeFile, false);
	} catch (IOException cause) {
		throw new IOException("Unable to write to working directory: " + workingDirectory, cause);
	} finally {
		// the probe is removed regardless of whether creating it succeeded
		fileSystem.delete(probeFile, false);
	}
}
 
Example 14
Source Project: flink   Source File: FileSystemStateStorageHelper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the given state into a newly created file and returns a handle to it.
 *
 * <p>Up to ten attempts are made, each with a fresh file path. If all of them fail, the
 * exception of the last attempt is attached as the cause.
 *
 * @param state the state object to persist
 * @return a handle referencing the written file and the stream position after writing
 * @throws Exception if no attempt succeeded
 */
@Override
public RetrievableStateHandle<T> store(T state) throws Exception {
	Exception lastFailure = null;
	int attempt = 0;

	while (attempt < 10) {
		final Path candidate = getNewFilePath();

		// NO_OVERWRITE: an already existing file makes this attempt fail and triggers a retry
		try (FSDataOutputStream stateOut = fs.create(candidate, FileSystem.WriteMode.NO_OVERWRITE)) {
			InstantiationUtil.serializeObject(stateOut, state);
			return new RetrievableStreamStateHandle<T>(candidate, stateOut.getPos());
		}
		catch (Exception failure) {
			lastFailure = failure;
		}

		attempt++;
	}

	throw new Exception("Could not open output stream for state backend", lastFailure);
}
 
Example 15
Source Project: bahir-flink   Source File: SiddhiCEPITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a sequence query over two keyed views of the same event source and expects exactly
 * one line of output.
 *
 * @see <a href="https://docs.wso2.com/display/CEP300/Sequences">https://docs.wso2.com/display/CEP300/Sequences</a>
 */
@Test
public void testUnboundedPojoStreamSimpleSequences() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Event> events = env.addSource(new RandomEventSource(5).closeDelay(1500), "input1");

    final String sequenceQuery =
        "from every s1 = inputStream1[id == 2]+ , "
            + "s2 = inputStream2[id == 3]? "
            + "within 1000 second "
            + "select s1[0].name as n1, s2.name as n2 "
            + "insert into outputStream";

    // both Siddhi input streams are fed from the same source, keyed by name
    final DataStream<Map<String, Object>> matches = SiddhiCEP
        .define("inputStream1", events.keyBy("name"), "id", "name", "price", "timestamp")
        .union("inputStream2", events.keyBy("name"), "id", "name", "price", "timestamp")
        .cql(sequenceQuery)
        .returnAsMap("outputStream");

    final String outputUri = tempFolder.newFile().toURI().toString();
    matches.writeAsText(outputUri, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(1, getLineCount(outputUri));
}
 
Example 16
Source Project: flink-siddhi   Source File: SiddhiCEPITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Unions three independent event sources of ten events each into one output stream and
 * expects thirty lines of output.
 *
 * <p>Each source is registered under its own name ("input1" to "input3"); the third one
 * previously reused "input2".
 */
@Test
public void testMultipleUnboundedPojoStreamSimpleUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input1 = env.addSource(new RandomEventSource(10), "input1");
    DataStream<Event> input2 = env.addSource(new RandomEventSource(10), "input2");
    DataStream<Event> input3 = env.addSource(new RandomEventSource(10), "input3");
    DataStream<Event> output = SiddhiCEP
        .define("inputStream1", input1, "id", "name", "price", "timestamp")
        .union("inputStream2", input2, "id", "name", "price", "timestamp")
        .union("inputStream3", input3, "id", "name", "price", "timestamp")
        .cql(
            "from inputStream1 select timestamp, id, name, price insert into outputStream;"
                + "from inputStream2 select timestamp, id, name, price insert into outputStream;"
                + "from inputStream3 select timestamp, id, name, price insert into outputStream;"
        )
        .returns("outputStream", Event.class);

    final String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    // 3 sources x 10 events
    assertEquals(30, getLineCount(resultPath));
}
 
Example 17
Source Project: flink   Source File: HadoopS3RecoverableWriterITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Skips the test class when no S3 credentials are available; otherwise prepares the base
 * path and initializes the {@code FileSystem} with the S3 credentials, the upload settings
 * and a temp directory.
 *
 * <p>The temp directory is placed inside {@code TEMP_FOLDER}; the path is joined with a
 * file separator so it does not end up as a sibling of the temp folder's root.
 *
 * @throws IOException forwarded from the setup calls
 */
@BeforeClass
public static void checkCredentialsAndSetup() throws IOException {
	// check whether credentials exist
	S3TestCredentials.assumeCredentialsAvailable();

	basePath = new Path(S3TestCredentials.getTestBucketUri() + "tests-" + UUID.randomUUID());

	// initialize configuration with valid credentials
	final Configuration conf = new Configuration();
	conf.setString("s3.access.key", S3TestCredentials.getS3AccessKey());
	conf.setString("s3.secret.key", S3TestCredentials.getS3SecretKey());

	conf.setLong(PART_UPLOAD_MIN_SIZE, PART_UPLOAD_MIN_SIZE_VALUE);
	conf.setInteger(MAX_CONCURRENT_UPLOADS, MAX_CONCURRENT_UPLOADS_VALUE);

	// join with a separator so the directory lives inside the temp folder
	final String defaultTmpDir =
			TEMP_FOLDER.getRoot().getAbsolutePath() + java.io.File.separator + "s3_tmp_dir";
	conf.setString(CoreOptions.TMP_DIRS, defaultTmpDir);

	FileSystem.initialize(conf);

	// set only after setup completed
	skipped = false;
}
 
Example 18
Source Project: gelly-streaming   Source File: TestMapEdges.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test mapEdges() keeping the same edge types: the edges mapped with {@code AddOneMapper}
 * are written as CSV and compared line by line with the expected output.
 */
@Test
public void testWithSameType() throws Exception {
	final String outputPath = getTempDirPath("result");
	final String expectedEdges =
			"1,2,13\n"
			+ "1,3,14\n"
			+ "2,3,24\n"
			+ "3,4,35\n"
			+ "3,5,36\n"
			+ "4,5,46\n"
			+ "5,1,52\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final GraphStream<Long, NullValue, Long> sampleGraph =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	sampleGraph
			.mapEdges(new AddOneMapper())
			.getEdges()
			.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory(expectedEdges, outputPath);
}
 
Example 19
Source Project: flink   Source File: HadoopMapFunctionITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies a {@code ConfigurableMapper} (via {@code HadoopMapFunction}) configured with
 * {@code my.filterPrefix = Hello} to the test key/value pairs and checks the written result.
 */
@Test
public void testConfigurableMapper() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final JobConf jobConf = new JobConf();
	jobConf.set("my.filterPrefix", "Hello");

	final DataSet<Tuple2<IntWritable, Text>> pairs = HadoopTestData.getKVPairDataSet(env);
	final HadoopMapFunction<IntWritable, Text, IntWritable, Text> hadoopMapper =
			new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), jobConf);
	final DataSet<Tuple2<IntWritable, Text>> matching = pairs.flatMap(hadoopMapper);

	final String targetUri = tempFolder.newFile().toURI().toString();
	matching.writeAsText(targetUri, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	final String expectedOutput =
			"(2,Hello)\n"
			+ "(3,Hello world)\n"
			+ "(4,Hello world, how are you?)\n";

	compareResultsByLinesInMemory(expectedOutput, targetUri);
}
 
Example 20
Source Project: Flink-CEPplus   Source File: HadoopSwiftFileSystemITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deletes the test data directory from the Swift container, unless the tests were skipped.
 *
 * <p>The {@code FileSystem} is re-initialized with an empty configuration in a
 * {@code finally} block, so the credentials configuration is reset even when the deletion
 * or the follow-up assertion fails.
 *
 * @throws IOException forwarded from the file system calls
 */
@AfterClass
public static void cleanUp() throws IOException {
	if (skipTest) {
		return;
	}

	// initialize configuration with valid credentials
	final Configuration conf = createConfiguration();

	try {
		FileSystem.initialize(conf);

		final Path directory = new Path("swift://" + CONTAINER + '.' + SERVICENAME + '/' + TEST_DATA_DIR);
		final FileSystem fs = directory.getFileSystem();

		// clean up
		fs.delete(directory, true);

		// now directory must be gone
		assertFalse(fs.exists(directory));
	} finally {
		// reset configuration, also on failure
		FileSystem.initialize(new Configuration());
	}
}
 
Example 21
Source Project: flink   Source File: LocalFileSystemTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that renaming a folder onto a non-empty target folder is rejected, and that the
 * same rename succeeds once the target folder has been emptied.
 */
@Test
public void testRenameToNonEmptyTargetDir() throws IOException {
	final FileSystem localFs = FileSystem.getLocalFileSystem();

	// a source folder with a file
	final File sourceDir = temporaryFolder.newFolder();
	final Path sourcePath = new Path(sourceDir.toURI());
	final File sourceFile = new File(sourceDir, "someFile.txt");
	assertTrue(sourceFile.createNewFile());

	// a non-empty destination folder
	final File targetDir = temporaryFolder.newFolder();
	final Path targetPath = new Path(targetDir.toURI());
	final File blocker = new File(targetDir, "target");
	assertTrue(blocker.createNewFile());

	// this cannot succeed because the destination folder is not empty
	final boolean renamedOntoNonEmpty = localFs.rename(sourcePath, targetPath);
	assertFalse(renamedOntoNonEmpty);

	// retry after deleting the occupying target file
	assertTrue(blocker.delete());
	final boolean renamedOntoEmpty = localFs.rename(sourcePath, targetPath);
	assertTrue(renamedOntoEmpty);
	assertTrue(new File(targetDir, sourceFile.getName()).exists());
}
 
Example 22
Source Project: gelly-streaming   Source File: TestSlice.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Slices the sample edge stream into one-second windows, reduces the edges with
 * {@code SumEdgeValuesReduce}, and compares the CSV output with the expected lines.
 */
@Test
public void testReduceOnNeighborsDefault() throws Exception {
	final String outputPath = getTempDirPath("result");
	final String expectedSums =
			"1,25\n"
			+ "2,23\n"
			+ "3,69\n"
			+ "4,45\n"
			+ "5,51\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final SimpleEdgeStream<Long, Long> edgeStream =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	final DataStream<Tuple2<Long, Long>> edgeSums = edgeStream
			.slice(Time.of(1, TimeUnit.SECONDS))
			.reduceOnEdges(new SumEdgeValuesReduce());

	edgeSums.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory(expectedSums, outputPath);
}
 
Example 23
Source Project: flink-siddhi   Source File: SiddhiCEPITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sends five random events through a pass-through Siddhi query that returns {@code Event}
 * POJOs and expects five lines of output.
 */
@Test
public void testUnboundedPojoStreamAndReturnPojo() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Event> events = env.addSource(new RandomEventSource(5));

    final AscendingTimestampExtractor<Event> timestampExtractor = new AscendingTimestampExtractor<Event>() {
        @Override
        public long extractAscendingTimestamp(Event element) {
            return element.getTimestamp();
        }
    };
    // NOTE(review): the return value of this call is discarded and the query below is defined
    // on the original stream - confirm whether the timestamped stream was meant to be used.
    events.assignTimestampsAndWatermarks(timestampExtractor);

    final DataStream<Event> selected = SiddhiCEP
        .define("inputStream", events, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream", Event.class);

    final String outputUri = tempFolder.newFile().toURI().toString();
    selected.writeAsText(outputUri, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(outputUri));
}
 
Example 24
Source Project: gelly-streaming   Source File: TestGraphStreamCreation.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test graph stream creation from the sample edge data stream: the edges of the created
 * graph are written as CSV and compared line by line with the expected output.
 */
@Test
public void testProgram() throws Exception {
	final String outputPath = getTempDirPath("result");
	final String expectedEdges =
			"1,2,12\n"
			+ "1,3,13\n"
			+ "2,3,23\n"
			+ "3,4,34\n"
			+ "3,5,35\n"
			+ "4,5,45\n"
			+ "5,1,51\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final GraphStream<Long, NullValue, Long> sampleGraph =
			new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);

	sampleGraph.getEdges().writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory(expectedEdges, outputPath);
}
 
Example 25
/**
 * Populates the RocksDB instance directory from the files found under {@code source}.
 *
 * <p>Files whose name ends with {@code SST_FILE_SUFFIX} are hard-linked into the instance
 * directory; all other files are copied, replacing existing targets.
 *
 * @param source the directory holding the local state files
 * @param instanceRocksDBPath the directory of the RocksDB instance to populate
 * @throws IOException if the source directory cannot be listed or a link/copy fails
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

	final FileStatus[] entries = source.getFileSystem().listStatus(source);

	if (entries == null) {
		throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
	}

	for (FileStatus entry : entries) {
		final String name = entry.getPath().getName();
		final File origin = new File(source.getPath(), name);
		final File destination = new File(instanceRocksDBPath, name);

		if (!name.endsWith(SST_FILE_SUFFIX)) {
			// true copy for all other files.
			Files.copy(origin.toPath(), destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
		} else {
			// hardlink'ing the immutable sst-files.
			Files.createLink(destination.toPath(), origin.toPath());
		}
	}
}
 
Example 26
Source Project: flink   Source File: DistributedCacheDfsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code testFileContent} (via {@code writeUTF}) to a file with the given name under
 * {@code rootDir}, overwriting any existing file.
 *
 * @param dfs the file system to write to
 * @param rootDir the parent directory of the file
 * @param fileName the name of the file to write
 * @return the path of the written file
 * @throws IOException forwarded from creating or writing the file
 */
private static Path writeFile(FileSystem dfs, Path rootDir, String fileName) throws IOException {
	final Path target = new Path(rootDir, fileName);

	try (DataOutputStream contentOut =
			new DataOutputStream(dfs.create(target, FileSystem.WriteMode.OVERWRITE))) {
		contentOut.writeUTF(testFileContent);
	}

	return target;
}
 
Example 27
Source Project: flink   Source File: FileOutputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the output path up front when it lives on a distributed file system; does
 * nothing for other file systems.
 *
 * <p>With a parallelism of one and output directory mode {@code PARONLY}, the path is
 * prepared as a single file; in every other case it is prepared as a directory.
 *
 * @param parallelism The task parallelism.
 * @throws IOException if the output path could not be initialized
 */
@Override
public void initializeGlobal(int parallelism) throws IOException {
	final Path outputPath = getOutputFilePath();
	final FileSystem fileSystem = outputPath.getFileSystem();

	// only distributed file systems can be initialized at start-up time.
	if (!fileSystem.isDistributedFS()) {
		return;
	}

	final WriteMode mode = getWriteMode();
	final OutputDirectoryMode directoryMode = getOutputDirectoryMode();

	// output is not written in parallel and should be written to a single file
	final boolean singleFile = parallelism == 1 && directoryMode == OutputDirectoryMode.PARONLY;

	final boolean initialized = fileSystem.initOutPathDistFS(outputPath, mode, !singleFile);
	if (!initialized) {
		// output preparation failed! Cancel task.
		throw new IOException(singleFile
				? "Output path could not be initialized."
				: "Output directory could not be created.");
	}
}
 
Example 28
Source Project: flink   Source File: FileUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Unpacks the zip archive at {@code file} into {@code targetDirectory}.
 *
 * <p>Directory entries are created with {@code mkdirs}; file entries are written in
 * {@code NO_OVERWRITE} mode. The name of the first entry is taken as the root directory of
 * the archive.
 *
 * <p>NOTE(review): entry names are used as-is to build the target paths - confirm that
 * archives only come from trusted sources. An archive without entries leaves the root
 * directory {@code null} when the result path is built - verify how callers handle that.
 *
 * @param file the zip archive to expand
 * @param targetDirectory the directory to expand into
 * @return the target directory joined with the name of the first archive entry
 * @throws IOException forwarded from reading the archive or writing the files
 */
public static Path expandDirectory(Path file, Path targetDirectory) throws IOException {
	final FileSystem archiveFs = file.getFileSystem();
	final FileSystem destinationFs = targetDirectory.getFileSystem();
	Path archiveRoot = null;

	try (ZipInputStream zipIn = new ZipInputStream(archiveFs.open(file))) {
		for (ZipEntry current = zipIn.getNextEntry(); current != null; current = zipIn.getNextEntry()) {
			final Path entryPath = new Path(current.getName());
			if (archiveRoot == null) {
				// the first entry contains the name of the original directory that was zipped
				archiveRoot = entryPath;
			}

			final Path destination = new Path(targetDirectory, entryPath);
			if (!current.isDirectory()) {
				try (FSDataOutputStream entryOut = destinationFs.create(destination, FileSystem.WriteMode.NO_OVERWRITE)) {
					// do not close the streams here as it prevents access to further zip entries
					IOUtils.copyBytes(zipIn, entryOut, false);
				}
			} else {
				destinationFs.mkdirs(destination);
			}
			zipIn.closeEntry();
		}
	}
	return new Path(targetDirectory, archiveRoot);
}
 
Example 29
Source Project: Flink-CEPplus   Source File: TextOutputFormatITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the word count program twice against the same result path; the second run uses
 * {@code NO_OVERWRITE} and is expected to fail with a cause whose message contains
 * "File already exists".
 */
@Test
public void failPathWriteMode() throws Exception {
	// first run writes to resultPath with the default settings
	OutputFormatTestPrograms.wordCountToText(WordCountData.TEXT, resultPath);

	try {
		OutputFormatTestPrograms.wordCountToText(WordCountData.TEXT, resultPath, FileSystem.WriteMode.NO_OVERWRITE);
		fail("File should exist.");
	} catch (Exception expected) {
		final String causeMessage = expected.getCause().getMessage();
		assertTrue(causeMessage.contains("File already exists"));
	}
}
 
Example 30
Source Project: flink   Source File: HDFSTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens a {@code TextOutputFormat} for two subtasks (indices 0 and 1 of 2) with output
 * directory mode {@code ALWAYS}, and checks that the output directory then contains
 * exactly two files named "1" and "2".
 */
@Test
public void testChangingFileNames() {
	final org.apache.hadoop.fs.Path hadoopDir = new org.apache.hadoop.fs.Path(hdfsURI + "/hdfsTest");
	final Path flinkDir = new Path(hadoopDir.toString());

	final String record = "one";
	final TextOutputFormat<String> format = new TextOutputFormat<>(flinkDir);

	format.setWriteMode(FileSystem.WriteMode.NO_OVERWRITE);
	format.setOutputDirectoryMode(FileOutputFormat.OutputDirectoryMode.ALWAYS);

	try {
		// first subtask
		format.open(0, 2);
		format.writeRecord(record);
		format.close();

		// second subtask
		format.open(1, 2);
		format.writeRecord(record);
		format.close();

		assertTrue("No result file present", hdfs.exists(hadoopDir));
		final FileStatus[] written = hdfs.listStatus(hadoopDir);
		Assert.assertEquals(2, written.length);
		for (FileStatus status : written) {
			final String writtenName = status.getPath().getName();
			assertTrue("1".equals(writtenName) || "2".equals(writtenName));
		}

	} catch (IOException ioFailure) {
		ioFailure.printStackTrace();
		Assert.fail(ioFailure.getMessage());
	}
}