Java Code Examples for org.apache.hadoop.fs.FileSystem#create()
The following examples show how to use org.apache.hadoop.fs.FileSystem#create().
They are extracted from open source projects; the source project, file, and license for each example are listed in the header above it.
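Before the extracted examples, here is a minimal, self-contained sketch of the pattern they all follow: obtain a FileSystem, call create() to get an FSDataOutputStream, write, and close. The class name, the path /tmp/example.txt, and the choice of the default filesystem are illustrative assumptions, not taken from any of the projects below.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Illustrative path; the examples below write into test or job directories instead.
    Path path = new Path("/tmp/example.txt");
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    // create(Path, boolean) returns an FSDataOutputStream; the boolean controls overwrite.
    try (FSDataOutputStream out = fs.create(path, true)) {
      out.write("hello\n".getBytes(StandardCharsets.UTF_8));
    }
  }
}

Several of the examples below use other overloads, such as the static FileSystem.create(FileSystem, Path, FsPermission) that also sets permissions, or the long form taking buffer size, replication, and block size, but the create/write/close shape stays the same.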
Example 1
Source Project: hadoop-gpu File: DistributedPentomino.java License: Apache License 2.0
/**
 * Create the input file with all of the possible combinations of the
 * given depth.
 * @param fs the filesystem to write into
 * @param dir the directory to write the input file into
 * @param pent the puzzle
 * @param depth the depth to explore when generating prefixes
 */
private static void createInputDirectory(FileSystem fs,
                                         Path dir,
                                         Pentomino pent,
                                         int depth
                                         ) throws IOException {
  fs.mkdirs(dir);
  List<int[]> splits = pent.getSplits(depth);
  PrintStream file =
    new PrintStream(new BufferedOutputStream
                    (fs.create(new Path(dir, "part1")), 64*1024));
  for(int[] prefix: splits) {
    for(int i=0; i < prefix.length; ++i) {
      if (i != 0) {
        file.print(',');
      }
      file.print(prefix[i]);
    }
    file.print('\n');
  }
  file.close();
}
Example 2
Source Project: tajo File: TajoMaster.java License: Apache License 2.0
private void writeSystemConf() throws IOException {
  // Storing the system configs
  Path systemConfPath = TajoConf.getSystemConfPath(systemConf);

  if (!defaultFS.exists(systemConfPath.getParent())) {
    defaultFS.mkdirs(systemConfPath.getParent());
  }

  if (defaultFS.exists(systemConfPath)) {
    defaultFS.delete(systemConfPath, false);
  }

  // In TajoMaster HA, some master might see LeaseExpiredException because of lease mismatch. Thus,
  // we need to create below xml file at HdfsServiceTracker::writeSystemConf.
  if (!systemConf.getBoolVar(TajoConf.ConfVars.TAJO_MASTER_HA_ENABLE)) {
    try (FSDataOutputStream out = FileSystem.create(defaultFS, systemConfPath,
        new FsPermission(SYSTEM_CONF_FILE_PERMISSION))) {
      systemConf.writeXml(out);
    }
    defaultFS.setReplication(systemConfPath,
        (short) systemConf.getIntVar(ConfVars.SYSTEM_CONF_REPLICA_COUNT));
  }
}
Example 3
Source Project: hadoop File: TestIFile.java License: Apache License 2.0
@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class,
                                   codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, in,
      rfs.getFileStatus(path).getLen(), codec, null);
  reader.close();

  // test check sum
  byte[] ab = new byte[100];
  int readed = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(readed, reader.checksumIn.getChecksum().length);
}
Example 4
Source Project: phoenix File: RegexBulkLoadToolIT.java License: Apache License 2.0
@Ignore
@Test
public void testImportWithIndex() throws Exception {

  Statement stmt = conn.createStatement();
  stmt.execute("CREATE TABLE TABLE3 (ID INTEGER NOT NULL PRIMARY KEY, "
      + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
  String ddl = "CREATE INDEX TABLE3_IDX ON TABLE3 "
      + " (FIRST_NAME ASC)"
      + " INCLUDE (LAST_NAME)";
  stmt.execute(ddl);

  FileSystem fs = FileSystem.get(getUtility().getConfiguration());
  FSDataOutputStream outputStream = fs.create(new Path("/tmp/input3.csv"));
  PrintWriter printWriter = new PrintWriter(outputStream);
  printWriter.println("1,FirstName 1,LastName 1");
  printWriter.println("2,FirstName 2,LastName 2");
  printWriter.close();

  RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
  regexBulkLoadTool.setConf(getUtility().getConfiguration());
  int exitCode = regexBulkLoadTool.run(new String[] {
      "--input", "/tmp/input3.csv",
      "--table", "table3",
      "--regex", "([^,]*),([^,]*),([^,]*)",
      "--zookeeper", zkQuorum});
  assertEquals(0, exitCode);

  ResultSet rs = stmt.executeQuery(
      "SELECT id, FIRST_NAME FROM TABLE3 where first_name='FirstName 2'");
  assertTrue(rs.next());
  assertEquals(2, rs.getInt(1));
  assertEquals("FirstName 2", rs.getString(2));

  rs.close();
  stmt.close();
}
Example 5
Source Project: RDFS File: TestRaidDfs.java License: Apache License 2.0
public static long createTestFile(FileSystem fileSys, Path name, int repl,
                                  long fileSize, long blockSize, int seed)
  throws IOException {
  CRC32 crc = new CRC32();
  Random rand = new Random(seed);
  FSDataOutputStream stm = fileSys.create(name, true,
                                          fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                          (short)repl, blockSize);
  LOG.info("create file " + name + " size: " + fileSize +
           " blockSize: " + blockSize + " repl: " + repl);
  // fill random data into file
  byte[] b = new byte[(int)blockSize];
  long numBlocks = fileSize / blockSize;
  for (int i = 0; i < numBlocks; i++) {
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  long lastBlock = fileSize - numBlocks * blockSize;
  if (lastBlock > 0) {
    b = new byte[(int)lastBlock];
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  stm.close();
  return crc.getValue();
}
Example 6
Source Project: sparkboost File: DataUtils.java License: Apache License 2.0
/**
 * Generate a new LibSvm output file giving each document an index corresponding to the index the
 * documents had in the original input LibSvm file.
 *
 * @param sc         The spark context.
 * @param dataFile   The data file.
 * @param outputFile The output file.
 */
public static void generateLibSvmFileWithIDs(JavaSparkContext sc, String dataFile, String outputFile) {
  if (sc == null)
    throw new NullPointerException("The Spark Context is 'null'");
  if (dataFile == null || dataFile.isEmpty())
    throw new IllegalArgumentException("The dataFile is 'null'");

  ArrayList<MultilabelPoint> points = new ArrayList<>();
  try {
    Path pt = new Path(dataFile);
    FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
    Path ptOut = new Path(outputFile);
    BufferedWriter bw = new BufferedWriter((new OutputStreamWriter(fs.create(ptOut))));

    try {
      int docID = 0;
      String line = br.readLine();
      while (line != null) {
        bw.write("" + docID + "\t" + line + "\n");
        line = br.readLine();
        docID++;
      }
    } finally {
      br.close();
      bw.close();
    }
  } catch (Exception e) {
    throw new RuntimeException("Reading input LibSVM data file", e);
  }
}
Example 7
Source Project: RDFS File: JobControlTestUtils.java License: Apache License 2.0
/**
 * Generates data that can be used for Job Control tests.
 *
 * @param fs FileSystem to create data in.
 * @param dirPath Path to create the data in.
 * @throws IOException If an error occurs creating the data.
 */
static void generateData(FileSystem fs, Path dirPath) throws IOException {
  FSDataOutputStream out = fs.create(new Path(dirPath, "data.txt"));
  for (int i = 0; i < 10000; i++) {
    String line = generateRandomLine();
    out.write(line.getBytes("UTF-8"));
  }
  out.close();
}
Example 8
Source Project: components File: AvroHdfsFileSink.java License: Apache License 2.0
@Override
protected void mergeOutput(FileSystem fs, String sourceFolder, String targetFile) throws IOException {
  try (DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
    FileStatus[] sourceStatuses = FileSystemUtil.listSubFiles(fs, sourceFolder);
    Schema schema = null;
    String inputCodec = null;
    OutputStream output = new BufferedOutputStream(fs.create(new Path(targetFile)));
    for (FileStatus sourceStatus : sourceStatuses) {
      try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
          new BufferedInputStream(fs.open(sourceStatus.getPath())),
          new GenericDatumReader<GenericRecord>())) {

        if (schema == null) {
          schema = reader.getSchema();
          for (String key : reader.getMetaKeys()) {
            if (!DataFileWriter.isReservedMeta(key)) {
              writer.setMeta(key, reader.getMeta(key));
            }
          }
          inputCodec = reader.getMetaString(DataFileConstants.CODEC);
          if (inputCodec == null) {
            inputCodec = DataFileConstants.NULL_CODEC;
          }
          writer.setCodec(CodecFactory.fromString(inputCodec));
          writer.create(schema, output);
        }
        writer.appendAllFrom(reader, false);
      }
    }
  }
}
Example 9
Source Project: aegisthus File: Distcp.java License: Apache License 2.0
protected void writeManifest(Job job, List<FileStatus> files) throws IOException {
  Path out = new Path(job.getConfiguration().get(OPT_DISTCP_TARGET));
  FileSystem fsOut = out.getFileSystem(job.getConfiguration());
  DataOutputStream dos = fsOut.create(new Path(out, "_manifest/.manifest"));
  for (FileStatus file : files) {
    Path output = new Path(out, file.getPath().getName());
    dos.writeBytes(output.toUri().toString());
    dos.write('\n');
  }
  dos.close();
}
Example 10
Source Project: RDFS File: TestSeekBug.java License: Apache License 2.0
private void writeFile(FileSystem fileSys, Path name) throws IOException {
  // create and write a file that contains 1MB
  DataOutputStream stm = fileSys.create(name);
  byte[] buffer = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  stm.close();
}
Example 11
Source Project: hadoop-gpu File: TestSetTimes.java License: Apache License 2.0
private FSDataOutputStream writeFile(FileSystem fileSys, Path name, int repl)
  throws IOException {
  FSDataOutputStream stm = fileSys.create(name, true,
                                          fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                          (short)repl, (long)blockSize);
  byte[] buffer = new byte[fileSize];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  return stm;
}
Example 12
Source Project: succinct File: TestUtils.java License: Apache License 2.0
public static FSDataInputStream getStream(ShortBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeShort(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
Example 13
Source Project: big-c File: TestLineRecordReaderJobs.java License: Apache License 2.0
/**
 * Writes the input test file
 *
 * @param conf
 * @throws IOException
 */
public void createInputFile(Configuration conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(inputDir, "test.txt");
  Writer writer = new OutputStreamWriter(localFs.create(file));
  writer.write("abc\ndef\t\nghi\njkl");
  writer.close();
}
Example 14
Source Project: emr-sample-apps File: CopyFromS3.java License: Apache License 2.0
/**
 * This method constructs the JobConf to be used to run the map reduce job to
 * download the files from S3. This is a potentially expensive method since it
 * makes multiple calls to S3 to get a listing of all the input data. Clients
 * are encouraged to cache the returned JobConf reference and not call this
 * method multiple times unless necessary.
 *
 * @return the JobConf to be used to run the map reduce job to download the
 *         files from S3.
 */
public JobConf getJobConf() throws IOException, ParseException {
  JobConf conf = new JobConf(CopyFromS3.class);
  conf.setJobName("CopyFromS3");
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(S3CopyMapper.class);
  // We configure a reducer, even though we don't use it right now.
  // The idea is that, in the future we may.
  conf.setReducerClass(HDFSWriterReducer.class);
  conf.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(conf, new Path(tempFile));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setCompressMapOutput(true);

  JobClient jobClient = new JobClient(conf);

  FileSystem inputFS = FileSystem.get(URI.create(inputPathPrefix), conf);
  DatePathFilter datePathFilter = new DatePathFilter(startDate, endDate);
  List<Path> filePaths = getFilePaths(inputFS, new Path(inputPathPrefix),
      datePathFilter, jobClient.getDefaultMaps());

  // Write the file names to a temporary index file to be used
  // as input to the map tasks.
  FileSystem outputFS = FileSystem.get(URI.create(tempFile), conf);
  FSDataOutputStream outputStream = outputFS.create(new Path(tempFile), true);
  try {
    for (Path path : filePaths) {
      outputStream.writeBytes(path.toString() + "\n");
    }
  } finally {
    outputStream.close();
  }

  conf.setNumMapTasks(Math.min(filePaths.size(), jobClient.getDefaultMaps()));

  return conf;
}
Example 15
Source Project: Knowage-Server File: PersistedHDFSManager.java License: GNU Affero General Public License v3.0
public FSDataOutputStream openHdfsFile(String fileName, String folderName) {
  logger.debug("Begin file opening");
  FSDataOutputStream fsOS = null;
  Path filePath = null;
  try {
    FileSystem fs = hdfs.getFs();
    filePath = fs.getWorkingDirectory();
    if (folderName != null && folderName.length() > 0) {
      filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR, folderName));
      if (!fs.exists(filePath) || !fs.isDirectory(filePath)) {
        fs.mkdirs(filePath);
      }
    }
    filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR + fileName));
    boolean existsFile = fs.exists(filePath);
    if (existsFile) {
      logger.debug("File is already present in folder, it will be deleted and replaced with new file");
      fs.delete(filePath, true);
    }
    fsOS = fs.create(filePath, true);
  } catch (IOException e) {
    logger.error("Impossible to open file in File System");
    throw new SpagoBIRuntimeException("Impossible to open file in File System" + e);
  }
  logger.debug("File opened");
  return fsOS;
}
Example 16
Source Project: big-c File: UtilsForTests.java License: Apache License 2.0
/**
 * This creates a file in the dfs
 * @param dfs FileSystem Local File System where file needs to be picked
 * @param URIPATH Path dfs path where file needs to be copied
 * @param permission FsPermission File permission
 * @return returns the DataOutputStream
 */
public static DataOutputStream createTmpFileDFS(
    FileSystem dfs, Path URIPATH,
    FsPermission permission, String input) throws Exception {
  //Creating the path with the file
  DataOutputStream file =
      FileSystem.create(dfs, URIPATH, permission);
  file.writeBytes(input);
  file.close();
  return file;
}
Example 17
Source Project: hadoop-gpu File: TestTrash.java License: Apache License 2.0
protected static Path writeFile(FileSystem fs, Path f) throws IOException {
  DataOutputStream out = fs.create(f);
  out.writeBytes("dhruba: " + f);
  out.close();
  assertTrue(fs.exists(f));
  return f;
}
Example 18
Source Project: hadoop File: TestBlockTokenWithDFS.java License: Apache License 2.0
private void createFile(FileSystem fs, Path filename) throws IOException {
  FSDataOutputStream out = fs.create(filename);
  out.write(rawData);
  out.close();
}
Example 19
Source Project: big-c File: TestDistributedFileSystem.java License: Apache License 2.0
@Test
public void testCreateWithCustomChecksum() throws Exception {
  Configuration conf = getTestConfiguration();
  MiniDFSCluster cluster = null;
  Path testBasePath = new Path("/test/csum");
  // create args
  Path path1 = new Path(testBasePath, "file_wtih_crc1");
  Path path2 = new Path(testBasePath, "file_with_crc2");
  ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
  ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);

  // common args
  FsPermission perm = FsPermission.getDefault().applyUMask(
      FsPermission.getUMask(conf));
  EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE,
      CreateFlag.CREATE);
  short repl = 1;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    FileSystem dfs = cluster.getFileSystem();

    dfs.mkdirs(testBasePath);

    // create two files with different checksum types
    FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096, repl,
        131072L, null, opt1);
    FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096, repl,
        131072L, null, opt2);

    for (int i = 0; i < 1024; i++) {
      out1.write(i);
      out2.write(i);
    }
    out1.close();
    out2.close();

    // the two checksums must be different.
    MD5MD5CRC32FileChecksum sum1 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
    MD5MD5CRC32FileChecksum sum2 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
    assertFalse(sum1.equals(sum2));

    // check the individual params
    assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
    assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
  } finally {
    if (cluster != null) {
      cluster.getFileSystem().delete(testBasePath, true);
      cluster.shutdown();
    }
  }
}
Example 20
Source Project: RDFS File: TestFileAppend4.java License: Apache License 2.0
void replicationTest(int badDN) throws Exception {
  LOG.info("START");

  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int)BLOCK_SIZE/2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE%4 == 0);

    file1 = new Path("/appendWithReplication.dat");

    // write 1/2 block & sync
    stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.sync();
    assertNumCurrentReplicas(rep);

    // close one of the datanodes
    cluster.stopDataNode(badDN);

    // write 1/4 block & sync
    AppendTestUtil.write(stm, halfBlock, (int)BLOCK_SIZE/4);
    stm.sync();
    assertNumCurrentReplicas((short)(rep - 1));

    // restart the cluster
    /*
     * we put the namenode in safe mode first so it doesn't process
     * recoverBlock() commands from the remaining DFSClient as datanodes
     * are serially shutdown
     */
    cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs1.close();
    cluster.shutdown();
    LOG.info("STOPPED first instance of the cluster");

    cluster = new MiniDFSCluster(conf, 3, false, null);
    cluster.getNameNode().getNamesystem().stallReplicationWork();
    cluster.waitActive();
    fs1 = cluster.getFileSystem();

    LOG.info("START second instance.");

    recoverFile(fs1);
    LOG.info("Recovered file");

    // the 2 DNs with the larger sequence number should win
    BlockLocation[] bl = fs1.getFileBlockLocations(
        fs1.getFileStatus(file1), 0, BLOCK_SIZE);
    LOG.info("Checking blocks");
    assertTrue("Should have one block", bl.length == 1);

    // Wait up to 1 second for block replication - we may have
    // only replication 1 for a brief moment after close, since
    // closing only waits for fs.replication.min replicas, and
    // it may take some millis before the other DN reports the block
    waitForBlockReplication(fs1, file1.toString(), 2, 1);

    assertFileSize(fs1, BLOCK_SIZE*3/4);
    checkFile(fs1, BLOCK_SIZE*3/4);

    LOG.info("Checking replication");
    // verify that, over time, the block has been replicated to 3 DN
    cluster.getNameNode().getNamesystem().restartReplicationWork();
    waitForBlockReplication(fs1, file1.toString(), 3, 20);
  } finally {
    fs1.close();
    cluster.shutdown();
  }
}