Java Code Examples for org.apache.hadoop.fs.FileSystem#create()

The following examples show how to use org.apache.hadoop.fs.FileSystem#create(). They are extracted from open source projects; you can go to the original project or source file by following the link above each example.
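Before looking at the project examples, the short sketch below shows the most commonly used create() overloads in one place. It is not taken from any of the projects above; the path, buffer size, replication, block size and permission values are only illustrative.

// Minimal usage sketch for FileSystem#create(); all values are hypothetical.
private static void createExamples(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path path = new Path("/tmp/example.txt");

  // Simplest overload: create the file (overwriting if it exists) and write a line.
  try (FSDataOutputStream out = fs.create(path)) {
    out.writeBytes("hello\n");
  }

  // Fuller overload: overwrite flag, buffer size, replication and block size.
  try (FSDataOutputStream out = fs.create(path, true,
      conf.getInt("io.file.buffer.size", 4096), (short) 1, 134217728L)) {
    out.writeBytes("hello again\n");
  }

  // Static helper that also applies a permission to the new file
  // (the form used in Examples 2 and 16 below).
  try (FSDataOutputStream out = FileSystem.create(fs, path,
      new FsPermission((short) 0644))) {
    out.writeBytes("hello once more\n");
  }
}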
Example 1
Source Project: hadoop-gpu   File: DistributedPentomino.java    License: Apache License 2.0
/**
 * Create the input file with all of the possible combinations of the 
 * given depth.
 * @param fs the filesystem to write into
 * @param dir the directory to write the input file into
 * @param pent the puzzle 
 * @param depth the depth to explore when generating prefixes
 */
private static void createInputDirectory(FileSystem fs, 
                                         Path dir,
                                         Pentomino pent,
                                         int depth
                                         ) throws IOException {
  fs.mkdirs(dir);
  List<int[]> splits = pent.getSplits(depth);
  PrintStream file = 
    new PrintStream(new BufferedOutputStream
                    (fs.create(new Path(dir, "part1")), 64*1024));
  for(int[] prefix: splits) {
    for(int i=0; i < prefix.length; ++i) {
      if (i != 0) {
        file.print(',');          
      }
      file.print(prefix[i]);
    }
    file.print('\n');
  }
  file.close();
}
 
Example 2
Source Project: tajo   File: TajoMaster.java    License: Apache License 2.0
private void writeSystemConf() throws IOException {
  // Storing the system configs
  Path systemConfPath = TajoConf.getSystemConfPath(systemConf);

  if (!defaultFS.exists(systemConfPath.getParent())) {
    defaultFS.mkdirs(systemConfPath.getParent());
  }

  if (defaultFS.exists(systemConfPath)) {
    defaultFS.delete(systemConfPath, false);
  }

  // In TajoMaster HA, some masters might see a LeaseExpiredException because of a lease mismatch. Thus,
  // we need to create the xml file below at HdfsServiceTracker::writeSystemConf.
  if (!systemConf.getBoolVar(TajoConf.ConfVars.TAJO_MASTER_HA_ENABLE)) {
    try (FSDataOutputStream out = FileSystem.create(defaultFS, systemConfPath,
            new FsPermission(SYSTEM_CONF_FILE_PERMISSION))) {
      systemConf.writeXml(out);
    }
    defaultFS.setReplication(systemConfPath, (short) systemConf.getIntVar(ConfVars.SYSTEM_CONF_REPLICA_COUNT));
  }
}
 
Example 3
Source Project: hadoop   File: TestIFile.java    License: Apache License 2.0
@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class,
                                   codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
    new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(),
        codec, null);
  reader.close();
  
  // test check sum 
  byte[] ab = new byte[100];
  int readed = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(readed, reader.checksumIn.getChecksum().length);
  
}
 
Example 4
Source Project: phoenix   File: RegexBulkLoadToolIT.java    License: Apache License 2.0
@Ignore
@Test
public void testImportWithIndex() throws Exception {
    Statement stmt = conn.createStatement();
    stmt.execute("CREATE TABLE TABLE3 (ID INTEGER NOT NULL PRIMARY KEY, " +
        "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
    String ddl = "CREATE INDEX TABLE3_IDX ON TABLE3 "
            + " (FIRST_NAME ASC)"
            + " INCLUDE (LAST_NAME)";
    stmt.execute(ddl);
    
    FileSystem fs = FileSystem.get(getUtility().getConfiguration());
    FSDataOutputStream outputStream = fs.create(new Path("/tmp/input3.csv"));
    PrintWriter printWriter = new PrintWriter(outputStream);
    printWriter.println("1,FirstName 1,LastName 1");
    printWriter.println("2,FirstName 2,LastName 2");
    printWriter.close();

    RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
    regexBulkLoadTool.setConf(getUtility().getConfiguration());
    int exitCode = regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input3.csv",
            "--table", "table3",
            "--regex", "([^,]*),([^,]*),([^,]*)",
            "--zookeeper", zkQuorum});
    assertEquals(0, exitCode);

    ResultSet rs = stmt.executeQuery("SELECT id, FIRST_NAME FROM TABLE3 where first_name='FirstName 2'");
    assertTrue(rs.next());
    assertEquals(2, rs.getInt(1));
    assertEquals("FirstName 2", rs.getString(2));

    rs.close();
    stmt.close();
}
 
Example 5
Source Project: RDFS   File: TestRaidDfs.java    License: Apache License 2.0
public static long createTestFile(FileSystem fileSys, Path name, int repl,
                      long fileSize, long blockSize, int seed)
  throws IOException {
  CRC32 crc = new CRC32();
  Random rand = new Random(seed);
  FSDataOutputStream stm = fileSys.create(name, true,
                                          fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                          (short)repl, blockSize);
  LOG.info("create file " + name + " size: " + fileSize + " blockSize: " + 
           blockSize + " repl: " + repl);
  // fill random data into file
  byte[] b = new byte[(int)blockSize];
  long numBlocks = fileSize / blockSize;
  for (int i = 0; i < numBlocks; i++) {
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  long lastBlock = fileSize - numBlocks * blockSize;
  if (lastBlock > 0) {
    b = new byte[(int)lastBlock];
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  stm.close();
  return crc.getValue();
}
 
Example 6
Source Project: sparkboost   File: DataUtils.java    License: Apache License 2.0
/**
 * Generate a new LibSvm output file, giving each document an index corresponding to the position the document had in the
 * original input LibSvm file.
 *
 * @param sc         The spark context.
 * @param dataFile   The data file.
 * @param outputFile The output file.
 */
public static void generateLibSvmFileWithIDs(JavaSparkContext sc, String dataFile, String outputFile) {
    if (sc == null)
        throw new NullPointerException("The Spark Context is 'null'");
    if (dataFile == null || dataFile.isEmpty())
        throw new IllegalArgumentException("The dataFile is 'null' or empty");

    ArrayList<MultilabelPoint> points = new ArrayList<>();
    try {
        Path pt = new Path(dataFile);
        FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));

        Path ptOut = new Path(outputFile);
        BufferedWriter bw = new BufferedWriter((new OutputStreamWriter(fs.create(ptOut))));

        try {
            int docID = 0;
            String line = br.readLine();
            while (line != null) {
                bw.write("" + docID + "\t" + line + "\n");
                line = br.readLine();
                docID++;
            }
        } finally {
            br.close();
            bw.close();
        }
    } catch (Exception e) {
        throw new RuntimeException("Reading input LibSVM data file", e);
    }

}
 
Example 7
Source Project: RDFS   File: JobControlTestUtils.java    License: Apache License 2.0
/**
 * Generates data that can be used for Job Control tests.
 * 
 * @param fs FileSystem to create data in.
 * @param dirPath Path to create the data in.
 * @throws IOException If an error occurs creating the data.
 */
static void generateData(FileSystem fs, Path dirPath) throws IOException {
  FSDataOutputStream out = fs.create(new Path(dirPath, "data.txt"));
  for (int i = 0; i < 10000; i++) {
    String line = generateRandomLine();
    out.write(line.getBytes("UTF-8"));
  }
  out.close();
}
 
Example 8
Source Project: components   File: AvroHdfsFileSink.java    License: Apache License 2.0
@Override
protected void mergeOutput(FileSystem fs, String sourceFolder, String targetFile) throws IOException {
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
        FileStatus[] sourceStatuses = FileSystemUtil.listSubFiles(fs, sourceFolder);
        Schema schema = null;
        String inputCodec = null;
        OutputStream output = new BufferedOutputStream(fs.create(new Path(targetFile)));
        for (FileStatus sourceStatus : sourceStatuses) {
            try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
                    new BufferedInputStream(fs.open(sourceStatus.getPath())), new GenericDatumReader<GenericRecord>())) {

                if (schema == null) {
                    schema = reader.getSchema();
                    for (String key : reader.getMetaKeys()) {
                        if (!DataFileWriter.isReservedMeta(key)) {
                            writer.setMeta(key, reader.getMeta(key));
                        }
                    }
                    inputCodec = reader.getMetaString(DataFileConstants.CODEC);
                    if (inputCodec == null) {
                        inputCodec = DataFileConstants.NULL_CODEC;
                    }
                    writer.setCodec(CodecFactory.fromString(inputCodec));
                    writer.create(schema, output);
                }
                writer.appendAllFrom(reader, false);
            }
        }
    }
}
 
Example 9
Source Project: aegisthus   File: Distcp.java    License: Apache License 2.0
protected void writeManifest(Job job, List<FileStatus> files) throws IOException {
	Path out = new Path(job.getConfiguration().get(OPT_DISTCP_TARGET));
	FileSystem fsOut = out.getFileSystem(job.getConfiguration());
	DataOutputStream dos = fsOut.create(new Path(out, "_manifest/.manifest"));
	for (FileStatus file : files) {
		Path output = new Path(out, file.getPath().getName());
		dos.writeBytes(output.toUri().toString());
		dos.write('\n');
	}
	dos.close();
}
 
Example 10
Source Project: RDFS   File: TestSeekBug.java    License: Apache License 2.0
private void writeFile(FileSystem fileSys, Path name) throws IOException {
  // create and write a file that contains 1MB
  DataOutputStream stm = fileSys.create(name);
  byte[] buffer = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  stm.close();
}
 
Example 11
Source Project: hadoop-gpu   File: TestSetTimes.java    License: Apache License 2.0
private FSDataOutputStream writeFile(FileSystem fileSys, Path name, int repl)
  throws IOException {
  FSDataOutputStream stm = fileSys.create(name, true, 
                                          fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                          (short)repl, (long)blockSize);
  byte[] buffer = new byte[fileSize];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  return stm;
}
 
Example 12
Source Project: succinct   File: TestUtils.java    License: Apache License 2.0
public static FSDataInputStream getStream(ShortBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeShort(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
 
Example 13
Source Project: big-c   File: TestLineRecordReaderJobs.java    License: Apache License 2.0
/**
 * Writes the input test file
 *
 * @param conf
 * @throws IOException
 */
public void createInputFile(Configuration conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(inputDir, "test.txt");
  Writer writer = new OutputStreamWriter(localFs.create(file));
  writer.write("abc\ndef\t\nghi\njkl");
  writer.close();
}
 
Example 14
Source Project: emr-sample-apps   File: CopyFromS3.java    License: Apache License 2.0
/**
 * This method constructs the JobConf to be used to run the map reduce job to
 * download the files from S3. This is a potentially expensive method since it
 * makes multiple calls to S3 to get a listing of all the input data. Clients
 * are encouraged to cache the returned JobConf reference and not call this
 * method multiple times unless necessary.
 * 
 * @return the JobConf to be used to run the map reduce job to download the
 *         files from S3.
 */
public JobConf getJobConf() throws IOException, ParseException {
  JobConf conf = new JobConf(CopyFromS3.class);
  conf.setJobName("CopyFromS3");
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(S3CopyMapper.class);
  // We configure a reducer, even though we don't use it right now.
  // The idea is that, in the future we may. 
  conf.setReducerClass(HDFSWriterReducer.class);
  conf.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(conf, new Path(tempFile));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setCompressMapOutput(true);

  JobClient jobClient = new JobClient(conf);

  FileSystem inputFS = FileSystem.get(URI.create(inputPathPrefix), conf);
  DatePathFilter datePathFilter = new DatePathFilter(startDate, endDate);
  List<Path> filePaths = getFilePaths(inputFS, new Path(inputPathPrefix), datePathFilter, jobClient.getDefaultMaps());

  // Write the file names to a temporary index file to be used
  // as input to the map tasks.
  FileSystem outputFS = FileSystem.get(URI.create(tempFile), conf);
  FSDataOutputStream outputStream = outputFS.create(new Path(tempFile), true);
  try {
    for (Path path : filePaths) {
      outputStream.writeBytes(path.toString() + "\n");
    }
  }
  finally {
    outputStream.close();
  }

  conf.setNumMapTasks(Math.min(filePaths.size(), jobClient.getDefaultMaps()));

  return conf;
}
 
Example 15
public FSDataOutputStream openHdfsFile(String fileName, String folderName) {
	logger.debug("Begin file opening");
	FSDataOutputStream fsOS = null;
	Path filePath = null;
	try {
		FileSystem fs = hdfs.getFs();
		filePath = fs.getWorkingDirectory();
		if (folderName != null && folderName.length() > 0) {
			filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR, folderName));
			if (!fs.exists(filePath) || !fs.isDirectory(filePath)) {
				fs.mkdirs(filePath);
			}
		}
		filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR + fileName));
		boolean existsFile = fs.exists(filePath);
		if (existsFile) {
			logger.debug("File is already present in folder, it will be deleted and replaced with new file");
			fs.delete(filePath, true);
		}
		fsOS = fs.create(filePath, true);
	} catch (IOException e) {
		logger.error("Impossible to open file in File System");
		throw new SpagoBIRuntimeException("Impossible to open file in File System" + e);
	}
	logger.debug("File opened");
	return fsOS;
}
 
Example 16
Source Project: big-c   File: UtilsForTests.java    License: Apache License 2.0
/**
 * Creates a file in the DFS with the given permission and writes the supplied
 * input string to it.
 * @param dfs the FileSystem in which the file is created
 * @param URIPATH the path at which the file is created
 * @param permission the FsPermission to apply to the created file
 * @param input the content written to the file
 * @return the (already closed) DataOutputStream of the created file
 */
public static DataOutputStream
    createTmpFileDFS(FileSystem dfs, Path URIPATH,
    FsPermission permission, String input) throws Exception {
  //Creating the path with the file
  DataOutputStream file =
    FileSystem.create(dfs, URIPATH, permission);
  file.writeBytes(input);
  file.close();
  return file;
}
 
Example 17
Source Project: hadoop-gpu   File: TestTrash.java    License: Apache License 2.0
protected static Path writeFile(FileSystem fs, Path f) throws IOException {
  DataOutputStream out = fs.create(f);
  out.writeBytes("dhruba: " + f);
  out.close();
  assertTrue(fs.exists(f));
  return f;
}
 
Example 18
Source Project: hadoop   File: TestBlockTokenWithDFS.java    License: Apache License 2.0
private void createFile(FileSystem fs, Path filename) throws IOException {
  FSDataOutputStream out = fs.create(filename);
  out.write(rawData);
  out.close();
}
 
Example 19
Source Project: big-c   File: TestDistributedFileSystem.java    License: Apache License 2.0
@Test
public void testCreateWithCustomChecksum() throws Exception {
  Configuration conf = getTestConfiguration();
  MiniDFSCluster cluster = null;
  Path testBasePath = new Path("/test/csum");
  // create args 
  Path path1 = new Path(testBasePath, "file_wtih_crc1");
  Path path2 = new Path(testBasePath, "file_with_crc2");
  ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
  ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);

  // common args
  FsPermission perm = FsPermission.getDefault().applyUMask(
      FsPermission.getUMask(conf));
  EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE,
      CreateFlag.CREATE);
  short repl = 1;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    FileSystem dfs = cluster.getFileSystem();

    dfs.mkdirs(testBasePath);

    // create two files with different checksum types
    FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096, repl,
        131072L, null, opt1);
    FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096, repl,
        131072L, null, opt2);

    for (int i = 0; i < 1024; i++) {
      out1.write(i);
      out2.write(i);
    }
    out1.close();
    out2.close();

    // the two checksums must be different.
    MD5MD5CRC32FileChecksum sum1 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
    MD5MD5CRC32FileChecksum sum2 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
    assertFalse(sum1.equals(sum2));

    // check the individual params
    assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
    assertEquals(DataChecksum.Type.CRC32,  sum2.getCrcType());

  } finally {
    if (cluster != null) {
      cluster.getFileSystem().delete(testBasePath, true);
      cluster.shutdown();
    }
  }
}
 
Example 20
Source Project: RDFS   File: TestFileAppend4.java    License: Apache License 2.0
void replicationTest(int badDN) throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int)BLOCK_SIZE/2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE%4 == 0);

    file1 = new Path("/appendWithReplication.dat");

    // write 1/2 block & sync
    stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.sync();
    assertNumCurrentReplicas(rep);

    // close one of the datanodes
    cluster.stopDataNode(badDN);

    // write 1/4 block & sync
    AppendTestUtil.write(stm, halfBlock, (int)BLOCK_SIZE/4);
    stm.sync();
    assertNumCurrentReplicas((short)(rep - 1));

    // restart the cluster
    /*
     * we put the namenode in safe mode first so it doesn't process
     * recoverBlock() commands from the remaining DFSClient as datanodes
     * are serially shutdown
     */
    cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs1.close();
    cluster.shutdown();
    LOG.info("STOPPED first instance of the cluster");
    cluster = new MiniDFSCluster(conf, 3, false, null);
    cluster.getNameNode().getNamesystem().stallReplicationWork();
    cluster.waitActive();
    fs1 = cluster.getFileSystem();
    LOG.info("START second instance.");

    recoverFile(fs1);
    LOG.info("Recovered file");

    // the 2 DNs with the larger sequence number should win
    BlockLocation[] bl = fs1.getFileBlockLocations(
        fs1.getFileStatus(file1), 0, BLOCK_SIZE);
    LOG.info("Checking blocks");
    assertTrue("Should have one block", bl.length == 1);

    // Wait up to 1 second for block replication - we may have
    // only replication 1 for a brief moment after close, since
    // closing only waits for dfs.replication.min replicas, and
    // it may take some millis before the other DN reports block
    waitForBlockReplication(fs1, file1.toString(), 2, 1);

    assertFileSize(fs1, BLOCK_SIZE*3/4);
    checkFile(fs1, BLOCK_SIZE*3/4);

    LOG.info("Checking replication");
    // verify that, over time, the block has been replicated to 3 DN
    cluster.getNameNode().getNamesystem().restartReplicationWork();
    waitForBlockReplication(fs1, file1.toString(), 3, 20);
  } finally {
    fs1.close();
    cluster.shutdown();
  }
}