Java Code Examples for org.apache.hadoop.fs.FileSystem#create()

The following examples show how to use org.apache.hadoop.fs.FileSystem#create(). Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
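Before the project-specific examples, here is a minimal, self-contained sketch of the most common create() pattern. The class name, the path /tmp/create-example.txt, and the written text are illustrative placeholders only, not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Resolves to whatever fs.defaultFS points at (the local file system by default, HDFS on a cluster).
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/create-example.txt"); // illustrative path
    // create(path, true) overwrites an existing file; many examples below use richer overloads
    // that also take buffer size, replication, block size, permissions, or checksum options.
    try (FSDataOutputStream out = fs.create(path, true)) {
      out.writeBytes("hello\n");
    }
  }
}
 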
Example 1
Source File: TestIFile.java    From hadoop with Apache License 2.0
/** Same as above, but also creates a reader. */
@Test
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class,
                                   codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
    new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(),
        codec, null);
  reader.close();
  
  // test checksum
  byte[] ab = new byte[100];
  int bytesRead = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(bytesRead, reader.checksumIn.getChecksum().length);
  
}
 
Example 2
Source File: TajoMaster.java    From tajo with Apache License 2.0
private void writeSystemConf() throws IOException {
  // Storing the system configs
  Path systemConfPath = TajoConf.getSystemConfPath(systemConf);

  if (!defaultFS.exists(systemConfPath.getParent())) {
    defaultFS.mkdirs(systemConfPath.getParent());
  }

  if (defaultFS.exists(systemConfPath)) {
    defaultFS.delete(systemConfPath, false);
  }

  // In TajoMaster HA mode, a master might see a LeaseExpiredException because of a lease mismatch,
  // so in that case the XML file below is created by HdfsServiceTracker::writeSystemConf instead.
  if (!systemConf.getBoolVar(TajoConf.ConfVars.TAJO_MASTER_HA_ENABLE)) {
    try (FSDataOutputStream out = FileSystem.create(defaultFS, systemConfPath,
            new FsPermission(SYSTEM_CONF_FILE_PERMISSION))) {
      systemConf.writeXml(out);
    }
    defaultFS.setReplication(systemConfPath, (short) systemConf.getIntVar(ConfVars.SYSTEM_CONF_REPLICA_COUNT));
  }
}
 
Example 3
Source File: DistributedPentomino.java    From hadoop-gpu with Apache License 2.0
/**
 * Create the input file with all of the possible combinations of the 
 * given depth.
 * @param fs the filesystem to write into
 * @param dir the directory to write the input file into
 * @param pent the puzzle 
 * @param depth the depth to explore when generating prefixes
 */
private static void createInputDirectory(FileSystem fs, 
                                         Path dir,
                                         Pentomino pent,
                                         int depth
                                         ) throws IOException {
  fs.mkdirs(dir);
  List<int[]> splits = pent.getSplits(depth);
  PrintStream file =
      new PrintStream(new BufferedOutputStream(
          fs.create(new Path(dir, "part1")), 64*1024));
  for(int[] prefix: splits) {
    for(int i=0; i < prefix.length; ++i) {
      if (i != 0) {
        file.print(',');          
      }
      file.print(prefix[i]);
    }
    file.print('\n');
  }
  file.close();
}
 
Example 4
Source File: TestSetTimes.java    From hadoop-gpu with Apache License 2.0
private FSDataOutputStream writeFile(FileSystem fileSys, Path name, int repl)
  throws IOException {
  FSDataOutputStream stm = fileSys.create(name, true, 
                                          fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                          (short)repl, (long)blockSize);
  byte[] buffer = new byte[fileSize];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  return stm;
}
 
Example 5
Source File: TestTrash.java    From hadoop-gpu with Apache License 2.0
protected static Path writeFile(FileSystem fs, Path f) throws IOException {
  DataOutputStream out = fs.create(f);
  out.writeBytes("dhruba: " + f);
  out.close();
  assertTrue(fs.exists(f));
  return f;
}
 
Example 6
Source File: UtilsForTests.java    From big-c with Apache License 2.0
/**
 * Creates a file in the DFS and writes the given input to it.
 * @param dfs the FileSystem in which the file is created
 * @param URIPATH the path of the file to create
 * @param permission the FsPermission to apply to the created file
 * @param input the content to write into the file
 * @return the DataOutputStream used to write the file (already closed)
 */
public static DataOutputStream
    createTmpFileDFS(FileSystem dfs, Path URIPATH,
    FsPermission permission, String input) throws Exception {
  //Creating the path with the file
  DataOutputStream file =
    FileSystem.create(dfs, URIPATH, permission);
  file.writeBytes(input);
  file.close();
  return file;
}
 
Example 7
Source File: PersistedHDFSManager.java    From Knowage-Server with GNU Affero General Public License v3.0
public FSDataOutputStream openHdfsFile(String fileName, String folderName) {
	logger.debug("Begin file opening");
	FSDataOutputStream fsOS = null;
	Path filePath = null;
	try {
		FileSystem fs = hdfs.getFs();
		filePath = fs.getWorkingDirectory();
		if (folderName != null && folderName.length() > 0) {
			filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR, folderName));
			if (!fs.exists(filePath) || !fs.isDirectory(filePath)) {
				fs.mkdirs(filePath);
			}
		}
		filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR + fileName));
		boolean existsFile = fs.exists(filePath);
		if (existsFile) {
			logger.debug("File is already present in folder, it will be deleted and replaced with new file");
			fs.delete(filePath, true);
		}
		fsOS = fs.create(filePath, true);
	} catch (IOException e) {
		logger.error("Impossible to open file in File System");
		throw new SpagoBIRuntimeException("Impossible to open file in File System" + e);
	}
	logger.debug("File opened");
	return fsOS;
}
 
Example 8
Source File: CopyFromS3.java    From emr-sample-apps with Apache License 2.0
/**
 * This method constructs the JobConf to be used to run the map reduce job to
 * download the files from S3. This is a potentially expensive method since it
 * makes multiple calls to S3 to get a listing of all the input data. Clients
 * are encouraged to cache the returned JobConf reference and not call this
 * method multiple times unless necessary.
 * 
 * @return the JobConf to be used to run the map reduce job to download the
 *         files from S3.
 */
public JobConf getJobConf() throws IOException, ParseException {
  JobConf conf = new JobConf(CopyFromS3.class);
  conf.setJobName("CopyFromS3");
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(S3CopyMapper.class);
  // We configure a reducer even though we don't use it right now;
  // the idea is that we may in the future.
  conf.setReducerClass(HDFSWriterReducer.class);
  conf.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(conf, new Path(tempFile));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setCompressMapOutput(true);

  JobClient jobClient = new JobClient(conf);

  FileSystem inputFS = FileSystem.get(URI.create(inputPathPrefix), conf);
  DatePathFilter datePathFilter = new DatePathFilter(startDate, endDate);
  List<Path> filePaths = getFilePaths(inputFS, new Path(inputPathPrefix), datePathFilter, jobClient.getDefaultMaps());

  // Write the file names to a temporary index file to be used
  // as input to the map tasks.
  FileSystem outputFS = FileSystem.get(URI.create(tempFile), conf);
  FSDataOutputStream outputStream = outputFS.create(new Path(tempFile), true);
  try {
    for (Path path : filePaths) {
      outputStream.writeBytes(path.toString() + "\n");
    }
  }
  finally {
    outputStream.close();
  }

  conf.setNumMapTasks(Math.min(filePaths.size(), jobClient.getDefaultMaps()));

  return conf;
}
 
Example 9
Source File: TestLineRecordReaderJobs.java    From big-c with Apache License 2.0
/**
 * Writes the input test file.
 *
 * @param conf the Configuration used to obtain the local file system
 * @throws IOException if the file cannot be created or written
 */
public void createInputFile(Configuration conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(inputDir, "test.txt");
  Writer writer = new OutputStreamWriter(localFs.create(file));
  writer.write("abc\ndef\t\nghi\njkl");
  writer.close();
}
 
Example 10
Source File: TestUtils.java    From succinct with Apache License 2.0
public static FSDataInputStream getStream(ShortBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeShort(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
 
Example 11
Source File: TestSeekBug.java    From RDFS with Apache License 2.0
private void writeFile(FileSystem fileSys, Path name) throws IOException {
  // create and write a file that contains 1MB
  DataOutputStream stm = fileSys.create(name);
  byte[] buffer = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  stm.close();
}
 
Example 12
Source File: Distcp.java    From aegisthus with Apache License 2.0
protected void writeManifest(Job job, List<FileStatus> files) throws IOException {
	Path out = new Path(job.getConfiguration().get(OPT_DISTCP_TARGET));
	FileSystem fsOut = out.getFileSystem(job.getConfiguration());
	DataOutputStream dos = fsOut.create(new Path(out, "_manifest/.manifest"));
	for (FileStatus file : files) {
		Path output = new Path(out, file.getPath().getName());
		dos.writeBytes(output.toUri().toString());
		dos.write('\n');
	}
	dos.close();
}
 
Example 13
Source File: AvroHdfsFileSink.java    From components with Apache License 2.0
@Override
protected void mergeOutput(FileSystem fs, String sourceFolder, String targetFile) throws IOException {
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
        FileStatus[] sourceStatuses = FileSystemUtil.listSubFiles(fs, sourceFolder);
        Schema schema = null;
        String inputCodec = null;
        OutputStream output = new BufferedOutputStream(fs.create(new Path(targetFile)));
        for (FileStatus sourceStatus : sourceStatuses) {
            try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
                    new BufferedInputStream(fs.open(sourceStatus.getPath())), new GenericDatumReader<GenericRecord>())) {

                if (schema == null) {
                    schema = reader.getSchema();
                    for (String key : reader.getMetaKeys()) {
                        if (!DataFileWriter.isReservedMeta(key)) {
                            writer.setMeta(key, reader.getMeta(key));
                        }
                    }
                    inputCodec = reader.getMetaString(DataFileConstants.CODEC);
                    if (inputCodec == null) {
                        inputCodec = DataFileConstants.NULL_CODEC;
                    }
                    writer.setCodec(CodecFactory.fromString(inputCodec));
                    writer.create(schema, output);
                }
                writer.appendAllFrom(reader, false);
            }
        }
    }
}
 
Example 14
Source File: JobControlTestUtils.java    From RDFS with Apache License 2.0
/**
 * Generates data that can be used for Job Control tests.
 * 
 * @param fs FileSystem to create data in.
 * @param dirPath Path to create the data in.
 * @throws IOException If an error occurs creating the data.
 */
static void generateData(FileSystem fs, Path dirPath) throws IOException {
  FSDataOutputStream out = fs.create(new Path(dirPath, "data.txt"));
  for (int i = 0; i < 10000; i++) {
    String line = generateRandomLine();
    out.write(line.getBytes("UTF-8"));
  }
  out.close();
}
 
Example 15
Source File: DataUtils.java    From sparkboost with Apache License 2.0
/**
 * Generates a new LibSvm output file, giving each document an index corresponding to the position the document had in the
 * original input LibSvm file.
 *
 * @param sc         The spark context.
 * @param dataFile   The data file.
 * @param outputFile The output file.
 */
public static void generateLibSvmFileWithIDs(JavaSparkContext sc, String dataFile, String outputFile) {
    if (sc == null)
        throw new NullPointerException("The Spark Context is 'null'");
    if (dataFile == null || dataFile.isEmpty())
        throw new IllegalArgumentException("The dataFile is 'null' or empty");

    ArrayList<MultilabelPoint> points = new ArrayList<>();
    try {
        Path pt = new Path(dataFile);
        FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));

        Path ptOut = new Path(outputFile);
        BufferedWriter bw = new BufferedWriter((new OutputStreamWriter(fs.create(ptOut))));

        try {
            int docID = 0;
            String line = br.readLine();
            while (line != null) {
                bw.write("" + docID + "\t" + line + "\n");
                line = br.readLine();
                docID++;
            }
        } finally {
            br.close();
            bw.close();
        }
    } catch (Exception e) {
        throw new RuntimeException("Reading input LibSVM data file", e);
    }

}
 
Example 16
Source File: TestRaidDfs.java    From RDFS with Apache License 2.0
public static long createTestFile(FileSystem fileSys, Path name, int repl,
                      long fileSize, long blockSize, int seed)
  throws IOException {
  CRC32 crc = new CRC32();
  Random rand = new Random(seed);
  FSDataOutputStream stm = fileSys.create(name, true,
                                          fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                          (short)repl, blockSize);
  LOG.info("create file " + name + " size: " + fileSize + " blockSize: " + 
           blockSize + " repl: " + repl);
  // fill random data into file
  byte[] b = new byte[(int)blockSize];
  long numBlocks = fileSize / blockSize;
  for (int i = 0; i < numBlocks; i++) {
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  long lastBlock = fileSize - numBlocks * blockSize;
  if (lastBlock > 0) {
    b = new byte[(int)lastBlock];
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  stm.close();
  return crc.getValue();
}
 
Example 17
Source File: RegexBulkLoadToolIT.java    From phoenix with Apache License 2.0
@Ignore
@Test
public void testImportWithIndex() throws Exception {


    Statement stmt = conn.createStatement();
    stmt.execute("CREATE TABLE TABLE3 (ID INTEGER NOT NULL PRIMARY KEY, " +
        "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
    String ddl = "CREATE INDEX TABLE3_IDX ON TABLE3 "
            + " (FIRST_NAME ASC)"
            + " INCLUDE (LAST_NAME)";
    stmt.execute(ddl);
    
    FileSystem fs = FileSystem.get(getUtility().getConfiguration());
    FSDataOutputStream outputStream = fs.create(new Path("/tmp/input3.csv"));
    PrintWriter printWriter = new PrintWriter(outputStream);
    printWriter.println("1,FirstName 1,LastName 1");
    printWriter.println("2,FirstName 2,LastName 2");
    printWriter.close();

    RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
    regexBulkLoadTool.setConf(getUtility().getConfiguration());
    int exitCode = regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input3.csv",
            "--table", "table3",
            "--regex", "([^,]*),([^,]*),([^,]*)",
            "--zookeeper", zkQuorum});
    assertEquals(0, exitCode);

    ResultSet rs = stmt.executeQuery("SELECT id, FIRST_NAME FROM TABLE3 where first_name='FirstName 2'");
    assertTrue(rs.next());
    assertEquals(2, rs.getInt(1));
    assertEquals("FirstName 2", rs.getString(2));

    rs.close();
    stmt.close();
}
 
Example 18
Source File: TestBlockTokenWithDFS.java    From hadoop with Apache License 2.0
private void createFile(FileSystem fs, Path filename) throws IOException {
  FSDataOutputStream out = fs.create(filename);
  out.write(rawData);
  out.close();
}
 
Example 19
Source File: TestDistributedFileSystem.java    From big-c with Apache License 2.0
@Test
public void testCreateWithCustomChecksum() throws Exception {
  Configuration conf = getTestConfiguration();
  MiniDFSCluster cluster = null;
  Path testBasePath = new Path("/test/csum");
  // create args 
  Path path1 = new Path(testBasePath, "file_with_crc1");
  Path path2 = new Path(testBasePath, "file_with_crc2");
  ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
  ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);

  // common args
  FsPermission perm = FsPermission.getDefault().applyUMask(
      FsPermission.getUMask(conf));
  EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE,
      CreateFlag.CREATE);
  short repl = 1;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    FileSystem dfs = cluster.getFileSystem();

    dfs.mkdirs(testBasePath);

    // create two files with different checksum types
    FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096, repl,
        131072L, null, opt1);
    FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096, repl,
        131072L, null, opt2);

    for (int i = 0; i < 1024; i++) {
      out1.write(i);
      out2.write(i);
    }
    out1.close();
    out2.close();

    // the two checksums must be different.
    MD5MD5CRC32FileChecksum sum1 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
    MD5MD5CRC32FileChecksum sum2 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
    assertFalse(sum1.equals(sum2));

    // check the individual params
    assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
    assertEquals(DataChecksum.Type.CRC32,  sum2.getCrcType());

  } finally {
    if (cluster != null) {
      cluster.getFileSystem().delete(testBasePath, true);
      cluster.shutdown();
    }
  }
}
 
Example 20
Source File: TestFileAppend4.java    From RDFS with Apache License 2.0
void replicationTest(int badDN) throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int)BLOCK_SIZE/2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE%4 == 0);

    file1 = new Path("/appendWithReplication.dat");

    // write 1/2 block & sync
    stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.sync();
    assertNumCurrentReplicas(rep);

    // close one of the datanodes
    cluster.stopDataNode(badDN);

    // write 1/4 block & sync
    AppendTestUtil.write(stm, halfBlock, (int)BLOCK_SIZE/4);
    stm.sync();
    assertNumCurrentReplicas((short)(rep - 1));

    // restart the cluster
    /*
     * we put the namenode in safe mode first so it doesn't process
     * recoverBlock() commands from the remaining DFSClient as datanodes
     * are serially shutdown
     */
    cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs1.close();
    cluster.shutdown();
    LOG.info("STOPPED first instance of the cluster");
    cluster = new MiniDFSCluster(conf, 3, false, null);
    cluster.getNameNode().getNamesystem().stallReplicationWork();
    cluster.waitActive();
    fs1 = cluster.getFileSystem();
    LOG.info("START second instance.");

    recoverFile(fs1);
    LOG.info("Recovered file");

    // the 2 DNs with the larger sequence number should win
    BlockLocation[] bl = fs1.getFileBlockLocations(
        fs1.getFileStatus(file1), 0, BLOCK_SIZE);
    LOG.info("Checking blocks");
    assertTrue("Should have one block", bl.length == 1);

    // Wait up to 1 second for block replication - we may have
    // only replication 1 for a brief moment after close, since
    // closing only waits for dfs.replication.min replicas, and
    // it may take some millis before the other DN reports the block
    waitForBlockReplication(fs1, file1.toString(), 2, 1);

    assertFileSize(fs1, BLOCK_SIZE*3/4);
    checkFile(fs1, BLOCK_SIZE*3/4);

    LOG.info("Checking replication");
    // verify that, over time, the block has been replicated to 3 DN
    cluster.getNameNode().getNamesystem().restartReplicationWork();
    waitForBlockReplication(fs1, file1.toString(), 3, 20);
  } finally {
    fs1.close();
    cluster.shutdown();
  }
}