Java Code Examples for org.apache.hadoop.fs.FileSystem#getConf()

The following examples show how to use org.apache.hadoop.fs.FileSystem#getConf(). They are taken from open-source projects; the source file, project, and license are noted above each example.
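For orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) that obtains a FileSystem and reads a setting back through getConf():

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // FileSystem.get() resolves the default filesystem from fs.defaultFS (core-site.xml)
    FileSystem fs = FileSystem.get(conf);
    // getConf() returns the Configuration the FileSystem instance was initialized with
    Configuration fsConf = fs.getConf();
    System.out.println(fsConf.get("fs.defaultFS")); // e.g. hdfs://namenode:8020 or file:///
  }
}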
Example 1
Source File: HiveIncrementalPuller.java    From hudi with Apache License 2.0
private String scanForCommitTime(FileSystem fs, String targetDataPath) throws IOException {
  if (targetDataPath == null) {
    throw new IllegalArgumentException("Please specify either --fromCommitTime or --targetDataPath");
  }
  if (!fs.exists(new Path(targetDataPath)) || !fs.exists(new Path(targetDataPath + "/.hoodie"))) {
    return "0";
  }
  HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs.getConf(), targetDataPath);

  Option<HoodieInstant> lastCommit =
      metadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
  if (lastCommit.isPresent()) {
    return lastCommit.get().getTimestamp();
  }
  return "0";
}
 
Example 2
Source File: SequenceFileReader.java    From kafka-connect-fs with Apache License 2.0
public SequenceFileReader(FileSystem fs, Path filePath, Map<String, Object> config) throws IOException {
    super(fs, filePath, new SeqToStruct(), config);

    this.reader = new SequenceFile.Reader(fs.getConf(),
            SequenceFile.Reader.file(filePath),
            SequenceFile.Reader.bufferSize(fs.getConf().getInt(FILE_READER_BUFFER_SIZE, DEFAULT_BUFFER_SIZE)));
    this.key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf());
    this.value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf());
    this.schema = SchemaBuilder.struct()
            .field(keyFieldName, getSchema(this.key))
            .field(valueFieldName, getSchema(this.value))
            .build();
    this.recordIndex = this.hasNextIndex = -1;
    this.hasNext = false;
    this.closed = false;
}
 
Example 3
Source File: FileSystemWriter.java    From kite with Apache License 2.0
private FileSystemWriter(FileSystem fs, Path path, long rollIntervalMillis,
                         long targetFileSize, DatasetDescriptor descriptor, Schema writerSchema) {
  Preconditions.checkNotNull(fs, "File system is not defined");
  Preconditions.checkNotNull(path, "Destination directory is not defined");
  Preconditions.checkNotNull(descriptor, "Descriptor is not defined");

  this.fs = fs;
  this.directory = path;
  this.rollIntervalMillis = rollIntervalMillis;
  this.targetFileSize = targetFileSize;
  this.descriptor = descriptor;
  this.conf = new Configuration(fs.getConf());
  this.state = ReaderWriterState.NEW;
  this.schema = writerSchema;

  // copy file format settings from custom properties to the Configuration
  for (String prop : descriptor.listProperties()) {
    conf.set(prop, descriptor.getProperty(prop));
  }

  // For performance reasons we will skip temp file creation if the file system does not support
  // efficient renaming, and write the file directly.
  this.useTempPath = FileSystemUtil.supportsRename(fs.getUri(), conf);
}
 
Example 4
Source File: SequenceFileReader.java    From jstorm with Apache License 2.0
public SequenceFileReader(FileSystem fs, Path file, Map conf)
        throws IOException {
  super(fs, file);
  int bufferSize = !conf.containsKey(BUFFER_SIZE) ? DEFAULT_BUFF_SIZE : Integer.parseInt( conf.get(BUFFER_SIZE).toString() );
  this.reader = new SequenceFile.Reader(fs.getConf(),  SequenceFile.Reader.file(file), SequenceFile.Reader.bufferSize(bufferSize) );
  this.key = (Key) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf() );
  this.value = (Value) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf() );
  this.offset = new SequenceFileReader.Offset(0,0,0);
}
 
Example 5
Source File: SequenceFileWriter.java    From flink with Apache License 2.0
@Override
public void open(FileSystem fs, Path path) throws IOException {
	super.open(fs, path);
	if (keyClass == null) {
		throw new IllegalStateException("Key Class has not been initialized.");
	}
	if (valueClass == null) {
		throw new IllegalStateException("Value Class has not been initialized.");
	}

	CompressionCodec codec = null;

	Configuration conf = fs.getConf();

	if (!compressionCodecName.equals("None")) {
		CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
		codec = codecFactory.getCodecByName(compressionCodecName);
		if (codec == null) {
			throw new RuntimeException("Codec " + compressionCodecName + " not found.");
		}
	}

	// the non-deprecated constructor syntax is only available in recent hadoop versions...
	writer = SequenceFile.createWriter(conf,
			getStream(),
			keyClass,
			valueClass,
			compressionType,
			codec);
}
 
Example 6
Source File: HoodieDataSourceHelpers.java    From hudi with Apache License 2.0
/**
 * Obtain all the commits, compactions that have occurred on the timeline, whose instant times could be fed into the
 * datasource options.
 */
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
  if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
    return metaClient.getActiveTimeline().getTimelineOfActions(
        CollectionUtils.createSet(HoodieActiveTimeline.COMMIT_ACTION,
            HoodieActiveTimeline.DELTA_COMMIT_ACTION));
  } else {
    return metaClient.getCommitTimeline().filterCompletedInstants();
  }
}
 
Example 7
Source File: TestHDFSTrash.java    From hadoop with Apache License 2.0
@Test
public void testNonDefaultFS() throws IOException {
  FileSystem fs = cluster.getFileSystem();
  Configuration conf = fs.getConf();
  conf.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
  TestTrash.trashNonDefaultFS(conf);
}
 
Example 8
Source File: SequenceFileWriter.java    From flink with Apache License 2.0
@Override
public void open(FileSystem fs, Path path) throws IOException {
	super.open(fs, path);
	if (keyClass == null) {
		throw new IllegalStateException("Key Class has not been initialized.");
	}
	if (valueClass == null) {
		throw new IllegalStateException("Value Class has not been initialized.");
	}

	CompressionCodec codec = null;

	Configuration conf = fs.getConf();

	if (!compressionCodecName.equals("None")) {
		CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
		codec = codecFactory.getCodecByName(compressionCodecName);
		if (codec == null) {
			throw new RuntimeException("Codec " + compressionCodecName + " not found.");
		}
	}

	// the non-deprecated constructor syntax is only available in recent hadoop versions...
	writer = SequenceFile.createWriter(conf,
			getStream(),
			keyClass,
			valueClass,
			compressionType,
			codec);
}
 
Example 9
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Get Avro schema from an Avro data file.
 */
public static Schema getSchemaFromDataFile(Path dataFile, FileSystem fs) throws IOException {
  try (SeekableInput sin = new FsInput(dataFile, fs.getConf());
      DataFileReader<GenericRecord> reader = new DataFileReader<>(sin, new GenericDatumReader<GenericRecord>())) {
    return reader.getSchema();
  }
}
 
Example 10
Source File: DFSAdmin.java    From hadoop-gpu with Apache License 2.0
/** Constructor */
public DFSAdminCommand(FileSystem fs) {
  super(fs.getConf());
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IllegalArgumentException("FileSystem " + fs.getUri() + 
        " is not a distributed file system");
  }
  this.dfs = (DistributedFileSystem)fs;
}
 
Example 11
Source File: TestHoodieDeltaStreamer.java    From hudi with Apache License 2.0
static void assertAtleastNDeltaCommits(int minExpected, String tablePath, FileSystem fs) {
  HoodieTableMetaClient meta = new HoodieTableMetaClient(fs.getConf(), tablePath);
  HoodieTimeline timeline = meta.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants();
  LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants().collect(Collectors.toList()));
  int numDeltaCommits = (int) timeline.getInstants().count();
  assertTrue(minExpected <= numDeltaCommits, "Got=" + numDeltaCommits + ", exp >=" + minExpected);
}
 
Example 12
Source File: TestHDFSTrash.java    From big-c with Apache License 2.0
@Test
public void testNonDefaultFS() throws IOException {
  FileSystem fs = cluster.getFileSystem();
  Configuration conf = fs.getConf();
  conf.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
  TestTrash.trashNonDefaultFS(conf);
}
 
Example 13
Source File: TestHDFSTrash.java    From RDFS with Apache License 2.0
public void testNonDefaultFS() throws IOException {
  FileSystem fs = cluster.getFileSystem();
  Configuration conf = fs.getConf();
  conf.set("fs.default.name", fs.getUri().toString());
  trashNonDefaultFS(conf);
}
 
Example 14
Source File: TestMultiFS.java    From hudi with Apache License 2.0
@Test
public void readLocalWriteHDFS() throws Exception {
  // Initialize table and filesystem
  HoodieTableMetaClient.initTableType(hadoopConf, dfsBasePath, HoodieTableType.valueOf(tableType),
      tableName, HoodieAvroPayload.class.getName());

  // Create write client to write some records in
  HoodieWriteConfig cfg = getHoodieWriteConfig(dfsBasePath);
  HoodieWriteConfig localConfig = getHoodieWriteConfig(tablePath);

  try (HoodieWriteClient hdfsWriteClient = getHoodieWriteClient(cfg);
      HoodieWriteClient localWriteClient = getHoodieWriteClient(localConfig)) {

    // Write generated data to hdfs (only inserts)
    String readCommitTime = hdfsWriteClient.startCommit();
    LOG.info("Starting commit " + readCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(readCommitTime, 100);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    hdfsWriteClient.upsert(writeRecords, readCommitTime);

    // Read from hdfs
    FileSystem fs = FSUtils.getFs(dfsBasePath, HoodieTestUtils.getDefaultHadoopConf());
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath);
    HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
    Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime);
    assertEquals(readRecords.count(), records.size(), "Should contain 100 records");

    // Write to local
    HoodieTableMetaClient.initTableType(hadoopConf, tablePath, HoodieTableType.valueOf(tableType),
        tableName, HoodieAvroPayload.class.getName());

    String writeCommitTime = localWriteClient.startCommit();
    LOG.info("Starting write commit " + writeCommitTime);
    List<HoodieRecord> localRecords = dataGen.generateInserts(writeCommitTime, 100);
    JavaRDD<HoodieRecord> localWriteRecords = jsc.parallelize(localRecords, 1);
    LOG.info("Writing to path: " + tablePath);
    localWriteClient.upsert(localWriteRecords, writeCommitTime);

    LOG.info("Reading from path: " + tablePath);
    fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf());
    metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath);
    timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
    Dataset<Row> localReadRecords =
        HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
    assertEquals(localReadRecords.count(), localRecords.size(), "Should contain 100 records");
  }
}
 
Example 15
Source File: HoodieSnapshotExporter.java    From hudi with Apache License 2.0
private BaseFileOnlyView getBaseFileOnlyView(JavaSparkContext jsc, Config cfg) {
  FileSystem fs = FSUtils.getFs(cfg.sourceBasePath, jsc.hadoopConfiguration());
  HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), cfg.sourceBasePath);
  return new HoodieTableFileSystemView(tableMetadata, tableMetadata
      .getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedInstants());
}
 
Example 16
Source File: TestReplication.java    From hadoop with Apache License 2.0
private void changeBlockLen(MiniDFSCluster cluster, int lenDelta)
    throws IOException, InterruptedException, TimeoutException {
  final Path fileName = new Path("/file1");
  final short REPLICATION_FACTOR = (short)1;
  final FileSystem fs = cluster.getFileSystem();
  final int fileLen = fs.getConf().getInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 512);
  DFSTestUtil.createFile(fs, fileName, fileLen, REPLICATION_FACTOR, 0);
  DFSTestUtil.waitReplication(fs, fileName, REPLICATION_FACTOR);

  ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);

  // Change the length of a replica
  for (int i=0; i<cluster.getDataNodes().size(); i++) {
    if (DFSTestUtil.changeReplicaLength(cluster, block, i, lenDelta)) {
      break;
    }
  }

  // increase the file's replication factor
  fs.setReplication(fileName, (short)(REPLICATION_FACTOR+1));

  // block replication triggers corrupt block detection
  DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", 
      cluster.getNameNodePort()), fs.getConf());
  LocatedBlocks blocks = dfsClient.getNamenode().getBlockLocations(
      fileName.toString(), 0, fileLen);
  if (lenDelta < 0) { // replica truncated
    while (!blocks.get(0).isCorrupt() ||
        REPLICATION_FACTOR != blocks.get(0).getLocations().length) {
      Thread.sleep(100);
      blocks = dfsClient.getNamenode().getBlockLocations(
          fileName.toString(), 0, fileLen);
    }
  } else { // no corruption detected; block replicated
    while (REPLICATION_FACTOR + 1 != blocks.get(0).getLocations().length) {
      Thread.sleep(100);
      blocks = dfsClient.getNamenode().getBlockLocations(
          fileName.toString(), 0, fileLen);
    }
  }
  fs.delete(fileName, true);
}
 
Example 17
Source File: TestHDFSTrash.java    From hadoop-gpu with Apache License 2.0
public void testNonDefaultFS() throws IOException {
  FileSystem fs = cluster.getFileSystem();
  Configuration conf = fs.getConf();
  conf.set("fs.default.name", fs.getUri().toString());
  trashNonDefaultFS(conf);
}
 
Example 18
Source File: TestReplication.java    From hadoop-gpu with Apache License 2.0
private void changeBlockLen(MiniDFSCluster cluster, 
    int lenDelta) throws IOException, InterruptedException {
  final Path fileName = new Path("/file1");
  final short REPLICATION_FACTOR = (short)1;
  final FileSystem fs = cluster.getFileSystem();
  final int fileLen = fs.getConf().getInt("io.bytes.per.checksum", 512);
  DFSTestUtil.createFile(fs, fileName, fileLen, REPLICATION_FACTOR, 0);
  DFSTestUtil.waitReplication(fs, fileName, REPLICATION_FACTOR);

  String block = DFSTestUtil.getFirstBlock(fs, fileName).getBlockName();

  // Change the length of a replica
  for (int i=0; i<cluster.getDataNodes().size(); i++) {
    if (TestDatanodeBlockScanner.changeReplicaLength(block, i, lenDelta)) {
      break;
    }
  }

  // increase the file's replication factor
  fs.setReplication(fileName, (short)(REPLICATION_FACTOR+1));

  // block replication triggers corrupt block detection
  DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", 
      cluster.getNameNodePort()), fs.getConf());
  LocatedBlocks blocks = dfsClient.namenode.getBlockLocations(
      fileName.toString(), 0, fileLen);
  if (lenDelta < 0) { // replica truncated
    while (!blocks.get(0).isCorrupt() ||
        REPLICATION_FACTOR != blocks.get(0).getLocations().length) {
      Thread.sleep(100);
      blocks = dfsClient.namenode.getBlockLocations(
          fileName.toString(), 0, fileLen);
    }
  } else { // no corruption detected; block replicated
    while (REPLICATION_FACTOR + 1 != blocks.get(0).getLocations().length) {
      Thread.sleep(100);
      blocks = dfsClient.namenode.getBlockLocations(
          fileName.toString(), 0, fileLen);
    }
  }
  fs.delete(fileName, true);
}
 
Example 19
Source File: TestHDFSTrash.java    From RDFS with Apache License 2.0
public void testTrashEmptier() throws Exception {
  FileSystem fs = cluster.getFileSystem();
  Configuration conf = fs.getConf();
  conf.set("fs.default.name", fs.getUri().toString());
  trashEmptier(fs, conf);
}
 
Example 20
Source File: TestDirectoryBlockFixer.java    From RDFS with Apache License 2.0
@Test
public void testDirectoryFilterUnfixableFiles() throws IOException {
  conf = new Configuration();
  dfsCluster = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
  dfsCluster.waitActive();
  FileSystem fs = dfsCluster.getFileSystem();

  Utils.loadTestCodecs(conf, 3, 5, 1,
      3, "/destraid", "/destraidrs", false, true);
  try {
    Configuration testConf = fs.getConf();
    BlockIntegrityMonitor blockFixer = new
        LocalBlockIntegrityMonitor(testConf);

    String p1 = "/user/foo/f1";
    String p2 = "/user/foo/f2";
    String p3 = "/user1/foo/bar/f1";
    String p4 = "/a/b";
    String p5 = "/c";
    String p6 = "/destraidrs/user";
    String p7 = "/destraid/user1/foo";
    
    fs.mkdirs(new Path(p6));

    List<String> fileList = new ArrayList<String>();
    fileList.add(p1);
    fileList.add(p2);
    fileList.add(p3);
    fileList.add(p4);
    fileList.add(p5);

    blockFixer.filterUnreconstructableSourceFiles(fs, fileList.iterator());
    // p3 and p5 should be filtered out.
    assertEquals(3, fileList.size());

    Set<String> filtered = new HashSet<String>();
    for (String p: fileList) filtered.add(p);
    assertFalse("File not filtered", filtered.contains(p3));
    assertFalse("File not filtered", filtered.contains(p5));

    fileList.add(p3);
    fs.mkdirs(new Path(p7));
    blockFixer.filterUnreconstructableSourceFiles(fs, fileList.iterator());
    // Nothing is filtered.
    assertEquals(4, fileList.size());
  } finally {
    dfsCluster.shutdown();
  }
}