Java Code Examples for org.apache.hadoop.fs.Path#getFileSystem()

The following examples show how to use org.apache.hadoop.fs.Path#getFileSystem(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: TezCommonUtils.java From incubator-tez with Apache License 2.0

6 votes

/**
 * <p>
 * Resolves the base staging directory. The location is read from the property
 * <code>TezConfiguration.TEZ_AM_STAGING_DIR</code>; when that property is not
 * set, <code>TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT</code> is used. If the
 * directory does not exist yet it is created through <code>mkDirForAM</code>
 * before the path is resolved.
 * </p>
 *
 * @param conf
 *          TEZ configuration
 * @return the staging directory as resolved by its file system
 * @throws TezUncheckedException
 *           wrapping any IOException raised by the file system calls
 */
public static Path getTezBaseStagingPath(Configuration conf) {
  Path stagingDir = new Path(conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
      TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT));
  try {
    FileSystem stagingFs = stagingDir.getFileSystem(conf);
    boolean missing = !stagingFs.exists(stagingDir);
    if (missing) {
      mkDirForAM(stagingFs, stagingDir);
      LOG.info("Stage directory " + stagingDir + " doesn't exist and is created");
    }
    return stagingFs.resolvePath(stagingDir);
  } catch (IOException e) {
    // Callers get an unchecked exception; the original cause is preserved.
    throw new TezUncheckedException(e);
  }
}

Example 2

Source File: FileOutputCommitter.java From hadoop-gpu with Apache License 2.0

6 votes

/**
 * Tells whether the given task attempt has output that needs committing.
 *
 * @param context the task attempt being inspected
 * @return {@code true} when a temporary task output path is available and it
 *         exists on its file system; {@code false} otherwise
 * @throws IOException if the file system lookup or existence check fails
 */
public boolean needsTaskCommit(TaskAttemptContext context) 
throws IOException {
  Path tempOutput = getTempTaskOutputPath(context);
  if (tempOutput == null) {
    return false;
  }
  context.getProgressible().progress();
  // The task output path is created on demand, so its mere existence
  // means there is something to commit.
  FileSystem outputFs = tempOutput.getFileSystem(context.getJobConf());
  return outputFs.exists(tempOutput);
}

Example 3

Source File: TestCredentialProviderFactory.java From hadoop with Apache License 2.0

6 votes

/**
 * Exercises the Java keystore credential provider against a keystore file
 * under {@code tmpDir}: checks the provider, the permissions of the
 * resulting file, and permission retention after an explicit change.
 */
@Test
public void testJksProvider() throws Exception {
  Configuration conf = new Configuration();
  final Path keystorePath = new Path(tmpDir.toString(), "test.jks");
  final String providerUrl =
      JavaKeyStoreProvider.SCHEME_NAME + "://file" + keystorePath.toUri();

  // Remove any keystore left over from an earlier run before configuring
  // the provider.
  File keystoreFile = new File(tmpDir, "test.jks");
  keystoreFile.delete();
  conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, providerUrl);
  checkSpecificProvider(conf, providerUrl);

  Path unnested = ProviderUtils.unnestUri(new URI(providerUrl));
  FileSystem fs = unnested.getFileSystem(conf);
  FileStatus status = fs.getFileStatus(unnested);
  String permissions = status.getPermission().toString();
  assertTrue(permissions.equals("rwx------"));
  assertTrue(keystoreFile + " should exist", keystoreFile.isFile());

  // check permission retention after explicit change
  fs.setPermission(unnested, new FsPermission("777"));
  checkPermissionRetention(conf, providerUrl, unnested);
}

Example 4

Source File: TeraOutputFormat.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Opens the default work file for the task attempt and wraps the resulting
 * output stream in a {@code TeraRecordWriter}.
 *
 * @param job the task attempt whose work file is written
 * @return a record writer backed by the newly created file
 * @throws IOException if the file system cannot be obtained or the file
 *         cannot be created
 */
public RecordWriter<Text,Text> getRecordWriter(TaskAttemptContext job
                                               ) throws IOException {
  Path workFile = getDefaultWorkFile(job, "");
  FSDataOutputStream out =
      workFile.getFileSystem(job.getConfiguration()).create(workFile);
  return new TeraRecordWriter(out, job);
}

Example 5

Source File: FSDownload.java From big-c with Apache License 2.0

5 votes

/**
 * Builds the loader used by the status loading cache. The cache created from
 * it is meant to be handed to the FSDownload constructor.
 *
 * @param conf configuration used to resolve the file system of each path
 * @return a loader that maps a path to an already-completed future holding
 *         either its {@code FileStatus} or the failure raised while
 *         fetching it
 */
public static CacheLoader<Path,Future<FileStatus>>
    createStatusCacheLoader(final Configuration conf) {
  return new CacheLoader<Path,Future<FileStatus>>() {
    @Override
    public Future<FileStatus> load(Path path) {
      Future<FileStatus> outcome;
      try {
        FileStatus status = path.getFileSystem(conf).getFileStatus(path);
        outcome = Futures.immediateFuture(status);
      } catch (Throwable failure) {
        // Failures are returned as a failed future (not thrown) so that
        // the cache memoizes them as well.
        outcome = Futures.immediateFailedFuture(failure);
      }
      return outcome;
    }
  };
}

Example 6

Source File: TestParquetWriter.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates a table from a query, validates its contents against the source
 * query, then inspects the footer of every file written under the table
 * directory. The table is dropped both before and after the run.
 *
 * @param selection           column selection used to populate the table
 * @param validationSelection column selection used when reading the table back
 * @param inputTable          table the data is read from
 * @param outputFile          name of the table to create
 * @param sort                passed through to {@code select}
 */
public void runTestAndValidate(String selection, String validationSelection, String inputTable, String outputFile, boolean sort) throws Exception {
  try {
    deleteTableIfExists(outputFile);
    test("use dfs_test");
    final String sourceQuery = select(selection, inputTable, sort);
    System.out.println(outputFile);
    final String ctasStatement = "CREATE TABLE " + outputFile + " AS " + sourceQuery;
    final String validationQuery = select(validationSelection, outputFile, sort);
    test(ctasStatement);
    test(validationQuery); // TODO: remove
    testBuilder()
        .unOrdered()
        .sqlQuery(validationQuery)
        .sqlBaselineQuery(sourceQuery)
        .go();

    // Every file in the table directory must carry the current version in
    // its footer metadata and have valid page headers.
    final Configuration hadoopConf = new Configuration();
    final Path tableDir = new Path(getDfsTestTmpSchemaLocation(), outputFile);
    final FileSystem tableFs = tableDir.getFileSystem(hadoopConf);
    for (FileStatus dataFile : tableFs.listStatus(tableDir)) {
      ParquetMetadata footer = ParquetFileReader.readFooter(hadoopConf, dataFile, SKIP_ROW_GROUPS);
      String writerVersion = footer.getFileMetaData().getKeyValueMetaData().get(DREMIO_VERSION_PROPERTY);
      assertEquals(DremioVersionInfo.getVersion(), writerVersion);
      PageHeaderUtil.validatePageHeaders(dataFile.getPath(), footer);
    }
  } finally {
    deleteTableIfExists(outputFile);
  }
}

Example 7

Source File: TestJoinDatamerge.java From hadoop with Apache License 2.0

5 votes

/**
 * Fills {@code src} with {@code srcs} paths under {@code testdir} and opens
 * one IntWritable/IntWritable sequence file writer per path.
 *
 * @param testdir directory that will contain the files
 * @param conf    configuration used to resolve the file system
 * @param srcs    number of files to create
 * @param src     output array receiving the generated paths
 * @return the writers, in the same order as {@code src}
 * @throws IOException if a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  // File names are base-36 renderings of 10, 11, ... which yields
  // "a", "b", and so on.
  for (int n = 0; n < srcs; n++) {
    String name = Integer.toString(n + 10, 36);
    src[n] = new Path(testdir, name);
  }
  // All paths are assigned before the first writer is opened.
  SequenceFile.Writer[] writers = new SequenceFile.Writer[srcs];
  for (int n = 0; n < srcs; n++) {
    FileSystem fs = testdir.getFileSystem(conf);
    writers[n] = new SequenceFile.Writer(fs, conf, src[n],
        IntWritable.class, IntWritable.class);
  }
  return writers;
}

Example 8

Source File: MultiFileSplit.java From hadoop with Apache License 2.0

5 votes

/**
 * Collects the hosts of the first block of every file in this split.
 *
 * @return the distinct host names gathered across all paths
 * @throws IOException if a file status or its block locations cannot be read
 */
public String[] getLocations() throws IOException {
  HashSet<String> hosts = new HashSet<String>();
  for (Path path : getPaths()) {
    FileSystem fs = path.getFileSystem(getJob());
    FileStatus stat = fs.getFileStatus(path);
    BlockLocation[] blocks =
        fs.getFileBlockLocations(stat, 0, stat.getLen());
    if (blocks == null || blocks.length == 0) {
      // Nothing known about this file's placement.
      continue;
    }
    // Only the first block contributes hosts.
    addToSet(hosts, blocks[0].getHosts());
  }
  return hosts.toArray(new String[hosts.size()]);
}

Example 9

Source File: TestDataJoin.java From big-c with Apache License 2.0

5 votes

/**
 * Fills {@code src} with {@code srcs} paths under {@code testdir} and opens
 * one Text/Text sequence file writer per path.
 *
 * @param testdir directory that will contain the files
 * @param conf    job configuration used to resolve the file system
 * @param srcs    number of files to create
 * @param src     output array receiving the generated paths
 * @return the writers, in the same order as {@code src}
 * @throws IOException if a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  // File names are base-36 renderings of 10, 11, ... which yields
  // "a", "b", and so on.
  for (int n = 0; n < srcs; n++) {
    String name = Integer.toString(n + 10, 36);
    src[n] = new Path(testdir, name);
  }
  // All paths are assigned before the first writer is opened.
  SequenceFile.Writer[] writers = new SequenceFile.Writer[srcs];
  for (int n = 0; n < srcs; n++) {
    FileSystem fs = testdir.getFileSystem(conf);
    writers[n] = new SequenceFile.Writer(fs, conf, src[n],
        Text.class, Text.class);
  }
  return writers;
}

Example 10

Source File: MRHiveDictUtil.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Returns the length reported by the content summary of the given location.
 *
 * @param hdfsUrl location to measure, parsed as a Hadoop {@code Path}
 * @return {@code ContentSummary.getLength()} for that path
 * @throws IOException if the file system cannot be reached or queried
 */
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration freshConf = new Configuration();
    Path target = new Path(hdfsUrl);
    return target.getFileSystem(freshConf).getContentSummary(target).getLength();
}

Example 11

Source File: DistRaid.java From RDFS with Apache License 2.0

5 votes

/**
 * Removes this job's temporary parity directory for every codec. A failure
 * for one codec is logged and does not stop the remaining deletions.
 */
public void cleanUp() {
  for (Codec codec: Codec.getCodecs()) {
    Path parityTmp = new Path(codec.tmpParityDirectory, this.getJobID());
    try {
      FileSystem parityFs = parityTmp.getFileSystem(jobconf);
      boolean present = parityFs.exists(parityTmp);
      if (!present) {
        continue;
      }
      // Recursive delete of the per-job directory.
      parityFs.delete(parityTmp, true);
    } catch (IOException ioe) {
      LOG.error("Fail to delete " + parityTmp, ioe);
    }
  }
}

Example 12

Source File: AdmmIterationMapper.java From laser with Apache License 2.0

5 votes

/**
 * Initialises the mapper from the job configuration: reads the iteration
 * settings, resolves the file system of the previous intermediate output
 * location, and derives an identifier for the current input split.
 *
 * @param context the task context supplying configuration and input split
 */
protected void setup(Context context) throws IOException,
		InterruptedException {
	conf = context.getConfiguration();

	// Numeric and boolean settings; only "iteration.number" has no default.
	iteration = Integer.parseInt(conf.get("iteration.number"));
	addIntercept = conf.getBoolean("add.intercept", false);
	rho = conf.getFloat("rho", DEFAULT_RHO);
	regularizationFactor = conf.getFloat("regularization.factor",
			DEFAULT_REGULARIZATION_FACTOR);

	previousIntermediateOutputLocation = conf
			.get("previous.intermediate.output.location");
	previousIntermediateOutputLocationPath = new Path(
			previousIntermediateOutputLocation);

	// NOTE(review): a failure here is only logged, so fs keeps whatever
	// value it had before this call -- confirm later code tolerates that.
	try {
		fs = previousIntermediateOutputLocationPath.getFileSystem(conf);
	} catch (IOException e) {
		LOG.info(e.toString());
	}

	lbfgs = new QNMinimizer();

	// Split identifier has the form "<path>:<start> - <length>" before the
	// IP part is stripped.
	FileSplit split = (FileSplit) context.getInputSplit();
	splitId = split.getPath() + ":" + split.getStart()
			+ " - " + split.getLength();
	splitId = removeIpFromHdfsFileName(splitId);

	inputSplitData = new LinkedList<Vector>();
}

Example 13

Source File: ScanPerformanceEvaluation.java From hbase with Apache License 2.0

5 votes

/**
 * Stores the configuration and resolves the file system of the root
 * directory it names. The resolved file system is not kept.
 *
 * @param conf the configuration to apply
 * @throws RuntimeException wrapping any IOException raised while resolving
 *         the root directory or its file system
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  try {
    // The result is discarded; only a failure matters here.
    CommonFSUtils.getRootDir(conf).getFileSystem(conf);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}

Example 14

Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0

4 votes

/**
 * Configures and runs the load-generator job.
 * <p>
 * With no output path the job writes nothing; with no input paths random
 * data is generated. When an indirect input format is configured, every
 * file under the input paths is listed into a sequence file that the job
 * then reads.
 *
 * @param argv command-line arguments, handed to {@code parseArgs}
 * @return {@code -1} when argument parsing fails, {@code 0} otherwise
 */
public int run(String [] argv) throws Exception {
  JobConf job = new JobConf(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  if (FileOutputFormat.getOutputPath(job) == null) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (FileInputFormat.getInputPaths(job).length == 0) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (job.getClass(
     org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
     null) != null) {
    // specified IndirectInputFormat? Build src list
    JobClient client = new JobClient(job);
    Path stagingDir = new Path(client.getFs().getHomeDirectory(), ".staging");
    Random rng = new Random();
    String listName =
        Integer.toString(rng.nextInt(Integer.MAX_VALUE), 36) + "_files";
    Path fileList = new Path(stagingDir, listName);
    job.set(
      org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
      fileList.toString());
    SequenceFile.Writer listWriter = SequenceFile.createWriter(
        stagingDir.getFileSystem(job), job, fileList,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path inputRoot : FileInputFormat.getInputPaths(job)) {
        FileSystem inputFs = inputRoot.getFileSystem(job);
        // Depth-first walk; directories whose name starts with "_" are
        // not descended into.
        Stack<Path> pending = new Stack<Path>();
        pending.push(inputRoot);
        while (!pending.empty()) {
          for (FileStatus entry : inputFs.listStatus(pending.pop())) {
            if (!entry.isDirectory()) {
              // One record per file: (length, URI).
              listWriter.sync();
              listWriter.append(new LongWritable(entry.getLen()),
                  new Text(entry.getPath().toUri().toString()));
            } else if (!entry.getPath().getName().startsWith("_")) {
              pending.push(entry.getPath());
            }
          }
        }
      }
    } finally {
      listWriter.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  long elapsedSeconds = (endTime.getTime() - startTime.getTime()) / 1000;
  System.out.println("The job took " + elapsedSeconds + " seconds.");

  return 0;
}

Example 15

Source File: Cluster.java From spork with Apache License 2.0

4 votes

/**
 * Copies a file from the local disk to {@code destination}, leaving the
 * local source in place.
 * <p>
 * {@code FileSystem.copyFromLocalFile} adds the local file to the file
 * system it is invoked on, so the file system is resolved from the
 * destination path and not from the local source path.
 *
 * @param local       path of the source file on the local disk
 * @param destination path the file is copied to
 * @param overwrite   whether an existing destination may be overwritten
 * @throws IOException if the destination file system cannot be obtained or
 *         the copy fails
 */
public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
    throws IOException {
  FileSystem destinationFs = destination.getFileSystem(configuration);
  // delSrc = false: the local source file is kept.
  destinationFs.copyFromLocalFile(false, overwrite, local, destination);
}

Example 16

Source File: DelimitedTextInputFormat.java From marklogic-contentpump with Apache License 2.0

4 votes

/**
 * Computes the input splits and, when split-input is enabled, attaches the
 * header row of the delimited text to each of them.
 * <p>
 * The splits come from the superclass. They are returned unchanged when
 * split-input is disabled, or when their count reaches
 * {@code SPLIT_COUNT_LIMIT} (in which case split-input is also switched off
 * in the configuration). Otherwise each split is wrapped in a
 * {@code DelimitedSplit} carrying the header most recently read from a
 * split that starts at offset 0.
 *
 * @param job the job whose input is being split
 * @return the plain splits, or header-carrying {@code DelimitedSplit}s
 * @throws IOException if a file cannot be opened or read, or a header record
 *         yields fewer fields than its reported size
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size()>= SPLIT_COUNT_LIMIT) {
        //if #splits > 1 million, there is enough parallelism
        //therefore no point to split
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:"
            + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter =0;
    // Header of the file currently being processed; reused by every later
    // split until another split starting at offset 0 replaces it.
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file: splits) {
        FileSplit fsplit = ((FileSplit)file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);

        if (fsplit.getStart() == 0) {
            // parse the inSplit, get the header
            FSDataInputStream fileIn = fs.open(path);
            try {
                String delimStr = conf.get(ConfigConstants.CONF_DELIMITER,
                    ConfigConstants.DEFAULT_DELIMITER);
                if (delimStr.length() == 1) {
                    delimiter = delimStr.charAt(0);
                } else {
                    // Report the configured value that was rejected; the
                    // delimiter char keeps its previous value.
                    LOG.error("Incorrect delimitor: " + delimStr
                        + ". Expects single character.");
                }
                String encoding = conf.get(
                    MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                    MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
                InputStreamReader instream = new InputStreamReader(fileIn, encoding);
                CSVParser parser = new CSVParser(instream, CSVParserFormatter.
                    getFormat(delimiter, DelimitedTextReader.encapsulator,
                        true, true));
                Iterator<CSVRecord> it = parser.iterator();

                if (it.hasNext()) {
                    CSVRecord record = it.next();
                    Iterator<String> recordIterator = record.iterator();
                    int recordSize = record.size();
                    String[] header = new String[recordSize];
                    for (int i = 0; i < recordSize; i++) {
                        if (recordIterator.hasNext()) {
                            header[i] = recordIterator.next();
                        } else {
                            throw new IOException("Record size doesn't match the real size");
                        }
                    }

                    EncodingUtil.handleBOMUTF8(header, 0);

                    hlist.clear();
                    for (String s : header) {
                        hlist.add(new Text(s));
                    }
                }
            } finally {
                // Release the underlying stream even when header parsing
                // fails part-way through.
                fileIn.close();
            }
        }

        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(
            hlist.toArray(new Text[hlist.size()])), path,
            fsplit.getStart(), fsplit.getLength(),
            fsplit.getLocations());
        populatedSplits.add(ds);
    }

    return populatedSplits;
}

Example 17

Source File: TajoMasterClientService.java From tajo with Apache License 2.0

4 votes

/**
 * Registers an external table whose data lives at the path given in the
 * request.
 * <p>
 * Any failure, including a missing location, is reported through the state
 * of the returned response and is not thrown to the caller.
 *
 * @param controller RPC controller (not used by this method)
 * @param request    table name, path, meta and optional schema/partitioning
 * @return a response carrying the created table on success, or an error
 *         state on failure
 */
@Override
public TableResponse createExternalTable(RpcController controller, CreateTableRequest request)
    throws ServiceException {
  try {
    Session session = context.getSessionManager().getSession(request.getSessionId().getId());
    QueryContext queryContext = new QueryContext(conf, session);

    // The location must already exist; it is never created here.
    Path tablePath = new Path(request.getPath());
    if (!tablePath.getFileSystem(conf).exists(tablePath)) {
      throw new UnavailableTableLocationException(tablePath.toString(), "no such a directory");
    }

    // Schema and partitioning are optional parts of the request.
    Schema schema = request.hasSchema()
        ? SchemaFactory.newV1(request.getSchema())
        : null;
    TableMeta meta = new TableMeta(request.getMeta());
    PartitionMethodDesc partitionDesc = request.hasPartition()
        ? new PartitionMethodDesc(request.getPartition())
        : null;

    TableDesc created = context.getGlobalEngine().getDDLExecutor().getCreateTableExecutor().create(
        queryContext,
        request.getName(),
        null,
        schema,
        meta,
        tablePath.toUri(),
        true,
        partitionDesc,
        false
    );

    return TableResponse.newBuilder()
        .setState(OK)
        .setTable(created.getProto())
        .build();

  } catch (Throwable t) {
    printStackTraceIfError(LOG, t);
    return TableResponse.newBuilder()
        .setState(returnError(t))
        .build();
  }
}

Example 18

Source File: CustomOutputCommitter.java From hadoop with Apache License 2.0

4 votes

/**
 * Creates an empty file with the given name inside the job's output path.
 *
 * @param conf     job configuration supplying the output path
 * @param filename name of the file to create
 * @throws IOException if the file system cannot be obtained or the file
 *         cannot be created
 */
private void writeFile(JobConf conf , String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  Path outputDir = FileOutputFormat.getOutputPath(conf);
  // Create and immediately close: only the file's existence matters.
  outputDir.getFileSystem(conf).create(new Path(outputDir, filename)).close();
}

Example 19

Source File: DistCpSync.java From big-c with Apache License 2.0

4 votes

/**
 * Attempts to bring the target in line with the source using a snapshot
 * diff.
 * <p>
 * Exactly one source path is accepted, and both source and target must be
 * on a {@code DistributedFileSystem}. Unless one of those argument checks
 * fails, the source paths in {@code inputOptions} are replaced by the
 * source snapshot path before returning, whether or not the sync succeeded.
 *
 * @param inputOptions distcp options; its source paths are rewritten
 * @param conf         configuration used to resolve both file systems
 * @return {@code true} when the diff was applied; {@code false} when the
 *         target changed, no diff was available, or applying it failed
 * @throws IllegalArgumentException if the number of source paths is not one
 *         or either file system is not a {@code DistributedFileSystem}
 */
static boolean sync(DistCpOptions inputOptions, Configuration conf)
    throws IOException {
  List<Path> sources = inputOptions.getSourcePaths();
  if (sources.size() != 1) {
    // we only support one source dir which must be a snapshottable directory
    throw new IllegalArgumentException(sources.size()
        + " source paths are provided");
  }
  final Path sourceDir = sources.get(0);
  final Path targetDir = inputOptions.getTargetPath();

  final FileSystem rawSourceFs = sourceDir.getFileSystem(conf);
  final FileSystem rawTargetFs = targetDir.getFileSystem(conf);
  // currently we require both the source and the target file system are
  // DistributedFileSystem.
  final boolean bothDistributed = rawSourceFs instanceof DistributedFileSystem
      && rawTargetFs instanceof DistributedFileSystem;
  if (!bothDistributed) {
    throw new IllegalArgumentException("The FileSystems needs to" +
        " be DistributedFileSystem for using snapshot-diff-based distcp");
  }
  final DistributedFileSystem sourceFs = (DistributedFileSystem) rawSourceFs;
  final DistributedFileSystem targetFs = (DistributedFileSystem) rawTargetFs;

  // make sure targetFS has no change between from and the current states
  final boolean targetUnchanged =
      checkNoChange(inputOptions, targetFs, targetDir);
  if (!targetUnchanged) {
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
        inputOptions.getToSnapshot())));
    return false;
  }

  Path tmpDir = null;
  try {
    tmpDir = createTargetTmpDir(targetFs, targetDir);
    DiffInfo[] diffs = getDiffs(inputOptions, sourceFs, sourceDir, targetDir);
    final boolean haveDiffs = diffs != null;
    if (haveDiffs) {
      // do the real sync work: deletion and rename
      syncDiff(diffs, targetFs, tmpDir);
    }
    return haveDiffs;
  } catch (Exception e) {
    DistCp.LOG.warn("Failed to use snapshot diff for distcp", e);
    return false;
  } finally {
    deleteTargetTmpDir(targetFs, tmpDir);
    // TODO: since we have tmp directory, we can support "undo" with failures
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
        inputOptions.getToSnapshot())));
  }
}

Example 20

Source File: HBCKFsUtils.java From hbase-operator-tools with Apache License 2.0

2 votes

/**
 * Returns the HBase root directory named by {@code HConstants.HBASE_DIR},
 * qualified against the URI and working directory of its file system.
 * <p>
 * COPIED from CommonFSUtils.getRootDir
 *
 * @param c configuration
 * @return {@link Path} to hbase root directory from
 *     configuration as a qualified Path.
 * @throws IOException if the file system for the directory cannot be obtained
 */
public static Path getRootDir(final Configuration c) throws IOException {
  final Path rootDir = new Path(c.get(HConstants.HBASE_DIR));
  final FileSystem rootFs = rootDir.getFileSystem(c);
  return rootDir.makeQualified(rootFs.getUri(), rootFs.getWorkingDirectory());
}