Java Code Examples for org.apache.hadoop.fs.Path#getFileSystem()

The following examples show how to use org.apache.hadoop.fs.Path#getFileSystem(). They are drawn from a range of open-source projects; the source file, project, and license are noted above each example.
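
Before the examples, here is a minimal, self-contained sketch of the core pattern (not taken from any of the projects below; the path string is a placeholder): a Path is resolved against a Configuration to obtain the FileSystem instance that actually backs it.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(); // picks up core-site.xml, etc.
    Path path = new Path("/tmp/example");     // placeholder path
    // The FileSystem implementation is selected by the path's scheme
    // (hdfs://, file://, ...); a scheme-less path falls back to the
    // fs.defaultFS setting in the configuration.
    FileSystem fs = path.getFileSystem(conf);
    System.out.println("Backing file system: " + fs.getUri());
  }
}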
Example 1
Source File: TezCommonUtils.java    From incubator-tez with Apache License 2.0    6 votes
/**
 * <p>
 * This function returns the staging directory defined in the config with
 * property name <code>TezConfiguration.TEZ_AM_STAGING_DIR</code>. If the
 * property is not defined in the conf, Tez uses the value defined as
 * <code>TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT</code>. In addition, the
 * function ensures that the staging directory exists; if it does not, it
 * creates the directory with permission <code>TEZ_AM_DIR_PERMISSION</code>.
 * </p>
 * 
 * @param conf
 *          TEZ configuration
 * @return Fully qualified staging directory
 */
public static Path getTezBaseStagingPath(Configuration conf) {
  String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
      TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT);
  Path baseStagingDir;
  try {
    Path p = new Path(stagingDirStr);
    FileSystem fs = p.getFileSystem(conf);
    if (!fs.exists(p)) {
      mkDirForAM(fs, p);
      LOG.info("Stage directory " + p + " doesn't exist and is created");
    }
    baseStagingDir = fs.resolvePath(p);
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }
  return baseStagingDir;
}
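
The helper mkDirForAM is not shown above. A plausible minimal equivalent, given here as an assumption rather than the actual Tez implementation, creates the directory with the AM permission:

// Hypothetical stand-in for the mkDirForAM helper referenced above;
// FsPermission 0700 approximates TEZ_AM_DIR_PERMISSION.
private static void mkDirForAM(FileSystem fs, Path dir) throws IOException {
  fs.mkdirs(dir, new FsPermission((short) 0700));
}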
 
Example 2
Source File: FileOutputCommitter.java    From hadoop-gpu with Apache License 2.0    6 votes
public boolean needsTaskCommit(TaskAttemptContext context)
    throws IOException {
  try {
    Path taskOutputPath = getTempTaskOutputPath(context);
    if (taskOutputPath != null) {
      context.getProgressible().progress();
      // Get the file-system for the task output directory
      FileSystem fs = taskOutputPath.getFileSystem(context.getJobConf());
      // since task output path is created on demand, 
      // if it exists, task needs a commit
      if (fs.exists(taskOutputPath)) {
        return true;
      }
    }
  } catch (IOException  ioe) {
    throw ioe;
  }
  return false;
}
 
Example 3
Source File: TestCredentialProviderFactory.java    From hadoop with Apache License 2.0    6 votes
@Test
public void testJksProvider() throws Exception {
  Configuration conf = new Configuration();
  final Path jksPath = new Path(tmpDir.toString(), "test.jks");
  final String ourUrl =
      JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri();

  File file = new File(tmpDir, "test.jks");
  file.delete();
  conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, ourUrl);
  checkSpecificProvider(conf, ourUrl);
  Path path = ProviderUtils.unnestUri(new URI(ourUrl));
  FileSystem fs = path.getFileSystem(conf);
  FileStatus s = fs.getFileStatus(path);
  assertTrue(s.getPermission().toString().equals("rwx------"));
  assertTrue(file + " should exist", file.isFile());

  // check permission retention after explicit change
  fs.setPermission(path, new FsPermission("777"));
  checkPermissionRetention(conf, ourUrl, path);
}
 
Example 4
Source File: TeraOutputFormat.java    From incubator-tez with Apache License 2.0    5 votes
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
    throws IOException {
  Path file = getDefaultWorkFile(job, "");
  FileSystem fs = file.getFileSystem(job.getConfiguration());
  FSDataOutputStream fileOut = fs.create(file);
  return new TeraRecordWriter(fileOut, job);
}
 
Example 5
Source File: FSDownload.java    From big-c with Apache License 2.0    5 votes
/**
 * Creates the cache loader for the status loading cache. This should be used
 * to create an instance of the status cache that is passed into the
 * FSDownload constructor.
 */
public static CacheLoader<Path,Future<FileStatus>>
    createStatusCacheLoader(final Configuration conf) {
  return new CacheLoader<Path,Future<FileStatus>>() {
    public Future<FileStatus> load(Path path) {
      try {
        FileSystem fs = path.getFileSystem(conf);
        return Futures.immediateFuture(fs.getFileStatus(path));
      } catch (Throwable th) {
        // report failures so it can be memoized
        return Futures.immediateFailedFuture(th);
      }
    }
  };
}
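
As a usage note, the returned loader is typically handed to Guava's CacheBuilder to build the LoadingCache that, per the Javadoc above, is passed to the FSDownload constructor. A minimal wiring sketch (the maximumSize value is an illustrative placeholder):

LoadingCache<Path, Future<FileStatus>> statusCache = CacheBuilder.newBuilder()
    .maximumSize(10000)
    .build(createStatusCacheLoader(conf));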
 
Example 6
Source File: TestParquetWriter.java    From dremio-oss with Apache License 2.0    5 votes
public void runTestAndValidate(String selection, String validationSelection, String inputTable, String outputFile, boolean sort) throws Exception {
  try {
    deleteTableIfExists(outputFile);
    test("use dfs_test");
//    test("ALTER SESSION SET \"planner.add_producer_consumer\" = false");
    String query = select(selection, inputTable, sort);
    System.out.println(outputFile);
    String create = "CREATE TABLE " + outputFile + " AS " + query;
    String validateQuery = select(validationSelection, outputFile, sort);
    test(create);
    test(validateQuery); // TODO: remove
    testBuilder()
        .unOrdered()
        .sqlQuery(validateQuery)
        .sqlBaselineQuery(query)
        .go();

    Configuration hadoopConf = new Configuration();
    Path output = new Path(getDfsTestTmpSchemaLocation(), outputFile);
    FileSystem fs = output.getFileSystem(hadoopConf);
    for (FileStatus file : fs.listStatus(output)) {
      ParquetMetadata footer = ParquetFileReader.readFooter(hadoopConf, file, SKIP_ROW_GROUPS);
      String version = footer.getFileMetaData().getKeyValueMetaData().get(DREMIO_VERSION_PROPERTY);
      assertEquals(DremioVersionInfo.getVersion(), version);
      PageHeaderUtil.validatePageHeaders(file.getPath(), footer);
    }
  } finally {
    deleteTableIfExists(outputFile);
  }
}
 
Example 7
Source File: TestJoinDatamerge.java    From hadoop with Apache License 2.0    5 votes
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], IntWritable.class, IntWritable.class);
  }
  return out;
}
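
On current Hadoop releases this SequenceFile.Writer constructor is deprecated. A sketch of the equivalent call through the option-based factory method (assuming a Hadoop 2.x or later API) would be:

SequenceFile.Writer writer = SequenceFile.createWriter(conf,
    SequenceFile.Writer.file(src[i]),
    SequenceFile.Writer.keyClass(IntWritable.class),
    SequenceFile.Writer.valueClass(IntWritable.class));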
 
Example 8
Source File: MultiFileSplit.java    From hadoop with Apache License 2.0    5 votes
public String[] getLocations() throws IOException {
  HashSet<String> hostSet = new HashSet<String>();
  for (Path file : getPaths()) {
    FileSystem fs = file.getFileSystem(getJob());
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] blkLocations = fs.getFileBlockLocations(status,
                                        0, status.getLen());
    if (blkLocations != null && blkLocations.length > 0) {
      addToSet(hostSet, blkLocations[0].getHosts());
    }
  }
  return hostSet.toArray(new String[hostSet.size()]);
}
 
Example 9
Source File: TestDataJoin.java    From big-c with Apache License 2.0    5 votes
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
 
Example 10
Source File: MRHiveDictUtil.java    From kylin-on-parquet-v2 with Apache License 2.0    5 votes
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 11
Source File: DistRaid.java    From RDFS with Apache License 2.0    5 votes
public void cleanUp() {
  for (Codec codec: Codec.getCodecs()) {
    Path tmpdir = new Path(codec.tmpParityDirectory, this.getJobID());
    try {
      FileSystem fs = tmpdir.getFileSystem(jobconf);
      if (fs.exists(tmpdir)) {
        fs.delete(tmpdir, true);
      }
    } catch (IOException ioe) {
      LOG.error("Fail to delete " + tmpdir, ioe);
    }
  }
}
 
Example 12
Source File: AdmmIterationMapper.java    From laser with Apache License 2.0    5 votes
protected void setup(Context context)
    throws IOException, InterruptedException {
  conf = context.getConfiguration();
  iteration = Integer.parseInt(conf.get("iteration.number"));
  addIntercept = conf.getBoolean("add.intercept", false);
  rho = conf.getFloat("rho", DEFAULT_RHO);
  regularizationFactor = conf.getFloat("regularization.factor",
      DEFAULT_REGULARIZATION_FACTOR);
  previousIntermediateOutputLocation = conf
      .get("previous.intermediate.output.location");
  previousIntermediateOutputLocationPath = new Path(
      previousIntermediateOutputLocation);

  try {
    fs = previousIntermediateOutputLocationPath.getFileSystem(conf);
  } catch (IOException e) {
    LOG.info(e.toString());
  }

  lbfgs = new QNMinimizer();

  FileSplit split = (FileSplit) context.getInputSplit();
  splitId = split.getPath() + ":" + Long.toString(split.getStart())
      + " - " + Long.toString(split.getLength());
  splitId = removeIpFromHdfsFileName(splitId);

  inputSplitData = new LinkedList<Vector>();
}
 
Example 13
Source File: ScanPerformanceEvaluation.java    From hbase with Apache License 2.0    5 votes
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  Path rootDir;
  try {
    rootDir = CommonFSUtils.getRootDir(conf);
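    // resolving the FileSystem here surfaces configuration problems early;
    // the result itself is discarded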
    rootDir.getFileSystem(conf);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
 
Example 14
Source File: GenericMRLoadGenerator.java    From hadoop with Apache License 2.0    4 votes
public int run(String [] argv) throws Exception {
  JobConf job = new JobConf(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass(
     org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
     null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path tmpDir = new Path(jClient.getFs().getHomeDirectory(), ".staging");
    Random r = new Random();
    Path indirInputFile = new Path(tmpDir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    job.set(
      org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
      indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(job), job, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDirectory()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) /1000 +
                     " seconds.");

  return 0;
}
 
Example 15
Source File: Cluster.java    From spork with Apache License 2.0    4 votes
public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
    throws IOException {
  FileSystem fs = local.getFileSystem(configuration);
  fs.copyFromLocalFile(false, overwrite, local, destination);
}
 
Example 16
Source File: DelimitedTextInputFormat.java    From marklogic-contentpump with Apache License 2.0    4 votes
public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        //if #splits > 1 million, there is enough parallelism
        //therefore no point to split
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:"
            + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file: splits) {
        FileSplit fsplit = ((FileSplit)file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);
        
        if (fsplit.getStart() == 0) {
        // parse the inSplit, get the header
            FSDataInputStream fileIn = fs.open(path);

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER,
                ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimitor: " + delimiter
                    + ". Expects single character.");
            }
            String encoding = conf.get(
                MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream, CSVParserFormatter.
                getFormat(delimiter, DelimitedTextReader.encapsulator,
                    true, true));
            Iterator<CSVRecord> it = parser.iterator();
            
            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = (CSVRecord) it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = (String) recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }
                
                EncodingUtil.handleBOMUTF8(header, 0);
                
                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }
        
        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(
            hlist.toArray(new Text[hlist.size()])), path,
            fsplit.getStart(), fsplit.getLength(),
            fsplit.getLocations());
        populatedSplits.add(ds);
    }
    
    return populatedSplits;
}
 
Example 17
Source File: TajoMasterClientService.java    From tajo with Apache License 2.0    4 votes
@Override
public TableResponse createExternalTable(RpcController controller, CreateTableRequest request)
    throws ServiceException {
  try {
    Session session = context.getSessionManager().getSession(request.getSessionId().getId());
    QueryContext queryContext = new QueryContext(conf, session);

    Path path = new Path(request.getPath());
    FileSystem fs = path.getFileSystem(conf);

    if (!fs.exists(path)) {
      throw new UnavailableTableLocationException(path.toString(), "no such directory");
    }

    Schema schema = null;
    if (request.hasSchema()) {
      schema = SchemaFactory.newV1(request.getSchema());
    }

    TableMeta meta = new TableMeta(request.getMeta());
    PartitionMethodDesc partitionDesc = null;
    if (request.hasPartition()) {
      partitionDesc = new PartitionMethodDesc(request.getPartition());
    }

    TableDesc desc = context.getGlobalEngine().getDDLExecutor().getCreateTableExecutor().create(
        queryContext,
        request.getName(),
        null,
        schema,
        meta,
        path.toUri(),
        true,
        partitionDesc,
        false
    );

    return TableResponse.newBuilder()
        .setState(OK)
        .setTable(desc.getProto()).build();

  } catch (Throwable t) {
    printStackTraceIfError(LOG, t);
    return TableResponse.newBuilder()
        .setState(returnError(t))
        .build();
  }
}
 
Example 18
Source File: CustomOutputCommitter.java    From hadoop with Apache License 2.0    4 votes
private void writeFile(JobConf conf , String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.create(new Path(outputPath, filename)).close();
}
 
Example 19
Source File: DistCpSync.java    From big-c with Apache License 2.0    4 votes
static boolean sync(DistCpOptions inputOptions, Configuration conf)
    throws IOException {
  List<Path> sourcePaths = inputOptions.getSourcePaths();
  if (sourcePaths.size() != 1) {
    // we only support one source dir which must be a snapshottable directory
    throw new IllegalArgumentException(sourcePaths.size()
        + " source paths are provided");
  }
  final Path sourceDir = sourcePaths.get(0);
  final Path targetDir = inputOptions.getTargetPath();

  final FileSystem sfs = sourceDir.getFileSystem(conf);
  final FileSystem tfs = targetDir.getFileSystem(conf);
  // currently we require both the source and the target file system are
  // DistributedFileSystem.
  if (!(sfs instanceof DistributedFileSystem) ||
      !(tfs instanceof DistributedFileSystem)) {
    throw new IllegalArgumentException("The FileSystems needs to" +
        " be DistributedFileSystem for using snapshot-diff-based distcp");
  }
  final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs;
  final DistributedFileSystem targetFs = (DistributedFileSystem) tfs;

  // make sure targetFS has no change between from and the current states
  if (!checkNoChange(inputOptions, targetFs, targetDir)) {
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
        inputOptions.getToSnapshot())));
    return false;
  }

  Path tmpDir = null;
  try {
    tmpDir = createTargetTmpDir(targetFs, targetDir);
    DiffInfo[] diffs = getDiffs(inputOptions, sourceFs, sourceDir, targetDir);
    if (diffs == null) {
      return false;
    }
    // do the real sync work: deletion and rename
    syncDiff(diffs, targetFs, tmpDir);
    return true;
  } catch (Exception e) {
    DistCp.LOG.warn("Failed to use snapshot diff for distcp", e);
    return false;
  } finally {
    deleteTargetTmpDir(targetFs, tmpDir);
    // TODO: since we have tmp directory, we can support "undo" with failures
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
        inputOptions.getToSnapshot())));
  }
}
 
Example 20
Source File: HBCKFsUtils.java    From hbase-operator-tools with Apache License 2.0    2 votes
/**
 *
 * COPIED from CommonFSUtils.getRootDir
 *
 * @param c configuration
 * @return {@link Path} to hbase root directory from
 *     configuration as a qualified Path.
 * @throws IOException e
 */
public static Path getRootDir(final Configuration c) throws IOException {
  Path p = new Path(c.get(HConstants.HBASE_DIR));
  FileSystem fs = p.getFileSystem(c);
  return p.makeQualified(fs.getUri(), fs.getWorkingDirectory());
}