Java Code Examples for org.apache.hadoop.fs.FileSystem#getLocal()

The following examples show how to use org.apache.hadoop.fs.FileSystem#getLocal(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
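Before the project examples, here is a minimal, self-contained sketch of the call itself (a hypothetical demo class, assuming only a writable java.io.tmpdir and the Hadoop common jars on the classpath). FileSystem.getLocal(conf) never touches HDFS; it returns a LocalFileSystem backed by the local disk:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalDemo {
  public static void main(String[] args) throws Exception {
    // getLocal() returns a checksummed LocalFileSystem over the local disk.
    LocalFileSystem fs = FileSystem.getLocal(new Configuration());

    Path file = new Path(System.getProperty("java.io.tmpdir"), "getlocal-demo.txt");
    try (FSDataOutputStream out = fs.create(file, true)) {
      out.writeBytes("hello from the local filesystem\n");
    }
    System.out.println("length = " + fs.getFileStatus(file).getLen());
    fs.delete(file, false);
  }
}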
Example 1
Source File: TestMerge.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Return true if there's a file in 'dirName' with a line that starts
 * with the given record, in the given file layout.
 */
protected boolean recordStartsWith(List<Integer> record, String dirName,
    SqoopOptions.FileLayout fileLayout)
    throws Exception {
  Path warehousePath = new Path(LOCAL_WAREHOUSE_DIR);
  Path targetPath = new Path(warehousePath, dirName);

  FileSystem fs = FileSystem.getLocal(new Configuration());
  FileStatus [] files = fs.listStatus(targetPath);

  if (null == files || files.length == 0) {
    fail("Got no import files!");
  }

  for (FileStatus stat : files) {
    Path p = stat.getPath();
    if (p.getName().startsWith("part-")) {
      if (checkFileForLine(fs, p, fileLayout, record)) {
        // We found the line. Nothing further to do.
        return true;
      }
    }
  }

  return false;
}
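The "part-" filter above matches the names MapReduce gives its output files (part-00000, part-r-00000, and so on) while skipping markers such as _SUCCESS, so the check only reads real task output.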
 
Example 2
Source File: TestMerge.java    From big-c with Apache License 2.0
private void copyPartitions(Path mapOutputPath, Path indexPath)
  throws IOException {
  FileSystem localFs = FileSystem.getLocal(jobConf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  FSDataOutputStream rawOutput = rfs.create(mapOutputPath, true, BUF_SIZE);
  SpillRecord spillRecord = new SpillRecord(numberOfPartitions);
  IndexRecord indexRecord = new IndexRecord();
  for (int i = 0; i < numberOfPartitions; i++) {
    indexRecord.startOffset = rawOutput.getPos();
    byte[] buffer = outStreams[i].toByteArray();
    IFileOutputStream checksumOutput = new IFileOutputStream(rawOutput);
    checksumOutput.write(buffer);
    // Write checksum.
    checksumOutput.finish();
    // Write index record
    indexRecord.rawLength = (long)buffer.length;
    indexRecord.partLength = rawOutput.getPos() - indexRecord.startOffset;
    spillRecord.putIndex(indexRecord, i);
    reporter.progress();
  }
  rawOutput.close();
  spillRecord.writeToFile(indexPath, jobConf);
}
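The getRaw() call above is the detail worth copying: FileSystem.getLocal(conf) returns a checksummed LocalFileSystem that pairs every file with a hidden .crc sidecar, while the RawLocalFileSystem behind it does plain I/O with no checksums. A minimal sketch of the distinction (the path is hypothetical, for illustration only):

Configuration conf = new Configuration();
LocalFileSystem localFs = FileSystem.getLocal(conf); // writes/verifies .crc sidecar files
FileSystem rawFs = localFs.getRaw();                 // plain local I/O, no checksums

// Writing through rawFs creates the file alone; the same write through
// localFs would also create a hidden ".spill-0.out.crc" alongside it.
Path p = new Path("/tmp/spill-0.out"); // hypothetical path
try (FSDataOutputStream out = rawFs.create(p, true, 64 * 1024)) {
  out.write(new byte[]{1, 2, 3});
}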
 
Example 3
Source File: TestBloomMapFile.java    From hadoop with Apache License 2.0
@Override
public void setUp() throws Exception {
  LocalFileSystem fs = FileSystem.getLocal(conf);
  if (fs.exists(TEST_ROOT) && !fs.delete(TEST_ROOT, true)) {
    Assert.fail("Can't clean up test root dir");
  }
  fs.mkdirs(TEST_ROOT);
}
 
Example 4
Source File: TestViewFileSystemWithAuthorityLocalFileSystem.java    From big-c with Apache License 2.0
@Override
@Before
public void setUp() throws Exception {
  // create the test root on local_fs
  fsTarget = FileSystem.getLocal(new Configuration());
  super.setUp(); // this sets up conf (and fcView which we replace)

  // Now create a viewfs using a mount table called "default"
  // hence viewfs://default/
  schemeWithAuthority = 
    new URI(FsConstants.VIEWFS_SCHEME, "default", "/", null, null);
  fsView = FileSystem.get(schemeWithAuthority, conf);
}
 
Example 5
Source File: TestLineRecordReaderJobs.java    From big-c with Apache License 2.0
/**
 * Writes the input test file.
 *
 * @param conf configuration used to obtain the local file system
 * @throws IOException if the test file cannot be created
 */
public void createInputFile(Configuration conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(inputDir, "test.txt");
  Writer writer = new OutputStreamWriter(localFs.create(file));
  writer.write("abc\ndef\t\nghi\njkl");
  writer.close();
}
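One caveat in the snippet above: wrapping localFs.create(file) in an OutputStreamWriter without naming a charset uses the JVM's platform default encoding. A variant pinned to UTF-8 (same conf and inputDir as above, plus java.nio.charset.StandardCharsets) might look like:

FileSystem localFs = FileSystem.getLocal(conf);
Path file = new Path(inputDir, "test.txt");
try (Writer writer = new OutputStreamWriter(localFs.create(file), StandardCharsets.UTF_8)) {
  writer.write("abc\ndef\t\nghi\njkl");
}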
 
Example 6
Source File: TestHistograms.java    From big-c with Apache License 2.0
/**
 * There should be files in the directory named by
 * ${test.tools.input.dir}/rumen/histogram-tests .
 *
 * There will be pairs of files, inputXxx.json and goldXxx.json .
 *
 * We read the input file as a HistogramRawTestData in json. Then we
 * create a Histogram using the data field, and then a
 * LoggedDiscreteCDF using the percentiles and scale field. Finally,
 * we read the corresponding goldXxx.json as a LoggedDiscreteCDF and
 * deepCompare them.
 *
 * @throws IOException
 */
@Test
public void testHistograms() throws IOException {
  final Configuration conf = new Configuration();
  final FileSystem lfs = FileSystem.getLocal(conf);
  final Path rootInputDir = new Path(
      System.getProperty("test.tools.input.dir", "")).makeQualified(lfs);
  final Path rootInputFile = new Path(rootInputDir, "rumen/histogram-tests");

  FileStatus[] tests = lfs.listStatus(rootInputFile);

  for (int i = 0; i < tests.length; ++i) {
    Path filePath = tests[i].getPath();
    String fileName = filePath.getName();
    if (fileName.startsWith("input")) {
      String testName = fileName.substring("input".length());
      Path goldFilePath = new Path(rootInputFile, "gold"+testName);
      assertTrue("Gold file dies not exist", lfs.exists(goldFilePath));
      LoggedDiscreteCDF newResult = histogramFileToCDF(filePath, lfs);
      System.out.println("Testing a Histogram for " + fileName);
      FSDataInputStream goldStream = lfs.open(goldFilePath);
      JsonObjectMapperParser<LoggedDiscreteCDF> parser = new JsonObjectMapperParser<LoggedDiscreteCDF>(
          goldStream, LoggedDiscreteCDF.class); 
      try {
        LoggedDiscreteCDF dcdf = parser.getNext();
        dcdf.deepCompare(newResult, new TreePath(null, "<root>"));
      } catch (DeepInequalityException e) {
        fail(e.path.toString());
      } finally {
        parser.close();
      }
    }
  }
}
 
Example 7
Source File: CommitSequenceTest.java    From incubator-gobblin with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  this.fs = FileSystem.getLocal(new Configuration());

  this.fs.delete(new Path(ROOT_DIR), true);

  Path storeRootDir = new Path(ROOT_DIR, "store");

  Path dir1 = new Path(ROOT_DIR, "dir1");
  Path dir2 = new Path(ROOT_DIR, "dir2");

  this.fs.mkdirs(dir1);
  this.fs.mkdirs(dir2);

  Path src1 = new Path(dir1, "file1");
  Path src2 = new Path(dir2, "file2");
  Path dst1 = new Path(dir2, "file1");
  Path dst2 = new Path(dir1, "file2");
  this.fs.createNewFile(src1);
  this.fs.createNewFile(src2);

  DatasetState ds = new DatasetState("job-name", "job-id");
  ds.setDatasetUrn("urn");
  ds.setNoJobFailure();

  State state = new State();
  state.setProp(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY, storeRootDir.toString());

  this.sequence = new CommitSequence.Builder().withJobName("testjob").withDatasetUrn("testurn")
      .beginStep(FsRenameCommitStep.Builder.class).from(src1).to(dst1).withProps(state).endStep()
      .beginStep(FsRenameCommitStep.Builder.class).from(src2).to(dst2).withProps(state).endStep()
      .beginStep(DatasetStateCommitStep.Builder.class).withDatasetUrn("urn").withDatasetState(ds).withProps(state)
      .endStep().build();
}
 
Example 8
Source File: LoopingDatasetFinderSourceTest.java    From incubator-gobblin with Apache License 2.0
@AfterClass
public void tearDown()
    throws IOException {
  FileSystem fs = FileSystem.getLocal(new Configuration(false));
  Path rootDir = new Path(TEST_STATE_STORE_ROOT_DIR);
  if (fs.exists(rootDir)) {
    fs.delete(rootDir, true);
  }
}
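Note the new Configuration(false): the boolean tells Configuration not to load the default resources (core-default.xml, core-site.xml), so the local filesystem here is built from a blank configuration rather than from whatever site files happen to be on the test classpath.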
 
Example 9
Source File: TestAvroStorage.java    From spork with Apache License 2.0
private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {
  FileSystem fs = FileSystem.getLocal(new Configuration());

  /* read in expected results */
  Set<Object> expected = getExpected(expectedOutpath);

  /* read in output results and compare */
  Path output = new Path(outPath);
  assertTrue("Output dir does not exist!", fs.exists(output)
      && fs.getFileStatus(output).isDir());

  Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
  assertTrue("Split field dirs not found!", paths != null);

  for (Path path : paths) {
    Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
    assertTrue("No files found for path: " + path.toUri().getPath(),
        files != null);
    for (Path filePath : files) {
      assertTrue("This shouldn't be a directory", fs.isFile(filePath));

      GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

      DataFileStream<Object> in = new DataFileStream<Object>(
          fs.open(filePath), reader);
      assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
      int count = 0;
      while (in.hasNext()) {
        Object obj = in.next();
        assertTrue("Avro result object found that's not expected: " + obj,
            expected.contains(obj));
        count++;
      }
      in.close();
      assertEquals(expected.size(), count);
    }
  }
}
 
Example 10
Source File: NMLeveldbStateStoreService.java    From hadoop with Apache License 2.0
private Path createStorageDir(Configuration conf) throws IOException {
  final String storeUri = conf.get(YarnConfiguration.NM_RECOVERY_DIR);
  if (storeUri == null) {
    throw new IOException("No store location directory configured in " +
        YarnConfiguration.NM_RECOVERY_DIR);
  }

  Path root = new Path(storeUri, DB_NAME);
  FileSystem fs = FileSystem.getLocal(conf);
  fs.mkdirs(root, new FsPermission((short)0700));
  return root;
}
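The (short)0700 passed to FsPermission is a Java octal literal, so the recovery directory is created rwx for the owner and inaccessible to group and others, which is appropriate for a directory holding NodeManager recovery state.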
 
Example 11
Source File: TestDelegationTokenRemoteFetcher.java    From hadoop with Apache License 2.0
@Before
public void init() throws Exception {
  conf = new Configuration();
  fileSys = FileSystem.getLocal(conf);
  httpPort = NetUtils.getFreeSocketPort();
  serviceUrl = new URI("http://localhost:" + httpPort);
  testToken = createToken(serviceUrl);
}
 
Example 12
Source File: TestMROutput.java    From tez with Apache License 2.0
public static LogicalIOProcessorRuntimeTask createLogicalTask(
    Configuration conf,
    TezUmbilical umbilical, String dagName,
    String vertexName, TezExecutors sharedExecutor) throws Exception {
  ProcessorDescriptor procDesc = ProcessorDescriptor.create(TestProcessor.class.getName());
  List<InputSpec> inputSpecs = Lists.newLinkedList();
  List<OutputSpec> outputSpecs = Lists.newLinkedList();
  outputSpecs.add(new OutputSpec("Null",
      MROutput.createConfigBuilder(conf, TestOutputFormat.class).build().getOutputDescriptor(), 1));
  
  TaskSpec taskSpec = new TaskSpec(
      TezTestUtils.getMockTaskAttemptId(0, 0, 0, 0),
      dagName, vertexName, -1,
      procDesc,
      inputSpecs,
      outputSpecs, null, null);

  FileSystem fs = FileSystem.getLocal(conf);
  Path workDir =
      new Path(new Path(System.getProperty("test.build.data", "/tmp")),
               "TestMapOutput").makeQualified(fs.getUri(), fs.getWorkingDirectory());

  return new LogicalIOProcessorRuntimeTask(
      taskSpec,
      0,
      conf,
      new String[] {workDir.toString()},
      umbilical,
      null,
      new HashMap<String, String>(),
      HashMultimap.<String, String>create(), null, "", new ExecutionContextImpl("localhost"),
      Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
}
 
Example 13
Source File: UpgradeUtilities.java    From RDFS with Apache License 2.0
public static void createFederatedNameNodeStorageDirs(String[] parents) 
    throws Exception {
  LocalFileSystem localFS = FileSystem.getLocal(new Configuration());
  for (int i = 0; i < parents.length; i++) {
    File newDir = new File(parents[i]);
    createEmptyDirs(new String[] {newDir.toString()});
    localFS.copyToLocalFile(new Path(namenodeStorage.toString()),
        new Path(newDir.toString()),
        false);
  }
}
 
Example 14
Source File: IsolationRunner.java    From RDFS with Apache License 2.0
/**
 * Run a single task.
 * @param args the first argument is the path to the task's job.xml file
 */
public static void main(String[] args
                        ) throws ClassNotFoundException, IOException, 
                                 InterruptedException {
  if (args.length != 1) {
    System.out.println("Usage: IsolationRunner <path>/job.xml");
    System.exit(1);
  }
  File jobFilename = new File(args[0]);
  if (!jobFilename.exists() || !jobFilename.isFile()) {
    System.out.println(jobFilename + " is not a valid job file.");
    System.exit(1);
  }
  JobConf conf = new JobConf(new Path(jobFilename.toString()));
  TaskAttemptID taskId = TaskAttemptID.forName(conf.get("mapred.task.id"));
  boolean isMap = conf.getBoolean("mapred.task.is.map", true);
  int partition = conf.getInt("mapred.task.partition", 0);
  
  // setup the local and user working directories
  FileSystem local = FileSystem.getLocal(conf);
  LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
  File workDirName = new File(lDirAlloc.getLocalPathToRead(
                                TaskTracker.getLocalTaskDir(
                                  taskId.getJobID().toString(), 
                                  taskId.toString())
                                + Path.SEPARATOR + "work",
                                conf).toString());
  local.setWorkingDirectory(new Path(workDirName.toString()));
  FileSystem.get(conf).setWorkingDirectory(conf.getWorkingDirectory());
  
  // set up a classloader with the right classpath
  ClassLoader classLoader = makeClassLoader(conf, workDirName);
  Thread.currentThread().setContextClassLoader(classLoader);
  conf.setClassLoader(classLoader);
  
  Task task;
  if (isMap) {
    Path localSplit = new Path(new Path(jobFilename.toString()).getParent(), 
                               "split.dta");
    DataInputStream splitFile = FileSystem.getLocal(conf).open(localSplit);
    String splitClass = Text.readString(splitFile);
    BytesWritable split = new BytesWritable();
    split.readFields(splitFile);
    splitFile.close();
    task = new MapTask(jobFilename.toString(), taskId, partition, 
                       splitClass, split, 1, conf.getUser());
  } else {
    int numMaps = conf.getNumMapTasks();
    fillInMissingMapOutputs(local, taskId, numMaps, conf);
    task = new ReduceTask(jobFilename.toString(), taskId, partition, numMaps, 
                          1, conf.getUser());
  }
  task.setConf(conf);
  task.run(conf, new FakeUmbilical());
}
 
Example 15
Source File: TestMerger.java    From big-c with Apache License 2.0
@Before
public void setup() throws IOException {
  conf = new Configuration();
  jobConf = new JobConf();
  fs = FileSystem.getLocal(conf);
}
 
Example 16
Source File: BenchmarkThroughput.java    From hadoop-gpu with Apache License 2.0
public int run(String[] args) throws IOException {
  // silence the minidfs cluster
  Log hadoopLog = LogFactory.getLog("org");
  if (hadoopLog instanceof Log4JLogger) {
    ((Log4JLogger) hadoopLog).getLogger().setLevel(Level.WARN);
  }
  int reps = 1;
  if (args.length == 1) {
    try {
      reps = Integer.parseInt(args[0]);
    } catch (NumberFormatException e) {
      printUsage();
      return -1;
    }
  } else if (args.length > 1) {
    printUsage();
    return -1;
  }
  Configuration conf = getConf();
  // the size of the file to write
  long SIZE = conf.getLong("dfsthroughput.file.size",
      10L * 1024 * 1024 * 1024);
  BUFFER_SIZE = conf.getInt("dfsthroughput.buffer.size", 4 * 1024);

  String localDir = conf.get("mapred.temp.dir");
  dir = new LocalDirAllocator("mapred.temp.dir");

  System.setProperty("test.build.data", localDir);
  System.out.println("Local = " + localDir);
  ChecksumFileSystem checkedLocal = FileSystem.getLocal(conf);
  FileSystem rawLocal = checkedLocal.getRawFileSystem();
  for(int i=0; i < reps; ++i) {
    writeAndReadLocalFile("local", conf, SIZE);
    writeAndReadFile(rawLocal, "raw", conf, SIZE);
    writeAndReadFile(checkedLocal, "checked", conf, SIZE);
  }
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster(conf, 1, true, new String[]{"/foo"});
    cluster.waitActive();
    FileSystem dfs = cluster.getFileSystem();
    for(int i=0; i < reps; ++i) {
      writeAndReadFile(dfs, "dfs", conf, SIZE);
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
      // clean up minidfs junk
      rawLocal.delete(new Path(localDir, "dfs"), true);
    }
  }
  return 0;
}
 
Example 17
Source File: TestInputFormatColumnProjection.java    From parquet-mr with Apache License 2.0
@Test
public void testProjectionSize() throws Exception {
  Assume.assumeTrue( // only run this test for Hadoop 2
      org.apache.hadoop.mapreduce.JobContext.class.isInterface());

  File inputFile = temp.newFile();
  FileOutputStream out = new FileOutputStream(inputFile);
  out.write(FILE_CONTENT.getBytes("UTF-8"));
  out.close();

  File tempFolder = temp.newFolder();
  tempFolder.delete();
  Path tempPath = new Path(tempFolder.toURI());

  File outputFolder = temp.newFile();
  outputFolder.delete();

  Configuration conf = new Configuration();
  // set the projection schema
  conf.set("parquet.read.schema", Types.buildMessage()
      .required(BINARY).as(UTF8).named("char")
      .named("FormatTestObject").toString());

  // disable summary metadata, it isn't needed
  conf.set("parquet.enable.summary-metadata", "false");
  conf.set("parquet.example.schema", PARQUET_TYPE.toString());

  {
    Job writeJob = new Job(conf, "write");
    writeJob.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(writeJob, new Path(inputFile.toString()));

    writeJob.setOutputFormatClass(ExampleOutputFormat.class);
    writeJob.setMapperClass(Writer.class);
    writeJob.setNumReduceTasks(0); // write directly to Parquet without reduce
    ParquetOutputFormat.setBlockSize(writeJob, 10240);
    ParquetOutputFormat.setPageSize(writeJob, 512);
    ParquetOutputFormat.setDictionaryPageSize(writeJob, 1024);
    ParquetOutputFormat.setEnableDictionary(writeJob, true);
    ParquetOutputFormat.setMaxPaddingSize(writeJob, 1023); // always pad
    ParquetOutputFormat.setOutputPath(writeJob, tempPath);

    waitForJob(writeJob);
  }

  long bytesWritten = 0;
  FileSystem fs = FileSystem.getLocal(conf);
  for (FileStatus file : fs.listStatus(tempPath)) {
    bytesWritten += file.getLen();
  }

  long bytesRead;
  {
    Job readJob = new Job(conf, "read");
    readJob.setInputFormatClass(ExampleInputFormat.class);
    TextInputFormat.addInputPath(readJob, tempPath);

    readJob.setOutputFormatClass(TextOutputFormat.class);
    readJob.setMapperClass(Reader.class);
    readJob.setNumReduceTasks(0); // no reduce phase
    TextOutputFormat.setOutputPath(readJob, new Path(outputFolder.toString()));

    waitForJob(readJob);

    bytesRead = Reader.bytesReadCounter.getValue();
  }

  Assert.assertTrue("Should read less than 10% of the input file size",
      bytesRead < (bytesWritten / 10));
}
 
Example 18
Source File: MockRemoteDirectoryManager.java    From submarine with Apache License 2.0
@Override
public FileSystem getDefaultFileSystem() throws IOException {
  return FileSystem.getLocal(new Configuration());
}
 
Example 19
Source File: AvroTestToolsTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void test() throws Exception {
  DataTestTools testTools = new AvroTestTools();

  String resourceName = "avroWriterTest";

  File tmpDir = Files.createTempDir();

  FileSystem fs = FileSystem.getLocal(new Configuration());
  Path output = new Path(tmpDir.getAbsolutePath(), "test");

  testTools.writeJsonResourceRecordsAsBinary(resourceName, fs, output, null);

  Assert.assertTrue(testTools.checkSameFilesAndRecords(testTools.readAllRecordsInJsonResource(resourceName, null),
      testTools.readAllRecordsInBinaryDirectory(fs, output), false, null, true));
}
 
Example 20
Source File: LocalFSContract.java    From hadoop with Apache License 2.0
/**
 * Get the local filesystem. This may be overridden by subclasses.
 * @return the filesystem
 * @throws IOException if the local filesystem cannot be created
 */
protected FileSystem getLocalFS() throws IOException {
  return FileSystem.getLocal(getConf());
}