Java Code Examples for org.apache.hadoop.fs.Path

The following examples show how to use org.apache.hadoop.fs.Path. They are extracted from open source projects; the source project and file are noted above each example.
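Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the most common Path operations: construction, parent/child resolution, and use with a FileSystem. The local directory used is purely illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathBasics {
  public static void main(String[] args) throws Exception {
    // Build paths from strings, or by resolving a child against a parent.
    Path dir = new Path("/tmp/path-demo");   // illustrative location
    Path file = new Path(dir, "data.txt");

    System.out.println(file.getName());      // data.txt
    System.out.println(file.getParent());    // /tmp/path-demo
    System.out.println(file.toUri());        // URI form of the path

    // A Path is resolved against a concrete FileSystem
    // (the default FS, local unless configured otherwise).
    Configuration conf = new Configuration();
    FileSystem fs = file.getFileSystem(conf);
    fs.mkdirs(dir);
    System.out.println(fs.exists(dir));      // true
  }
}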
Example 1
Source Project: s3mper   Source File: ConsistentListingAspect.java    License: Apache License 2.0
/**
 * Check the metastore listing against the s3 listing and return any paths
 * missing from s3.
 *
 * @param metastoreListing the paths recorded in the metastore
 * @param s3Listing the file statuses currently listed in s3
 * @return the paths present in the metastore but missing from s3
 */
private List<Path> checkListing(List<FileInfo> metastoreListing, FileStatus [] s3Listing) {
    Map<String, FileStatus> s3paths = new HashMap<String, FileStatus>();
        
    if(s3Listing != null) {
        for (FileStatus fileStatus : s3Listing) {
            s3paths.put(fileStatus.getPath().toUri().normalize().getSchemeSpecificPart(), fileStatus);
        }
    }

    List<Path> missingPaths = new ArrayList<Path>();

    for (FileInfo f : metastoreListing) {
        if(f.isDeleted()) {
            continue;
        }
        
        if (!s3paths.containsKey(f.getPath().toUri().normalize().getSchemeSpecificPart())) {
            missingPaths.add(f.getPath());
        }
    }
    
    return missingPaths;
}
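The map above is keyed on the scheme-specific part of each normalized URI, presumably so that entries that differ only in scheme (for example s3:// versus s3n:// for the same bucket and key) still compare equal. A small standalone illustration with a made-up bucket name:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class SchemeSpecificPartDemo {
  public static void main(String[] args) {
    // Both URIs share the same scheme-specific part even though the schemes differ.
    URI a = new Path("s3://example-bucket/dir/file").toUri().normalize();
    URI b = new Path("s3n://example-bucket/dir/file").toUri().normalize();
    System.out.println(a.getSchemeSpecificPart());  // //example-bucket/dir/file
    System.out.println(b.getSchemeSpecificPart());  // //example-bucket/dir/file
    System.out.println(a.getSchemeSpecificPart().equals(b.getSchemeSpecificPart())); // true
  }
}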
 
Example 2
Source Project: hadoop-ozone   Source File: TestHadoopDirTreeGenerator.java    License: Apache License 2.0
private int traverseToLeaf(FileSystem fs, Path dirPath, int depth,
                           int expectedDepth, int expectedSpanCnt,
                           int expectedFileCnt, int perFileSizeInBytes)
        throws IOException {
  FileStatus[] fileStatuses = fs.listStatus(dirPath);
  // check the number of peer directories, skipping root and leaf
  // as both have fewer directories.
  if (depth < expectedDepth - 1) {
    verifyActualSpan(expectedSpanCnt, fileStatuses);
  }
  int actualNumFiles = 0;
  for (FileStatus fileStatus : fileStatuses) {
    if (fileStatus.isDirectory()) {
      ++depth;
      return traverseToLeaf(fs, fileStatus.getPath(), depth, expectedDepth,
              expectedSpanCnt, expectedFileCnt, perFileSizeInBytes);
    } else {
      Assert.assertEquals("Mismatches file len",
              perFileSizeInBytes, fileStatus.getLen());
      actualNumFiles++;
    }
  }
  Assert.assertEquals("Mismatches files count in a directory",
          expectedFileCnt, actualNumFiles);
  return depth;
}
 
Example 3
Source Project: hadoop   Source File: UtilsForTests.java    License: Apache License 2.0
/**
 * Configure a waiting job
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
throws IOException {
  jobConf.setJobName(jobName);
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class); // overrides the SequenceFile input format set above
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
 
Example 4
Source Project: components   Source File: SimpleFileIODatasetRuntimeTest.java    License: Apache License 2.0
@Test
public void testGetSampleWithSpecialPath() throws Exception {
    RecordSet rs = getSimpleTestData(0);
    writeRandomCsvFile(mini.getFs(), "/user/test/Marketing Customer Contacts US.CSV", rs, "UTF-8");
    String fileSpec = mini.getFs().getUri().resolve(new Path("/user/test/Marketing Customer Contacts US.CSV").toUri()).toString();
    // the resolve() call above escapes the space, so restore it here;
    // the user-supplied path should not be escaped
    fileSpec = fileSpec.replace("%20", " ");
    
    // Configure the component.
    SimpleFileIODatasetProperties props = createDatasetProperties();
    props.format.setValue(SimpleFileIOFormat.CSV);
    props.path.setValue(fileSpec);

    final List<IndexedRecord> actual = getSample(props,Integer.MAX_VALUE);

    assertThat(actual, hasSize(10));
}
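The replace("%20", " ") above compensates for the fact that converting a Path to a URI percent-encodes spaces, while the Path's own string form does not. A quick standalone illustration of that behavior:

import org.apache.hadoop.fs.Path;

public class PathEscapingDemo {
  public static void main(String[] args) {
    Path p = new Path("/user/test/Marketing Customer Contacts US.CSV");
    // The URI string form percent-encodes the spaces ...
    System.out.println(p.toUri().toString()); // /user/test/Marketing%20Customer%20Contacts%20US.CSV
    // ... while Path.toString() keeps them as-is.
    System.out.println(p);                    // /user/test/Marketing Customer Contacts US.CSV
  }
}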
 
Example 5
Source Project: incubator-gobblin   Source File: HiveTargetPathHelper.java    License: Apache License 2.0
public HiveTargetPathHelper(HiveDataset dataset) {

    this.dataset = dataset;
    this.relocateDataFiles = Boolean
        .valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));
    this.targetTableRoot = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)
        ? Optional.of(resolvePath(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
        this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()))
        : Optional.<Path> absent();

    this.targetTablePrefixTobeReplaced =
        this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)
            ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)))
            : Optional.<Path> absent();

    this.targetTablePrefixReplacement = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)
        ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)))
        : Optional.<Path> absent();
  }
 
Example 6
Source Project: presto   Source File: TestRubixCaching.java    License: Apache License 2.0
private FileSystem getCachingFileSystem(HdfsContext context, Path path)
        throws IOException
{
    HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of());
    HiveHdfsConfiguration configuration = new HiveHdfsConfiguration(
            configurationInitializer,
            ImmutableSet.of(
                    rubixConfigInitializer,
                    (dynamicConfig, ignoredContext, ignoredUri) -> {
                        dynamicConfig.set("fs.file.impl", CachingLocalFileSystem.class.getName());
                        dynamicConfig.setBoolean("fs.gs.lazy.init.enable", true);
                        dynamicConfig.set("fs.azure.account.key", "Zm9vCg==");
                        dynamicConfig.set("fs.adl.oauth2.client.id", "test");
                        dynamicConfig.set("fs.adl.oauth2.refresh.url", "http://localhost");
                        dynamicConfig.set("fs.adl.oauth2.credential", "password");
                    }));
    HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    return environment.getFileSystem(context, path);
}
 
Example 7
Source Project: hbase   Source File: TestStoreFileInfo.java    License: Apache License 2.0
@Test
public void testOpenErrorMessageHFileLink() throws IOException, IllegalStateException {
  // Test file link exception
  // Try to open nonsense hfilelink. Make sure exception is from HFileLink.
  Path p = new Path("/hbase/test/0123/cf/testtb=4567-abcd");
  try (FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration())) {
    StoreFileInfo sfi = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, p, true);
    try {
      ReaderContext context = sfi.createReaderContext(false, 1000, ReaderType.PREAD);
      sfi.createReader(context, null);
      throw new IllegalStateException(); // should not reach here; createReader must throw FileNotFoundException
    } catch (FileNotFoundException fnfe) {
      assertTrue(fnfe.getMessage().contains(HFileLink.class.getSimpleName()));
    }
  }
}
 
Example 8
Source Project: hadoop   Source File: BenchmarkThroughput.java    License: Apache License 2.0
private void writeAndReadFile(FileSystem fs,
                                     String name,
                                     Configuration conf,
                                     long size
                                     ) throws IOException {
  Path f = null;
  try {
    f = writeFile(fs, name, conf, size);
    readFile(fs, f, name, conf);
  } finally {
    try {
      if (f != null) {
        fs.delete(f, true);
      }
    } catch (IOException ie) {
      // IGNORE
    }
  }
}
 
Example 9
Source Project: Bats   Source File: ParquetRecordWriter.java    License: Apache License 2.0
@Override
public void abort() throws IOException {
  List<String> errors = Lists.newArrayList();
  for (Path location : cleanUpLocations) {
    try {
      if (fs.exists(location)) {
        fs.delete(location, true);
        logger.info("Aborting writer. Location [{}] on file system [{}] is deleted.",
            location.toUri().getPath(), fs.getUri());
      }
    } catch (IOException e) {
      errors.add(location.toUri().getPath());
      logger.error("Failed to delete location [{}] on file system [{}].",
          location, fs.getUri(), e);
    }
  }
  if (!errors.isEmpty()) {
    throw new IOException(String.format("Failed to delete the following locations %s on file system [%s]" +
        " during aborting writer", errors, fs.getUri()));
  }
}
 
Example 10
Source Project: RDFS   Source File: TestHftpFileSystem.java    License: Apache License 2.0
public void readHftpFile(
  boolean strictContentLength, boolean sendContentLength
)
  throws IOException, URISyntaxException {
  int bufSize = 128 * 1024;
  byte[] buf = DFSTestUtil.generateSequentialBytes(0, bufSize);
  final ByteArrayInputStream inputStream = new ByteArrayInputStream(buf);
  final long contentLength = bufSize + 1;
  Configuration conf = new Configuration();

  conf.setBoolean(HftpFileSystem.STRICT_CONTENT_LENGTH, strictContentLength);

  HftpFileSystem fileSystem =
    new MockHftpFileSystem(
      sendContentLength ? contentLength : null, inputStream, conf
    );
  FSDataInputStream dataInputStream = fileSystem.open(new Path("dont-care"));
  byte[] readBuf = new byte[1024];

  while (dataInputStream.read(readBuf) > -1) {
    //nothing
  }

  dataInputStream.close();
}
 
Example 11
Source Project: hadoop   Source File: TestHadoopArchives.java    License: Apache License 2.0
@Test
public void testGlobFiles() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  final Path sub2 = new Path(inputPath, "dir2");
  fs.mkdirs(sub1);
  String fileName = "a";
  createFile(inputPath, fs, sub1.getName(), fileName);
  createFile(inputPath, fs, sub2.getName(), fileName);
  createFile(inputPath, fs, sub1.getName(), "b"); // not part of result

  final String glob =  "dir{1,2}/a";
  final FsShell shell = new FsShell(conf);
  final List<String> originalPaths = lsr(shell, inputPath.toString(),
      inputPath + "/" + glob);
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(inputPath, glob);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr,
      fullHarPathStr + "/" + glob);
  Assert.assertEquals(originalPaths, harPaths);
}
 
Example 12
Source Project: big-c   Source File: TestHadoopArchives.java    License: Apache License 2.0
@Test
public void testSingleFile() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  fs.mkdirs(sub1);
  String singleFileName = "a";
  createFile(inputPath, fs, sub1.getName(), singleFileName);
  final FsShell shell = new FsShell(conf);

  final List<String> originalPaths = lsr(shell, sub1.toString());
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(sub1, singleFileName);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr);
  Assert.assertEquals(originalPaths, harPaths);
}
 
Example 13
Source Project: big-c   Source File: TestJobCleanup.java    License: Apache License 2.0
@BeforeClass
public static void setUp() throws IOException {
  JobConf conf = new JobConf();
  fileSys = FileSystem.get(conf);
  fileSys.delete(new Path(TEST_ROOT_DIR), true);
  conf.set("mapred.job.tracker.handler.count", "1");
  conf.set("mapred.job.tracker", "127.0.0.1:0");
  conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
  conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
  conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR +
    "/intermediate");
  conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
    .SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "true");

  mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
  inDir = new Path(TEST_ROOT_DIR, "test-input");
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
  file.writeBytes(input);
  file.close();
  emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
  fileSys.mkdirs(emptyInDir);
}
 
Example 14
Source Project: hadoop   Source File: ProviderUtils.java    License: Apache License 2.0
/**
 * Convert a nested URI to decode the underlying path. The translation takes
 * the authority and parses it into the underlying scheme and authority.
 * For example, "myscheme://hdfs@nn/my/path" is converted to
 * "hdfs://nn/my/path".
 * @param nestedUri the URI from the nested URI
 * @return the unnested path
 */
public static Path unnestUri(URI nestedUri) {
  String[] parts = nestedUri.getAuthority().split("@", 2);
  StringBuilder result = new StringBuilder(parts[0]);
  result.append("://");
  if (parts.length == 2) {
    result.append(parts[1]);
  }
  result.append(nestedUri.getPath());
  if (nestedUri.getQuery() != null) {
    result.append("?");
    result.append(nestedUri.getQuery());
  }
  if (nestedUri.getFragment() != null) {
    result.append("#");
    result.append(nestedUri.getFragment());
  }
  return new Path(result.toString());
}
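A small usage sketch of the method above, mirroring the javadoc example; the import assumes the class shown here is Hadoop's org.apache.hadoop.security.ProviderUtils.

import java.net.URI;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.ProviderUtils; // assumed package of the class shown above

public class UnnestUriDemo {
  public static void main(String[] args) throws Exception {
    // The scheme "hdfs" and authority "nn" are packed into the nested URI's
    // authority as "hdfs@nn"; unnestUri() pulls them back apart.
    URI nested = new URI("myscheme://hdfs@nn/my/path");
    Path unnested = ProviderUtils.unnestUri(nested);
    System.out.println(unnested);  // hdfs://nn/my/path
  }
}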
 
Example 15
Source Project: hadoop-ozone   Source File: TestOzoneFileSystem.java    License: Apache License 2.0
private void testDeleteCreatesFakeParentDir() throws Exception {
  Path grandparent = new Path("/testDeleteCreatesFakeParentDir");
  Path parent = new Path(grandparent, "parent");
  Path child = new Path(parent, "child");
  ContractTestUtils.touch(fs, child);
  rootItemCount++; // grandparent

  // Verify that parent dir key does not exist
  // Creating a child should not add parent keys to the bucket
  try {
    getKey(parent, true);
  } catch (IOException ex) {
    assertKeyNotFoundException(ex);
  }

  // Delete the child key
  fs.delete(child, false);

  // Deleting the only child should create the parent dir key if it does
  // not exist
  String parentKey = o3fs.pathToKey(parent) + "/";
  OzoneKeyDetails parentKeyInfo = getKey(parent, true);
  assertEquals(parentKey, parentKeyInfo.getName());
}
 
Example 16
/** @throws Exception If failed. */
@Test
public void testSetPermissionIfOutputStreamIsNotClosed() throws Exception {
    Path fsHome = new Path(primaryFsUri);
    Path file = new Path(fsHome, "myFile");

    FsPermission perm = new FsPermission((short)123);

    FSDataOutputStream os = fs.create(file, EnumSet.noneOf(CreateFlag.class),
        Options.CreateOpts.perms(FsPermission.getDefault()));

    fs.setPermission(file, perm);

    os.close();

    assertEquals(perm, fs.getFileStatus(file).getPermission());
}
 
Example 17
Source Project: RDFS   Source File: NativeS3FileSystem.java    License: Apache License 2.0
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {

  if (exists(f) && !overwrite) {
    throw new IOException("File already exists:"+f);
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store,
      key, progress, bufferSize), statistics);
}
 
Example 18
Source Project: RDFS   Source File: TestDataJoin.java    License: Apache License 2.0
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
 
Example 19
Source Project: kylin-on-parquet-v2   Source File: HDFSResourceStore.java    License: Apache License 2.0
private Path getRealHDFSPath(String resourcePath) {
    if (resourcePath.equals("/"))
        return this.hdfsMetaPath;
    if (resourcePath.startsWith("/") && resourcePath.length() > 1)
        resourcePath = resourcePath.substring(1, resourcePath.length());
    return new Path(this.hdfsMetaPath, resourcePath);
}
 
Example 20
Source Project: systemds   Source File: IOUtilFunctions.java    License: Apache License 2.0
public static Path[] getSequenceFilePaths( FileSystem fs, Path file ) 
	throws IOException
{
	Path[] ret = null;
	
	//Note on object stores: Since the object store file system implementations 
	//only emulate a file system, the directory of a multi-part file does not
	//exist physically and hence the isDirectory call returns false. Furthermore,
	//listStatus call returns all files with the given directory as prefix, which
	//includes the mtd file which needs to be ignored accordingly.
	
	if( fs.isDirectory(file) 
		|| IOUtilFunctions.isObjectStoreFileScheme(file) )
	{
		LinkedList<Path> tmp = new LinkedList<>();
		FileStatus[] dStatus = fs.listStatus(file);
		for( FileStatus fdStatus : dStatus )
			if( !fdStatus.getPath().getName().startsWith("_") //skip internal files
				&& !fdStatus.getPath().toString().equals(file.toString()+".mtd") ) //mtd file
				tmp.add(fdStatus.getPath());
		ret = tmp.toArray(new Path[0]);
	}
	else {
		ret = new Path[]{ file };
	}
	
	return ret;
}
 
Example 21
Source Project: hadoop-solr   Source File: IngestJobTest.java    License: Apache License 2.0
@Test
public void testRegex() throws Exception {
  String regex1 = "regex" + File.separator + "regex-small.txt";
  File regexFile1 = new File(ClassLoader.getSystemClassLoader().getResource(regex1).getPath());
  assertTrue(regex1 + " does not exist: " + regexFile1.getAbsolutePath(), regexFile1.exists());
  Path input1 = new Path(tempDir, regex1);
  addContentToFS(input1, Files.toByteArray(regexFile1));

  String regex2 = "regex" + File.separator + "regex-small-2.txt";
  File regexFile2 = new File(ClassLoader.getSystemClassLoader().getResource(regex2).getPath());
  assertTrue(regex2 + " does not exist: " + regexFile2.getAbsolutePath(), regexFile2.exists());
  Path input2 = new Path(tempDir, regex2);
  addContentToFS(input2, Files.toByteArray(regexFile2));

  String jobName = "testRegex";

  String[] args = new JobArgs().withJobName(jobName).withClassname(RegexIngestMapper.class.getName())
      .withCollection(DEFAULT_COLLECTION).withZkString(getBaseUrl())
      .withInput(tempDir.toUri().toString() + File.separator + "regex" + File.separator +
          "regex-small*")
      .withDArgs("-D" + RegexIngestMapper.REGEX + "=\\w+", "-D" + RegexIngestMapper
          .GROUPS_TO_FIELDS + "=0=match")
      .getJobArgs();

  int val = ToolRunner.run(conf, new IngestJob(), args);
  assertEquals(0, val);
  MockRecordWriter mockRecordWriter = IngestJobMockMapRedOutFormat.writers.get(jobName);
  Assert.assertNotNull(mockRecordWriter);
  assertEquals(2, mockRecordWriter.map.size());
}
 
Example 22
Source Project: RDFS   Source File: TestEmptyJob.java    License: Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path share = new Path(conf.get("share"));
  FileSystem fs = FileSystem.get(conf);

  
  while (true) {
    if (fs.exists(share)) {
      break;
    }
    UtilsForTests.waitFor(100);
  }
  super.commitJob(context);
}
 
Example 23
Source Project: tunnel   Source File: HdfsClient.java    License: Apache License 2.0
public void append(HdfsConfig config, HdfsRule rule, Event event) {
    try {
        Configuration hadoopConfig = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(this.address), hadoopConfig);
        Path hdfsPath = new Path(fileName);
        FSDataOutputStream fileOutputStream = null;
        try {
            if (fileSystem.exists(hdfsPath)) {
                fileOutputStream = fileSystem.append(hdfsPath);
            } else {
                fileOutputStream = fileSystem.create(hdfsPath);
            }
            fileOutputStream.writeUTF(JSON.toJSONString(event));

        } finally {
            // close the stream before the file system so buffered data is flushed
            if (fileOutputStream != null) {
                fileOutputStream.close();
            }
            if (fileSystem != null) {
                fileSystem.close();
            }
        }
    } catch (IOException e) {
        // ignore write failures; consider logging the exception instead
    }

}
 
Example 24
Source Project: hudi   Source File: HoodieRealtimeRecordReaderUtils.java    License: Apache License 2.0
/**
 * Reads the schema from the base file.
 */
public static Schema readSchema(Configuration conf, Path filePath) {
  try {
    HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
    return storageReader.getSchema();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to read schema from " + filePath, e);
  }
}
 
Example 25
private void assertSingleShardedWritingWorks(GATKPath inputBam, String referenceFile, String outputPath, String outputPartsPath, boolean writeBai, boolean writeSbi, long sbiGranularity) throws IOException {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    final GATKPath referencePath = referenceFile == null ? null : new GATKPath(referenceFile);

    ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, referencePath);
    SAMFileHeader header = readSource.getHeader(inputBam, referencePath);

    ReadsSparkSink.writeReads(ctx, outputPath, referencePath, rddParallelReads, header, ReadsWriteFormat.SINGLE, 0, outputPartsPath, writeBai, writeSbi, true, sbiGranularity);

    // check that a bai file is created
    if (new GATKPath(outputPath).isBam() && writeBai) {
        Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + FileExtensions.BAI_INDEX)));
    }
    // check that a splitting bai file is created with correct granularity
    if (new GATKPath(outputPath).isBam() && writeSbi) {
        final java.nio.file.Path sbiPath = IOUtils.getPath(outputPath + FileExtensions.SBI);
        Assert.assertTrue(Files.exists(sbiPath));
        final SBIIndex sbi = SBIIndex.load(sbiPath);
        Assert.assertEquals(sbi.getGranularity(), sbiGranularity);
    }

    JavaRDD<GATKRead> rddParallelReads2 = readSource.getParallelReads(new GATKPath(outputPath), referencePath);
    final List<GATKRead> writtenReads = rddParallelReads2.collect();

    assertReadsAreSorted(header, writtenReads);
    Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
}
 
Example 26
Source Project: incubator-retired-blur   Source File: BlurUtilsTest.java    License: Apache License 2.0
@Test
public void testValidateShardCountExtraDir() throws IOException {
  File file = new File(TMPDIR, "ValidateShardCount-test");
  rm(file);
  Path path = new Path(file.toURI());
  Configuration conf = new Configuration();
  FileSystem fileSystem = path.getFileSystem(conf);
  fileSystem.mkdirs(path);
  int shardCount = 10;
  createShardDirs(shardCount, fileSystem, path);
  fileSystem.mkdirs(new Path(path, "logs"));
  BlurUtil.validateShardCount(shardCount, fileSystem, path);
}
 
Example 27
Source Project: big-c   Source File: TestJobHistoryEventHandler.java    License: Apache License 2.0
@Override
protected EventWriter createEventWriter(Path historyFilePath)
    throws IOException {
  if (mockHistoryProcessing) {
    this.eventWriter = mock(EventWriter.class);
  }
  else {
    this.eventWriter = super.createEventWriter(historyFilePath);
  }
  return this.eventWriter;
}
 
Example 28
Source Project: hadoop   Source File: KeyValueTextInputFormat.java    License: Apache License 2.0
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
    new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }
  return codec instanceof SplittableCompressionCodec;
}
 
Example 29
Source Project: flink   Source File: WordCount.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: WordCount <input path> <result path>");
		return;
	}

	final String inputPath = args[0];
	final String outputPath = args[1];

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Set up the Hadoop Input Format
	Job job = Job.getInstance();
	HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
	TextInputFormat.addInputPath(job, new Path(inputPath));

	// Create a Flink job with it
	DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

	// Tokenize the line and convert from Writable "Text" to String for better handling
	DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

	// Sum up the words
	DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

	// Convert String back to Writable "Text" for use with Hadoop Output Format
	DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

	// Set up Hadoop Output Format
	HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
	hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
	hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
	TextOutputFormat.setOutputPath(job, new Path(outputPath));

	// Output & Execute
	hadoopResult.output(hadoopOutputFormat);
	env.execute("Word Count");
}
 
Example 30
@Test
public void checkDeleteWithRefresh() {
  List<String> allSegmentsInCluster = new ArrayList<>();
  allSegmentsInCluster.add("mytable_0");
  allSegmentsInCluster.add("mytable_1");
  allSegmentsInCluster.add("mytable_2");

  List<Path> currentSegments = new ArrayList<>();
  currentSegments.add(new Path("mytable_0"));
  SegmentTarPushJob segmentTarPushJob = new SegmentTarPushJob(_defaultProperties);
  List<String> segmentsToDelete = segmentTarPushJob.getSegmentsToDelete(allSegmentsInCluster, currentSegments);
  Assert.assertEquals(segmentsToDelete.size(), 2);
  Assert.assertFalse(segmentsToDelete.contains("mytable_0"));
}