org.apache.hadoop.fs.Path Java Examples

The following examples show how to use org.apache.hadoop.fs.Path. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConsistentListingAspect.java    From s3mper with Apache License 2.0 6 votes vote down vote up
/**
 * Compares the metastore listing with the S3 listing and collects every
 * metastore path that has no counterpart in S3.
 *
 * Paths are matched on the scheme-specific part of their normalized URI.
 * Metastore entries flagged as deleted are skipped.
 *
 * @param metastoreListing entries recorded in the metastore
 * @param s3Listing statuses returned by S3; when {@code null}, every
 *                  non-deleted metastore path is reported as missing
 * @return the metastore paths that were not found in the S3 listing
 */
private List<Path> checkListing(List<FileInfo> metastoreListing, FileStatus [] s3Listing) {
    Map<String, FileStatus> seenInS3 = new HashMap<String, FileStatus>();

    if (s3Listing != null) {
        for (int i = 0; i < s3Listing.length; i++) {
            FileStatus status = s3Listing[i];
            String key = status.getPath().toUri().normalize().getSchemeSpecificPart();
            seenInS3.put(key, status);
        }
    }

    List<Path> absent = new ArrayList<Path>();

    for (FileInfo info : metastoreListing) {
        if (!info.isDeleted()) {
            String key = info.getPath().toUri().normalize().getSchemeSpecificPart();
            if (!seenInS3.containsKey(key)) {
                absent.add(info.getPath());
            }
        }
    }

    return absent;
}
 
Example #2
Source File: TestHadoopDirTreeGenerator.java    From hadoop-ozone with Apache License 2.0 6 votes vote down vote up
/**
 * Walks down from {@code dirPath}, descending into the first sub-directory
 * met at each level, and returns the depth of the first level that contains
 * no directory.
 *
 * Files listed before that first sub-directory are length-checked; entries
 * listed after it are not visited because the method returns as soon as it
 * recurses. The file-count assertion therefore only runs on a level without
 * sub-directories.
 *
 * @param fs file system to list
 * @param dirPath directory to inspect at this level
 * @param depth depth of {@code dirPath}
 * @param expectedDepth depth the tree is expected to have
 * @param expectedSpanCnt span passed to {@code verifyActualSpan}
 * @param expectedFileCnt number of files expected in a directory without
 *                        sub-directories
 * @param perFileSizeInBytes expected length of every file
 * @return the depth reached
 * @throws IOException if listing a directory fails
 */
private int traverseToLeaf(FileSystem fs, Path dirPath, int depth,
                           int expectedDepth, int expectedSpanCnt,
                           int expectedFileCnt, int perFileSizeInBytes)
        throws IOException {
  final FileStatus[] children = fs.listStatus(dirPath);
  // The span is only verified on intermediate levels: the root and the leaf
  // both hold fewer directories.
  final boolean intermediateLevel = depth < expectedDepth - 1;
  if (intermediateLevel) {
    verifyActualSpan(expectedSpanCnt, children);
  }
  int filesSeen = 0;
  for (FileStatus child : children) {
    if (!child.isDirectory()) {
      Assert.assertEquals("Mismatches file len",
              perFileSizeInBytes, child.getLen());
      filesSeen++;
      continue;
    }
    // First directory found: continue the walk one level deeper.
    return traverseToLeaf(fs, child.getPath(), depth + 1, expectedDepth,
            expectedSpanCnt, expectedFileCnt, perFileSizeInBytes);
  }
  Assert.assertEquals("Mismatches files count in a directory",
          expectedFileCnt, filesSeen);
  return depth;
}
 
Example #3
Source File: UtilsForTests.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Fills in {@code jobConf} for a waiting job.
 *
 * The map and reduce signal file names are stored under the keys returned by
 * {@code getTaskSignalParameter(true)} and {@code getTaskSignalParameter(false)}
 * respectively.
 *
 * @param jobConf configuration to populate
 * @param inDir input directory of the job
 * @param outputPath output directory of the job
 * @param numMaps number of map tasks
 * @param numRed number of reduce tasks
 * @param jobName name given to the job
 * @param mapSignalFilename signal file name registered for map tasks
 * @param redSignalFilename signal file name registered for reduce tasks
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
throws IOException {
  // Identity and packaging.
  jobConf.setJobName(jobName);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");

  // Input side.
  // NOTE(review): the input format is assigned twice; presumably the later
  // RandomInputFormat assignment is the one that takes effect - confirm
  // whether the first call is still needed.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setInputFormat(RandomInputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);

  // Output side.
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  // Task classes and parallelism.
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);

  // Signal files the tasks look for.
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
 
Example #4
Source File: SimpleFileIODatasetRuntimeTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a CSV file whose name contains spaces and checks that sampling the
 * dataset through the unescaped path yields ten records.
 */
@Test
public void testGetSampleWithSpecialPath() throws Exception {
    final String specialPath = "/user/test/Marketing Customer Contacts US.CSV";

    RecordSet testData = getSimpleTestData(0);
    writeRandomCsvFile(mini.getFs(), specialPath, testData, "UTF-8");

    // Resolving through a URI escapes the spaces; a user would type the path
    // with plain spaces, so undo the escaping before handing it over.
    String fileSpec = mini.getFs().getUri()
            .resolve(new Path(specialPath).toUri())
            .toString()
            .replace("%20", " ");

    // Configure the component.
    SimpleFileIODatasetProperties datasetProps = createDatasetProperties();
    datasetProps.format.setValue(SimpleFileIOFormat.CSV);
    datasetProps.path.setValue(fileSpec);

    final List<IndexedRecord> sampled = getSample(datasetProps, Integer.MAX_VALUE);

    assertThat(sampled, hasSize(10));
}
 
Example #5
Source File: HiveTargetPathHelper.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the copy-target settings from the dataset's properties.
 *
 * Each optional setting is only populated when its key is present in the
 * properties; otherwise it is left absent.
 *
 * @param dataset dataset whose properties and table identity are consulted
 */
public HiveTargetPathHelper(HiveDataset dataset) {

    this.dataset = dataset;
    this.relocateDataFiles = Boolean.valueOf(
        this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));

    // Target table root: resolved against the table's database and table name.
    if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)) {
        this.targetTableRoot = Optional.of(
            resolvePath(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
                this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()));
    } else {
        this.targetTableRoot = Optional.<Path> absent();
    }

    // Prefix that is to be replaced in the target location.
    if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)) {
        this.targetTablePrefixTobeReplaced = Optional.of(
            new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)));
    } else {
        this.targetTablePrefixTobeReplaced = Optional.<Path> absent();
    }

    // Replacement for that prefix.
    if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)) {
        this.targetTablePrefixReplacement = Optional.of(
            new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)));
    } else {
        this.targetTablePrefixReplacement = Optional.<Path> absent();
    }
}
 
Example #6
Source File: TestRubixCaching.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an HDFS environment whose configuration is initialised by
 * {@code rubixConfigInitializer} plus a set of fixed test settings, and
 * returns the file system it resolves for {@code path}.
 *
 * @param context context passed through to the environment lookup
 * @param path path whose file system is requested
 * @return the file system the environment returns for {@code path}
 * @throws IOException if the file system cannot be obtained
 */
private FileSystem getCachingFileSystem(HdfsContext context, Path path)
        throws IOException
{
    HiveHdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(
            new HdfsConfigurationInitializer(config, ImmutableSet.of()),
            ImmutableSet.of(
                    rubixConfigInitializer,
                    (hadoopConf, unusedContext, unusedUri) -> {
                        // Route the "file" scheme to the caching local implementation.
                        hadoopConf.set("fs.file.impl", CachingLocalFileSystem.class.getName());
                        // NOTE(review): the remaining entries look like placeholder
                        // settings for other file system implementations - confirm.
                        hadoopConf.setBoolean("fs.gs.lazy.init.enable", true);
                        hadoopConf.set("fs.azure.account.key", "Zm9vCg==");
                        hadoopConf.set("fs.adl.oauth2.client.id", "test");
                        hadoopConf.set("fs.adl.oauth2.refresh.url", "http://localhost");
                        hadoopConf.set("fs.adl.oauth2.credential", "password");
                    }));
    return new HdfsEnvironment(hdfsConfiguration, config, new NoHdfsAuthentication())
            .getFileSystem(context, path);
}
 
Example #7
Source File: TestStoreFileInfo.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Opens a reader on a nonsense HFileLink path and checks that the resulting
 * {@link FileNotFoundException} mentions {@code HFileLink} in its message.
 * An {@link IllegalStateException} is raised if opening unexpectedly succeeds.
 */
@Test
public void testOpenErrorMessageHFileLink() throws IOException, IllegalStateException {
  final Path bogusLink = new Path("/hbase/test/0123/cf/testtb=4567-abcd");
  try (FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration())) {
    final StoreFileInfo info = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, bogusLink, true);

    FileNotFoundException failure = null;
    try {
      info.createReader(info.createReaderContext(false, 1000, ReaderType.PREAD), null);
    } catch (FileNotFoundException fnfe) {
      failure = fnfe;
    }

    // Reaching this point without an exception means the bogus link was opened.
    if (failure == null) {
      throw new IllegalStateException();
    }
    // The failure has to be reported by the HFileLink layer.
    assertTrue(failure.getMessage().contains(HFileLink.class.getSimpleName()));
  }
}
 
Example #8
Source File: BenchmarkThroughput.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a file through {@code writeFile}, reads it back through
 * {@code readFile}, and finally removes it.
 *
 * Removal is best effort: an {@link IOException} raised while deleting is
 * ignored so that it cannot hide a failure of the write or read itself.
 *
 * @param fs file system to operate on
 * @param name name passed to the write and read helpers
 * @param conf configuration passed to the write and read helpers
 * @param size size passed to the write helper
 * @throws IOException if writing or reading fails
 */
private void writeAndReadFile(FileSystem fs,
                                     String name,
                                     Configuration conf,
                                     long size
                                     ) throws IOException {
  Path written = null;
  try {
    written = writeFile(fs, name, conf, size);
    readFile(fs, written, name, conf);
  } finally {
    // Nothing to clean up when the write itself never produced a path.
    if (written != null) {
      try {
        fs.delete(written, true);
      } catch (IOException ignored) {
        // IGNORE
      }
    }
  }
}
 
Example #9
Source File: ParquetRecordWriter.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Deletes every existing clean-up location recursively.
 *
 * All locations are attempted even when some deletions fail; the failures
 * are logged individually and reported together at the end.
 *
 * @throws IOException if at least one location could not be deleted
 */
@Override
public void abort() throws IOException {
  final List<String> failedLocations = Lists.newArrayList();
  for (Path target : cleanUpLocations) {
    try {
      if (!fs.exists(target)) {
        continue;
      }
      fs.delete(target, true);
      logger.info("Aborting writer. Location [{}] on file system [{}] is deleted.",
          target.toUri().getPath(), fs.getUri());
    } catch (IOException e) {
      // Remember the failure and carry on with the remaining locations.
      failedLocations.add(target.toUri().getPath());
      logger.error("Failed to delete location [{}] on file system [{}].",
          target, fs.getUri(), e);
    }
  }
  if (failedLocations.isEmpty()) {
    return;
  }
  throw new IOException(String.format(
      "Failed to delete the following locations %s on file system [%s] during aborting writer",
      failedLocations, fs.getUri()));
}
 
Example #10
Source File: TestHftpFileSystem.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a mock HFTP file to the end.
 *
 * The mock file system is backed by a 128 KiB sequential byte buffer. When
 * {@code sendContentLength} is set it is handed a content length that is one
 * byte larger than that buffer; otherwise it is handed {@code null}.
 *
 * The stream is closed even when reading fails, and a failure while closing
 * does not replace the read failure.
 *
 * @param strictContentLength value stored in the configuration under
 *                            {@code HftpFileSystem.STRICT_CONTENT_LENGTH}
 * @param sendContentLength whether a content length is passed to the mock
 * @throws IOException if opening, reading or closing the stream fails
 * @throws URISyntaxException declared for the mock file system set-up
 */
public void readHftpFile(
  boolean strictContentLength, boolean sendContentLength
)
  throws IOException, URISyntaxException {
  int bufSize = 128 * 1024;
  byte[] buf = DFSTestUtil.generateSequentialBytes(0, bufSize);
  final ByteArrayInputStream inputStream = new ByteArrayInputStream(buf);
  // Deliberately one byte more than the data that is actually available.
  final long contentLength = bufSize + 1;
  Configuration conf = new Configuration();

  conf.setBoolean(HftpFileSystem.STRICT_CONTENT_LENGTH, strictContentLength);

  HftpFileSystem fileSystem =
    new MockHftpFileSystem(
      sendContentLength ? contentLength : null, inputStream, conf
    );

  // try-with-resources: the stream used to stay open when read() threw.
  try (FSDataInputStream dataInputStream = fileSystem.open(new Path("dont-care"))) {
    byte[] readBuf = new byte[1024];

    while (dataInputStream.read(readBuf) > -1) {
      //nothing
    }
  }
}
 
Example #11
Source File: TestHadoopArchives.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Archives the files matched by a glob and checks that a recursive listing
 * of the archive equals the recursive listing of the original files.
 */
@Test
public void testGlobFiles() throws Exception {
  final String matchedName = "a";
  final String globPattern =  "dir{1,2}/a";

  final Path firstDir = new Path(inputPath, "dir1");
  final Path secondDir = new Path(inputPath, "dir2");
  fs.mkdirs(firstDir);
  createFile(inputPath, fs, firstDir.getName(), matchedName);
  createFile(inputPath, fs, secondDir.getName(), matchedName);
  createFile(inputPath, fs, firstDir.getName(), "b"); // not part of result

  final FsShell shell = new FsShell(conf);

  // Listing before archiving.
  final List<String> originalPaths =
      lsr(shell, inputPath.toString(), inputPath + "/" + globPattern);
  System.out.println("originalPaths: " + originalPaths);

  // Build the archive and list the same glob inside it.
  final String harRoot = makeArchive(inputPath, globPattern);
  final List<String> archivedPaths = lsr(shell, harRoot, harRoot + "/" + globPattern);

  Assert.assertEquals(originalPaths, archivedPaths);
}
 
Example #12
Source File: TestHadoopArchives.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Archives a single file and checks that a recursive listing of the archive
 * equals the recursive listing of the directory the file came from.
 */
@Test
public void testSingleFile() throws Exception {
  final String onlyFile = "a";
  final Path sourceDir = new Path(inputPath, "dir1");
  fs.mkdirs(sourceDir);
  createFile(inputPath, fs, sourceDir.getName(), onlyFile);

  final FsShell shell = new FsShell(conf);

  // Listing before archiving.
  final List<String> originalPaths = lsr(shell, sourceDir.toString());
  System.out.println("originalPaths: " + originalPaths);

  // Build the archive from that one file and list it.
  final String harRoot = makeArchive(sourceDir, onlyFile);
  final List<String> archivedPaths = lsr(shell, harRoot);

  Assert.assertEquals(originalPaths, archivedPaths);
}
 
Example #13
Source File: TestJobCleanup.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the shared fixtures: clears the test root directory, creates the
 * mini MR cluster, writes one small input file and creates an empty input
 * directory.
 *
 * @throws IOException if the file system or the cluster cannot be set up
 */
@BeforeClass
public static void setUp() throws IOException {
  JobConf conf = new JobConf();
  fileSys = FileSystem.get(conf);
  // Start from a clean slate.
  fileSys.delete(new Path(TEST_ROOT_DIR), true);
  conf.set("mapred.job.tracker.handler.count", "1");
  conf.set("mapred.job.tracker", "127.0.0.1:0");
  conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
  conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
  conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR +
    "/intermediate");
  conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
    .SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "true");

  mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
  inDir = new Path(TEST_ROOT_DIR, "test-input");
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  // try-with-resources: the stream used to stay open when writeBytes() threw.
  try (DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0))) {
    file.writeBytes(input);
  }
  emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
  fileSys.mkdirs(emptyInDir);
}
 
Example #14
Source File: ProviderUtils.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Convert a nested URI to decode the underlying path. The translation takes
 * the authority and parses it into the underlying scheme and authority.
 * For example, "myscheme://hdfs@nn/my/path" is converted to
 * "hdfs://nn/my/path".
 * <p>
 * When the nested URI has no authority (for example "myscheme:///my/path")
 * there is no underlying scheme to extract, so only the path, query and
 * fragment are carried over.
 * @param nestedUri the URI from the nested URI
 * @return the unnested path
 */
public static Path unnestUri(URI nestedUri) {
  StringBuilder result = new StringBuilder();
  // getAuthority() returns null for URIs without an authority component;
  // splitting it unconditionally used to fail with a NullPointerException.
  String authority = nestedUri.getAuthority();
  if (authority != null) {
    // "<scheme>@<authority>": the part before '@' is the underlying scheme.
    String[] parts = authority.split("@", 2);
    result.append(parts[0]);
    result.append("://");
    if (parts.length == 2) {
      result.append(parts[1]);
    }
  }
  result.append(nestedUri.getPath());
  if (nestedUri.getQuery() != null) {
    result.append("?");
    result.append(nestedUri.getQuery());
  }
  if (nestedUri.getFragment() != null) {
    result.append("#");
    result.append(nestedUri.getFragment());
  }
  return new Path(result.toString());
}
 
Example #15
Source File: TestOzoneFileSystem.java    From hadoop-ozone with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that deleting the only child of a directory leaves a key for the
 * parent directory behind, even though creating the child did not add one.
 */
private void testDeleteCreatesFakeParentDir() throws Exception {
  final Path topDir = new Path("/testDeleteCreatesFakeParentDir");
  final Path middleDir = new Path(topDir, "parent");
  final Path leafFile = new Path(middleDir, "child");
  ContractTestUtils.touch(fs, leafFile);
  rootItemCount++; // grandparent

  // Creating a child should not add parent keys to the bucket, so looking
  // up the parent key is expected to fail.
  // NOTE(review): nothing fails the test when the lookup succeeds here -
  // confirm whether that is intended.
  try {
    getKey(middleDir, true);
  } catch (IOException notFound) {
    assertKeyNotFoundException(notFound);
  }

  // Remove the only child.
  fs.delete(leafFile, false);

  // The delete should have created the parent dir key if it did not exist.
  assertEquals(o3fs.pathToKey(middleDir) + "/", getKey(middleDir, true).getName());
}
 
Example #16
Source File: HadoopIgfs20FileSystemAbstractSelfTest.java    From ignite with Apache License 2.0 6 votes vote down vote up
/**
 * Sets a permission on a file while its output stream is still open and
 * checks that the permission is reported once the stream has been closed.
 *
 * @throws Exception If failed.
 */
@Test
public void testSetPermissionIfOutputStreamIsNotClosed() throws Exception {
    final Path target = new Path(new Path(primaryFsUri), "myFile");
    final FsPermission expected = new FsPermission((short)123);

    // Create with the default permission and keep the stream open.
    FSDataOutputStream out = fs.create(target, EnumSet.noneOf(CreateFlag.class),
        Options.CreateOpts.perms(FsPermission.getDefault()));

    // Change the permission before the stream is closed.
    fs.setPermission(target, expected);

    out.close();

    assertEquals(expected, fs.getFileStatus(target).getPermission());
}
 
Example #17
Source File: NativeS3FileSystem.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Opens an output stream that writes to the store key derived from the
 * absolute form of {@code f}.
 *
 * {@code permission}, {@code replication} and {@code blockSize} are accepted
 * but not used by this method.
 *
 * @param f file to create
 * @param overwrite whether an existing file may be replaced
 * @param bufferSize buffer size handed to the underlying output stream
 * @param progress progress callback handed to the underlying output stream
 * @return a stream writing to the store
 * @throws IOException if the file exists and {@code overwrite} is false, or
 *                     if the stream cannot be created
 */
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {

  final boolean alreadyPresent = exists(f);
  if (alreadyPresent && !overwrite) {
    throw new IOException("File already exists:"+f);
  }

  final String storeKey = pathToKey(makeAbsolute(f));
  final NativeS3FsOutputStream rawStream =
      new NativeS3FsOutputStream(getConf(), store, storeKey, progress, bufferSize);
  return new FSDataOutputStream(rawStream, statistics);
}
 
Example #18
Source File: TestDataJoin.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code src} with one path per source under {@code testdir} and opens
 * a Text/Text sequence file writer on each of them.
 *
 * The i-th file is named with the base-36 rendering of {@code i + 10}
 * ("a", "b", ...). All paths are assigned before the first writer is opened.
 *
 * @param testdir directory that receives the source files
 * @param conf job configuration used to resolve the file system
 * @param srcs number of sources
 * @param src output array receiving the source paths
 * @return one writer per source, in the same order as {@code src}
 * @throws IOException if a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int index = 0; index < srcs; index++) {
    final String fileName = Integer.toString(index + 10, 36);
    src[index] = new Path(testdir, fileName);
  }

  final SequenceFile.Writer[] writers = new SequenceFile.Writer[srcs];
  for (int index = 0; index < srcs; index++) {
    final FileSystem targetFs = testdir.getFileSystem(conf);
    writers[index] = new SequenceFile.Writer(targetFs, conf,
        src[index], Text.class, Text.class);
  }
  return writers;
}
 
Example #19
Source File: HDFSResourceStore.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a resource path onto a path under {@code hdfsMetaPath}.
 *
 * "/" maps to {@code hdfsMetaPath} itself; any other value is used as a
 * child of it after a single leading slash has been removed.
 *
 * @param resourcePath resource path to translate
 * @return the corresponding path under {@code hdfsMetaPath}
 */
private Path getRealHDFSPath(String resourcePath) {
    if (resourcePath.equals("/")) {
        return this.hdfsMetaPath;
    }

    // presumably stripped so the value is treated as a child of the meta
    // path rather than as an absolute path - TODO confirm
    String relative = resourcePath;
    if (resourcePath.length() > 1 && resourcePath.startsWith("/")) {
        relative = resourcePath.substring(1);
    }
    return new Path(this.hdfsMetaPath, relative);
}
 
Example #20
Source File: IOUtilFunctions.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the part files that make up a sequence file.
 *
 * If {@code file} is a directory, or uses an object store scheme, the result
 * is the listing of {@code file} without entries whose name starts with "_"
 * and without the "<file>.mtd" entry. Otherwise the result is {@code file}
 * itself.
 *
 * @param fs file system to query
 * @param file single file or directory of part files
 * @return the paths to read
 * @throws IOException if the file system cannot be queried
 */
public static Path[] getSequenceFilePaths( FileSystem fs, Path file ) 
	throws IOException
{
	//Note on object stores: the file system implementations for object stores
	//only emulate a file system, so the directory of a multi-part file does
	//not exist physically and isDirectory returns false for it. In addition,
	//listStatus returns every file that has the directory as prefix, which
	//includes the mtd file; that entry is filtered out below.
	boolean multiPart = fs.isDirectory(file) 
		|| IOUtilFunctions.isObjectStoreFileScheme(file);
	if( !multiPart )
		return new Path[]{ file };
	
	String mtdName = file.toString()+".mtd";
	LinkedList<Path> parts = new LinkedList<>();
	for( FileStatus status : fs.listStatus(file) ) {
		Path candidate = status.getPath();
		if( candidate.getName().startsWith("_") ) //skip internal files
			continue;
		if( candidate.toString().equals(mtdName) ) //mtd file
			continue;
		parts.add(candidate);
	}
	return parts.toArray(new Path[0]);
}
 
Example #21
Source File: IngestJobTest.java    From hadoop-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Copies two regex fixture files into the temp directory, runs the ingest
 * job over them with the regex mapper and checks that the job succeeds and
 * that its record writer holds two entries.
 */
@Test
public void testRegex() throws Exception {
  // Stage both fixtures from the class path into the job's input directory.
  for (String fixtureName : new String[] {"regex-small.txt", "regex-small-2.txt"}) {
    String relative = "regex" + File.separator + fixtureName;
    File local = new File(ClassLoader.getSystemClassLoader().getResource(relative).getPath());
    assertTrue(relative + " does not exist: " + local.getAbsolutePath(), local.exists());
    addContentToFS(new Path(tempDir, relative), Files.toByteArray(local));
  }

  String jobName = "testRegex";

  // Glob that matches both staged files.
  String inputGlob = tempDir.toUri().toString() + File.separator + "regex" + File.separator +
      "regex-small*";
  String regexArg = "-D" + RegexIngestMapper.REGEX + "=\\w+";
  String groupsArg = "-D" + RegexIngestMapper.GROUPS_TO_FIELDS + "=0=match";

  String[] args = new JobArgs().withJobName(jobName).withClassname(RegexIngestMapper.class.getName())
      .withCollection(DEFAULT_COLLECTION).withZkString(getBaseUrl())
      .withInput(inputGlob)
      .withDArgs(regexArg, groupsArg)
      .getJobArgs();

  int exitCode = ToolRunner.run(conf, new IngestJob(), args);
  assertEquals(0, exitCode);

  MockRecordWriter writer = IngestJobMockMapRedOutFormat.writers.get(jobName);
  Assert.assertNotNull(writer);
  assertEquals(2, writer.map.size());
}
 
Example #22
Source File: TestEmptyJob.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Blocks until the path named by the "share" configuration entry exists and
 * then delegates to the parent implementation.
 *
 * @param context job context supplying the configuration
 * @throws IOException if the file system cannot be queried or the parent
 *                     commit fails
 */
@Override
public void commitJob(JobContext context) throws IOException {
  final Configuration jobConf = context.getConfiguration();
  final Path marker = new Path(jobConf.get("share"));
  final FileSystem fileSystem = FileSystem.get(jobConf);

  // Poll for the marker, pausing via UtilsForTests.waitFor(100) between checks.
  while (!fileSystem.exists(marker)) {
    UtilsForTests.waitFor(100);
  }
  super.commitJob(context);
}
 
Example #23
Source File: HdfsClient.java    From tunnel with Apache License 2.0 5 votes vote down vote up
/**
 * Appends the JSON form of {@code event} to the file {@code fileName} on the
 * file system at {@code address}, creating the file when it does not exist.
 *
 * The write is best effort: an {@link IOException} does not propagate to the
 * caller. {@code config} and {@code rule} are not used by this method.
 *
 * @param config not used here
 * @param rule not used here
 * @param event event to serialise and write
 */
public void append(HdfsConfig config, HdfsRule rule, Event event) {
    Configuration hadoopConfig = new Configuration();
    Path hdfsPath = new Path(fileName);
    // Resources are closed in reverse declaration order, so the output stream
    // is closed before the file system. The previous code closed the file
    // system first and the stream afterwards.
    try (FileSystem fileSystem = FileSystem.get(URI.create(this.address), hadoopConfig);
         FSDataOutputStream fileOutputStream = fileSystem.exists(hdfsPath)
                 ? fileSystem.append(hdfsPath)
                 : fileSystem.create(hdfsPath)) {
        // NOTE(review): writeUTF is length-limited per the DataOutput contract;
        // an oversized event would fail here and be dropped below - confirm
        // that this is acceptable.
        fileOutputStream.writeUTF(JSON.toJSONString(event));
    } catch (IOException ignored) {
        // Kept as best effort to preserve the method's contract.
        // NOTE(review): failures are dropped silently because no logger is
        // visible in this block - consider reporting them.
    }

}
 
Example #24
Source File: HoodieRealtimeRecordReaderUtils.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the schema reported by the file reader for {@code filePath}.
 *
 * @param conf configuration used to obtain the file reader
 * @param filePath file whose schema is read
 * @return the schema of the file
 * @throws HoodieIOException if obtaining the reader or reading the schema
 *                           fails with an {@link IOException}
 */
public static Schema readSchema(Configuration conf, Path filePath) {
  try {
    return HoodieFileReaderFactory.getFileReader(conf, filePath).getSchema();
  } catch (IOException cause) {
    throw new HoodieIOException("Failed to read schema from " + filePath, cause);
  }
}
 
Example #25
Source File: ReadsSparkSinkUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Writes the reads of {@code inputBam} as a single output file, checks that
 * the requested index files exist for BAM output, then reads the output back
 * and verifies that it is sorted and has the same number of reads.
 *
 * @param inputBam reads to load
 * @param referenceFile reference to use, or {@code null} for none
 * @param outputPath destination of the written reads
 * @param outputPartsPath parts path passed to the writer
 * @param writeBai whether a BAI index is requested
 * @param writeSbi whether an SBI index is requested
 * @param sbiGranularity granularity the SBI index is expected to report
 * @throws IOException if reading or writing fails
 */
private void assertSingleShardedWritingWorks(GATKPath inputBam, String referenceFile, String outputPath, String outputPartsPath, boolean writeBai, boolean writeSbi, long sbiGranularity) throws IOException {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    final GATKPath referencePath;
    if (referenceFile == null) {
        referencePath = null;
    } else {
        referencePath = new GATKPath(referenceFile);
    }

    ReadsSparkSource source = new ReadsSparkSource(ctx);
    JavaRDD<GATKRead> originalReads = source.getParallelReads(inputBam, referencePath);
    SAMFileHeader header = source.getHeader(inputBam, referencePath);

    ReadsSparkSink.writeReads(ctx, outputPath, referencePath, originalReads, header, ReadsWriteFormat.SINGLE, 0, outputPartsPath, writeBai, writeSbi, true, sbiGranularity);

    // Index files are only expected next to BAM output.
    final boolean outputIsBam = new GATKPath(outputPath).isBam();

    // check that a bai file is created
    if (outputIsBam && writeBai) {
        Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + FileExtensions.BAI_INDEX)));
    }
    // check that a splitting bai file is created with correct granularity
    if (outputIsBam && writeSbi) {
        final java.nio.file.Path sbiPath = IOUtils.getPath(outputPath + FileExtensions.SBI);
        Assert.assertTrue(Files.exists(sbiPath));
        Assert.assertEquals(SBIIndex.load(sbiPath).getGranularity(), sbiGranularity);
    }

    // Round trip: load what was just written.
    JavaRDD<GATKRead> rereadReads = source.getParallelReads(new GATKPath(outputPath), referencePath);
    final List<GATKRead> writtenReads = rereadReads.collect();

    assertReadsAreSorted(header, writtenReads);
    Assert.assertEquals(originalReads.count(), rereadReads.count());
}
 
Example #26
Source File: BlurUtilsTest.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Creates ten shard directories plus an additional "logs" directory and
 * runs the shard count validation against them.
 */
@Test
public void testValidateShardCountExtraDir() throws IOException {
  final int shardCount = 10;

  File localDir = new File(TMPDIR, "ValidateShardCount-test");
  rm(localDir);

  Path tableDir = new Path(localDir.toURI());
  FileSystem fs = tableDir.getFileSystem(new Configuration());
  fs.mkdirs(tableDir);
  createShardDirs(shardCount, fs, tableDir);

  // The extra directory that is not a shard.
  fs.mkdirs(new Path(tableDir, "logs"));

  BlurUtil.validateShardCount(shardCount, fs, tableDir);
}
 
Example #27
Source File: TestJobHistoryEventHandler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the event writer and remembers it in {@code eventWriter}.
 *
 * When {@code mockHistoryProcessing} is set a mock writer is used; otherwise
 * the parent implementation creates the writer.
 *
 * @param historyFilePath history file passed to the parent implementation
 * @return the writer that was stored
 * @throws IOException if the parent implementation fails
 */
@Override
protected EventWriter createEventWriter(Path historyFilePath)
    throws IOException {
  this.eventWriter = mockHistoryProcessing
      ? mock(EventWriter.class)
      : super.createEventWriter(historyFilePath);
  return this.eventWriter;
}
 
Example #28
Source File: KeyValueTextInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether {@code file} may be split: it may when no compression
 * codec is found for it, or when the codec found is a
 * {@code SplittableCompressionCodec}.
 *
 * @param context job context supplying the configuration
 * @param file file to check
 * @return {@code true} if the file may be split
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodecFactory codecs =
    new CompressionCodecFactory(context.getConfiguration());
  final CompressionCodec matched = codecs.getCodec(file);
  return matched == null || matched instanceof SplittableCompressionCodec;
}
 
Example #29
Source File: WordCount.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a word count that reads through a Hadoop text input format and writes
 * through a Hadoop text output format.
 *
 * @param args input path followed by result path; with fewer than two
 *             arguments a usage message is printed and nothing is run
 * @throws Exception if setting up or executing the job fails
 */
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: WordCount <input path> <result path>");
		return;
	}

	final String inputPath = args[0];
	final String outputPath = args[1];

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Hadoop input: plain text lines keyed by a LongWritable
	Job job = Job.getInstance();
	HadoopInputFormat<LongWritable, Text> source = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
	TextInputFormat.addInputPath(job, new Path(inputPath));

	// Read the lines into a Flink data set
	DataSet<Tuple2<LongWritable, Text>> lines = env.createInput(source);

	// Tokenize each line (Writable "Text" becomes String), then group on the
	// word and sum up the counts
	DataSet<Tuple2<String, Integer>> tokens = lines.flatMap(new Tokenizer());
	DataSet<Tuple2<String, Integer>> counts = tokens.groupBy(0).aggregate(Aggregations.SUM, 1);

	// Back to Writable types for the Hadoop output format
	DataSet<Tuple2<Text, IntWritable>> writableCounts = counts.map(new HadoopDatatypeMapper());

	// Hadoop output: word and count separated by a single space
	HadoopOutputFormat<Text, IntWritable> sink = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
	// the separator is set under both keys
	// NOTE(review): presumably an old and a new name for the same setting - confirm
	sink.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
	sink.getConfiguration().set("mapred.textoutputformat.separator", " ");
	TextOutputFormat.setOutputPath(job, new Path(outputPath));

	// Output & Execute
	writableCounts.output(sink);
	env.execute("Word Count");
}
 
Example #30
Source File: DeleteExtraPushedSegmentsTest.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
/**
 * With three segments in the cluster and only "mytable_0" among the current
 * segments, checks that two segments are selected for deletion and that
 * "mytable_0" is not one of them.
 */
@Test
public void checkDeleteWithRefresh() {
  // mytable_0, mytable_1, mytable_2
  List<String> clusterSegments = new ArrayList<>();
  for (int i = 0; i < 3; i++) {
    clusterSegments.add("mytable_" + i);
  }

  List<Path> pushedSegments = new ArrayList<>();
  pushedSegments.add(new Path("mytable_0"));

  List<String> toDelete = new SegmentTarPushJob(_defaultProperties)
      .getSegmentsToDelete(clusterSegments, pushedSegments);

  Assert.assertEquals(toDelete.size(), 2);
  Assert.assertFalse(toDelete.contains("mytable_0"));
}