Java Code Examples for org.apache.hadoop.fs.Path
The following examples show how to use org.apache.hadoop.fs.Path. They are extracted from open source projects; each example notes its source project, author, file, and license.
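Before the project examples, here is a minimal sketch of common Path operations (building paths from strings and parent/child pairs, inspecting them, and listing a directory). The directory and file names (/tmp/path-demo, data.txt) are placeholders chosen for illustration, not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathBasics {
  public static void main(String[] args) throws Exception {
    // Build paths from a string or from a parent/child pair.
    Path root = new Path("/tmp/path-demo");          // placeholder directory
    Path child = new Path(root, "data.txt");         // resolves to /tmp/path-demo/data.txt

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);            // default (local or configured) file system

    fs.mkdirs(root);
    fs.create(child).close();                        // create an empty file

    // Paths expose simple accessors and carry URI information.
    System.out.println(child.getName());             // data.txt
    System.out.println(child.getParent());           // /tmp/path-demo
    System.out.println(child.toUri().getPath());     // /tmp/path-demo/data.txt

    // listStatus returns FileStatus entries whose getPath() is again a Path.
    for (FileStatus status : fs.listStatus(root)) {
      System.out.println(status.getPath() + " isDir=" + status.isDirectory());
    }

    fs.delete(root, true);                           // clean up recursively
  }
}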
Example #1
Source Project: s3mper Author: Netflix File: ConsistentListingAspect.java License: Apache License 2.0

/**
 * Check the metastore listing against the s3 listing and return any paths
 * missing from s3.
 *
 * @param metastoreListing
 * @param s3Listing
 * @return
 */
private List<Path> checkListing(List<FileInfo> metastoreListing, FileStatus[] s3Listing) {
    Map<String, FileStatus> s3paths = new HashMap<String, FileStatus>();

    if (s3Listing != null) {
        for (FileStatus fileStatus : s3Listing) {
            s3paths.put(fileStatus.getPath().toUri().normalize().getSchemeSpecificPart(), fileStatus);
        }
    }

    List<Path> missingPaths = new ArrayList<Path>();

    for (FileInfo f : metastoreListing) {
        if (f.isDeleted()) {
            continue;
        }

        if (!s3paths.containsKey(f.getPath().toUri().normalize().getSchemeSpecificPart())) {
            missingPaths.add(f.getPath());
        }
    }

    return missingPaths;
}
Example #2
Source Project: hadoop-ozone Author: apache File: TestHadoopDirTreeGenerator.java License: Apache License 2.0

private int traverseToLeaf(FileSystem fs, Path dirPath, int depth,
                           int expectedDepth, int expectedSpanCnt,
                           int expectedFileCnt, int perFileSizeInBytes)
        throws IOException {
  FileStatus[] fileStatuses = fs.listStatus(dirPath);
  // check the num of peer directories except root and leaf as both
  // has less dirs.
  if (depth < expectedDepth - 1) {
    verifyActualSpan(expectedSpanCnt, fileStatuses);
  }
  int actualNumFiles = 0;
  for (FileStatus fileStatus : fileStatuses) {
    if (fileStatus.isDirectory()) {
      ++depth;
      return traverseToLeaf(fs, fileStatus.getPath(), depth, expectedDepth,
              expectedSpanCnt, expectedFileCnt, perFileSizeInBytes);
    } else {
      Assert.assertEquals("Mismatches file len",
              perFileSizeInBytes, fileStatus.getLen());
      actualNumFiles++;
    }
  }
  Assert.assertEquals("Mismatches files count in a directory",
          expectedFileCnt, actualNumFiles);
  return depth;
}
Example #3
Source Project: hadoop Author: naver File: UtilsForTests.java License: Apache License 2.0

/** Configure a waiting job */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
    throws IOException {
  jobConf.setJobName(jobName);
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
Example #4
Source Project: components Author: Talend File: SimpleFileIODatasetRuntimeTest.java License: Apache License 2.0

@Test
public void testGetSampleWithSpecialPath() throws Exception {
  RecordSet rs = getSimpleTestData(0);
  writeRandomCsvFile(mini.getFs(), "/user/test/Marketing Customer Contacts US.CSV", rs, "UTF-8");
  String fileSpec = mini.getFs().getUri()
      .resolve(new Path("/user/test/Marketing Customer Contacts US.CSV").toUri()).toString();
  // the method above will escape it, so make it back here as the customer set the path, should not escape one
  fileSpec = fileSpec.replace("%20", " ");

  // Configure the component.
  SimpleFileIODatasetProperties props = createDatasetProperties();
  props.format.setValue(SimpleFileIOFormat.CSV);
  props.path.setValue(fileSpec);

  final List<IndexedRecord> actual = getSample(props, Integer.MAX_VALUE);
  assertThat(actual, hasSize(10));
}
Example #5
Source Project: incubator-gobblin Author: apache File: HiveTargetPathHelper.java License: Apache License 2.0

public HiveTargetPathHelper(HiveDataset dataset) {
  this.dataset = dataset;
  this.relocateDataFiles = Boolean
      .valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));
  this.targetTableRoot = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)
      ? Optional.of(resolvePath(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
          this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()))
      : Optional.<Path> absent();

  this.targetTablePrefixTobeReplaced =
      this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)
          ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)))
          : Optional.<Path> absent();

  this.targetTablePrefixReplacement =
      this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)
          ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)))
          : Optional.<Path> absent();
}
Example #6
Source Project: presto Author: prestosql File: TestRubixCaching.java License: Apache License 2.0

private FileSystem getCachingFileSystem(HdfsContext context, Path path)
    throws IOException {
  HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of());
  HiveHdfsConfiguration configuration = new HiveHdfsConfiguration(
      configurationInitializer,
      ImmutableSet.of(
          rubixConfigInitializer,
          (dynamicConfig, ignoredContext, ignoredUri) -> {
            dynamicConfig.set("fs.file.impl", CachingLocalFileSystem.class.getName());
            dynamicConfig.setBoolean("fs.gs.lazy.init.enable", true);
            dynamicConfig.set("fs.azure.account.key", "Zm9vCg==");
            dynamicConfig.set("fs.adl.oauth2.client.id", "test");
            dynamicConfig.set("fs.adl.oauth2.refresh.url", "http://localhost");
            dynamicConfig.set("fs.adl.oauth2.credential", "password");
          }));
  HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
  return environment.getFileSystem(context, path);
}
Example #7
Source Project: hbase Author: apache File: TestStoreFileInfo.java License: Apache License 2.0

@Test
public void testOpenErrorMessageHFileLink() throws IOException, IllegalStateException {
  // Test file link exception
  // Try to open nonsense hfilelink. Make sure exception is from HFileLink.
  Path p = new Path("/hbase/test/0123/cf/testtb=4567-abcd");
  try (FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration())) {
    StoreFileInfo sfi = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, p, true);
    try {
      ReaderContext context = sfi.createReaderContext(false, 1000, ReaderType.PREAD);
      sfi.createReader(context, null);
      throw new IllegalStateException();
    } catch (FileNotFoundException fnfe) {
      assertTrue(fnfe.getMessage().contains(HFileLink.class.getSimpleName()));
    }
  }
}
Example #8
Source Project: hadoop Author: naver File: BenchmarkThroughput.java License: Apache License 2.0

private void writeAndReadFile(FileSystem fs, String name, Configuration conf,
                              long size) throws IOException {
  Path f = null;
  try {
    f = writeFile(fs, name, conf, size);
    readFile(fs, f, name, conf);
  } finally {
    try {
      if (f != null) {
        fs.delete(f, true);
      }
    } catch (IOException ie) {
      // IGNORE
    }
  }
}
Example #9
Source Project: Bats Author: lealone File: ParquetRecordWriter.java License: Apache License 2.0

@Override
public void abort() throws IOException {
  List<String> errors = Lists.newArrayList();
  for (Path location : cleanUpLocations) {
    try {
      if (fs.exists(location)) {
        fs.delete(location, true);
        logger.info("Aborting writer. Location [{}] on file system [{}] is deleted.",
            location.toUri().getPath(), fs.getUri());
      }
    } catch (IOException e) {
      errors.add(location.toUri().getPath());
      logger.error("Failed to delete location [{}] on file system [{}].",
          location, fs.getUri(), e);
    }
  }

  if (!errors.isEmpty()) {
    throw new IOException(String.format("Failed to delete the following locations %s on file system [%s]"
        + " during aborting writer", errors, fs.getUri()));
  }
}
Example #10
Source Project: RDFS Author: iVCE File: TestHftpFileSystem.java License: Apache License 2.0

public void readHftpFile(boolean strictContentLength, boolean sendContentLength)
    throws IOException, URISyntaxException {
  int bufSize = 128 * 1024;
  byte[] buf = DFSTestUtil.generateSequentialBytes(0, bufSize);
  final ByteArrayInputStream inputStream = new ByteArrayInputStream(buf);
  final long contentLength = bufSize + 1;
  Configuration conf = new Configuration();

  conf.setBoolean(HftpFileSystem.STRICT_CONTENT_LENGTH, strictContentLength);

  HftpFileSystem fileSystem = new MockHftpFileSystem(
      sendContentLength ? contentLength : null, inputStream, conf);
  FSDataInputStream dataInputStream = fileSystem.open(new Path("dont-care"));
  byte[] readBuf = new byte[1024];

  while (dataInputStream.read(readBuf) > -1) {
    // nothing
  }

  dataInputStream.close();
}
Example #11
Source Project: hadoop Author: naver File: TestHadoopArchives.java License: Apache License 2.0

@Test
public void testGlobFiles() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  final Path sub2 = new Path(inputPath, "dir2");
  fs.mkdirs(sub1);
  String fileName = "a";
  createFile(inputPath, fs, sub1.getName(), fileName);
  createFile(inputPath, fs, sub2.getName(), fileName);
  createFile(inputPath, fs, sub1.getName(), "b"); // not part of result

  final String glob = "dir{1,2}/a";
  final FsShell shell = new FsShell(conf);
  final List<String> originalPaths = lsr(shell, inputPath.toString(),
      inputPath + "/" + glob);
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(inputPath, glob);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr,
      fullHarPathStr + "/" + glob);
  Assert.assertEquals(originalPaths, harPaths);
}
Example #12
Source Project: big-c Author: yncxcw File: TestHadoopArchives.java License: Apache License 2.0

@Test
public void testSingleFile() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  fs.mkdirs(sub1);
  String singleFileName = "a";
  createFile(inputPath, fs, sub1.getName(), singleFileName);
  final FsShell shell = new FsShell(conf);

  final List<String> originalPaths = lsr(shell, sub1.toString());
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(sub1, singleFileName);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr);
  Assert.assertEquals(originalPaths, harPaths);
}
Example #13
Source Project: big-c Author: yncxcw File: TestJobCleanup.java License: Apache License 2.0

@BeforeClass
public static void setUp() throws IOException {
  JobConf conf = new JobConf();
  fileSys = FileSystem.get(conf);
  fileSys.delete(new Path(TEST_ROOT_DIR), true);
  conf.set("mapred.job.tracker.handler.count", "1");
  conf.set("mapred.job.tracker", "127.0.0.1:0");
  conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
  conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
  conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR
      + "/intermediate");
  conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
      .SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "true");

  mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
  inDir = new Path(TEST_ROOT_DIR, "test-input");
  String input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";
  DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
  file.writeBytes(input);
  file.close();
  emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
  fileSys.mkdirs(emptyInDir);
}
Example #14
Source Project: hadoop Author: naver File: ProviderUtils.java License: Apache License 2.0

/**
 * Convert a nested URI to decode the underlying path. The translation takes
 * the authority and parses it into the underlying scheme and authority.
 * For example, "myscheme://hdfs@nn/my/path" is converted to
 * "hdfs://nn/my/path".
 * @param nestedUri the URI from the nested URI
 * @return the unnested path
 */
public static Path unnestUri(URI nestedUri) {
  String[] parts = nestedUri.getAuthority().split("@", 2);
  StringBuilder result = new StringBuilder(parts[0]);
  result.append("://");
  if (parts.length == 2) {
    result.append(parts[1]);
  }
  result.append(nestedUri.getPath());
  if (nestedUri.getQuery() != null) {
    result.append("?");
    result.append(nestedUri.getQuery());
  }
  if (nestedUri.getFragment() != null) {
    result.append("#");
    result.append(nestedUri.getFragment());
  }
  return new Path(result.toString());
}
Example #15
Source Project: hadoop-ozone Author: apache File: TestOzoneFileSystem.java License: Apache License 2.0

private void testDeleteCreatesFakeParentDir() throws Exception {
  Path grandparent = new Path("/testDeleteCreatesFakeParentDir");
  Path parent = new Path(grandparent, "parent");
  Path child = new Path(parent, "child");
  ContractTestUtils.touch(fs, child);
  rootItemCount++; // grandparent

  // Verify that parent dir key does not exist
  // Creating a child should not add parent keys to the bucket
  try {
    getKey(parent, true);
  } catch (IOException ex) {
    assertKeyNotFoundException(ex);
  }

  // Delete the child key
  fs.delete(child, false);

  // Deleting the only child should create the parent dir key if it does
  // not exist
  String parentKey = o3fs.pathToKey(parent) + "/";
  OzoneKeyDetails parentKeyInfo = getKey(parent, true);
  assertEquals(parentKey, parentKeyInfo.getName());
}
Example #16
Source Project: ignite Author: apache File: HadoopIgfs20FileSystemAbstractSelfTest.java License: Apache License 2.0

/** @throws Exception If failed. */
@Test
public void testSetPermissionIfOutputStreamIsNotClosed() throws Exception {
  Path fsHome = new Path(primaryFsUri);
  Path file = new Path(fsHome, "myFile");

  FsPermission perm = new FsPermission((short)123);

  FSDataOutputStream os = fs.create(file, EnumSet.noneOf(CreateFlag.class),
      Options.CreateOpts.perms(FsPermission.getDefault()));

  fs.setPermission(file, perm);

  os.close();

  assertEquals(perm, fs.getFileStatus(file).getPermission());
}
Example #17
Source Project: RDFS Author: iVCE File: NativeS3FileSystem.java License: Apache License 2.0

@Override
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {

  if (exists(f) && !overwrite) {
    throw new IOException("File already exists:" + f);
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store,
      key, progress, bufferSize), statistics);
}
Example #18
Source Project: RDFS Author: iVCE File: TestDataJoin.java License: Apache License 2.0

private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
Example #19
Source Project: kylin-on-parquet-v2 Author: Kyligence File: HDFSResourceStore.java License: Apache License 2.0

private Path getRealHDFSPath(String resourcePath) {
  if (resourcePath.equals("/"))
    return this.hdfsMetaPath;
  if (resourcePath.startsWith("/") && resourcePath.length() > 1)
    resourcePath = resourcePath.substring(1, resourcePath.length());
  return new Path(this.hdfsMetaPath, resourcePath);
}
Example #20
Source Project: systemds Author: tugraz-isds File: IOUtilFunctions.java License: Apache License 2.0

public static Path[] getSequenceFilePaths(FileSystem fs, Path file)
  throws IOException
{
  Path[] ret = null;

  //Note on object stores: Since the object store file system implementations
  //only emulate a file system, the directory of a multi-part file does not
  //exist physically and hence the isDirectory call returns false. Furthermore,
  //listStatus call returns all files with the given directory as prefix, which
  //includes the mtd file which needs to be ignored accordingly.

  if( fs.isDirectory(file) || IOUtilFunctions.isObjectStoreFileScheme(file) ) {
    LinkedList<Path> tmp = new LinkedList<>();
    FileStatus[] dStatus = fs.listStatus(file);
    for( FileStatus fdStatus : dStatus )
      if( !fdStatus.getPath().getName().startsWith("_") //skip internal files
        && !fdStatus.getPath().toString().equals(file.toString()+".mtd") ) //mtd file
        tmp.add(fdStatus.getPath());
    ret = tmp.toArray(new Path[0]);
  }
  else {
    ret = new Path[]{ file };
  }

  return ret;
}
Example #21
Source Project: hadoop-solr Author: lucidworks File: IngestJobTest.java License: Apache License 2.0

@Test
public void testRegex() throws Exception {
  String regex1 = "regex" + File.separator + "regex-small.txt";
  File regexFile1 = new File(ClassLoader.getSystemClassLoader().getResource(regex1).getPath());
  assertTrue(regex1 + " does not exist: " + regexFile1.getAbsolutePath(), regexFile1.exists());
  Path input1 = new Path(tempDir, regex1);
  addContentToFS(input1, Files.toByteArray(regexFile1));

  String regex2 = "regex" + File.separator + "regex-small-2.txt";
  File regexFile2 = new File(ClassLoader.getSystemClassLoader().getResource(regex2).getPath());
  assertTrue(regex2 + " does not exist: " + regexFile2.getAbsolutePath(), regexFile2.exists());
  Path input2 = new Path(tempDir, regex2);
  addContentToFS(input2, Files.toByteArray(regexFile2));

  String jobName = "testRegex";
  String[] args = new JobArgs().withJobName(jobName).withClassname(RegexIngestMapper.class.getName())
      .withCollection(DEFAULT_COLLECTION).withZkString(getBaseUrl())
      .withInput(tempDir.toUri().toString() + File.separator + "regex" + File.separator + "regex-small*")
      .withDArgs("-D" + RegexIngestMapper.REGEX + "=\\w+",
          "-D" + RegexIngestMapper.GROUPS_TO_FIELDS + "=0=match")
      .getJobArgs();

  int val = ToolRunner.run(conf, new IngestJob(), args);
  assertEquals(0, val);

  MockRecordWriter mockRecordWriter = IngestJobMockMapRedOutFormat.writers.get(jobName);
  Assert.assertNotNull(mockRecordWriter);
  assertEquals(2, mockRecordWriter.map.size());
}
Example #22
Source Project: RDFS Author: iVCE File: TestEmptyJob.java License: Apache License 2.0

@Override
public void commitJob(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path share = new Path(conf.get("share"));
  FileSystem fs = FileSystem.get(conf);

  while (true) {
    if (fs.exists(share)) {
      break;
    }
    UtilsForTests.waitFor(100);
  }

  super.commitJob(context);
}
Example #23
Source Project: tunnel Author: hellobike File: HdfsClient.java License: Apache License 2.0

public void append(HdfsConfig config, HdfsRule rule, Event event) {
  try {
    Configuration hadoopConfig = new Configuration();
    FileSystem fileSystem = FileSystem.get(URI.create(this.address), hadoopConfig);
    Path hdfsPath = new Path(fileName);
    FSDataOutputStream fileOutputStream = null;
    try {
      if (fileSystem.exists(hdfsPath)) {
        fileOutputStream = fileSystem.append(hdfsPath);
      } else {
        fileOutputStream = fileSystem.create(hdfsPath);
      }
      fileOutputStream.writeUTF(JSON.toJSONString(event));
    } finally {
      // close the stream before the file system that produced it
      if (fileOutputStream != null) {
        fileOutputStream.close();
      }
      if (fileSystem != null) {
        fileSystem.close();
      }
    }
  } catch (IOException e) {
    // errors are silently ignored
  }
}
Example #24
Source Project: hudi Author: apache File: HoodieRealtimeRecordReaderUtils.java License: Apache License 2.0

/**
 * Reads the schema from the base file.
 */
public static Schema readSchema(Configuration conf, Path filePath) {
  try {
    HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
    return storageReader.getSchema();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to read schema from " + filePath, e);
  }
}
Example #25
Source Project: gatk Author: broadinstitute File: ReadsSparkSinkUnitTest.java License: BSD 3-Clause "New" or "Revised" License

private void assertSingleShardedWritingWorks(GATKPath inputBam, String referenceFile, String outputPath,
    String outputPartsPath, boolean writeBai, boolean writeSbi, long sbiGranularity) throws IOException {
  JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

  final GATKPath referencePath = referenceFile == null ? null : new GATKPath(referenceFile);

  ReadsSparkSource readSource = new ReadsSparkSource(ctx);
  JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, referencePath);
  SAMFileHeader header = readSource.getHeader(inputBam, referencePath);

  ReadsSparkSink.writeReads(ctx, outputPath, referencePath, rddParallelReads, header,
      ReadsWriteFormat.SINGLE, 0, outputPartsPath, writeBai, writeSbi, true, sbiGranularity);

  // check that a bai file is created
  if (new GATKPath(outputPath).isBam() && writeBai) {
    Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + FileExtensions.BAI_INDEX)));
  }
  // check that a splitting bai file is created with correct granularity
  if (new GATKPath(outputPath).isBam() && writeSbi) {
    final java.nio.file.Path sbiPath = IOUtils.getPath(outputPath + FileExtensions.SBI);
    Assert.assertTrue(Files.exists(sbiPath));
    final SBIIndex sbi = SBIIndex.load(sbiPath);
    Assert.assertEquals(sbi.getGranularity(), sbiGranularity);
  }

  JavaRDD<GATKRead> rddParallelReads2 = readSource.getParallelReads(new GATKPath(outputPath), referencePath);
  final List<GATKRead> writtenReads = rddParallelReads2.collect();

  assertReadsAreSorted(header, writtenReads);
  Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
}
Example #26
Source Project: incubator-retired-blur Author: apache File: BlurUtilsTest.java License: Apache License 2.0

@Test
public void testValidateShardCountExtraDir() throws IOException {
  File file = new File(TMPDIR, "ValidateShardCount-test");
  rm(file);
  Path path = new Path(file.toURI());
  Configuration conf = new Configuration();
  FileSystem fileSystem = path.getFileSystem(conf);
  fileSystem.mkdirs(path);
  int shardCount = 10;
  createShardDirs(shardCount, fileSystem, path);
  fileSystem.mkdirs(new Path(path, "logs"));
  BlurUtil.validateShardCount(shardCount, fileSystem, path);
}
Example #27
Source Project: big-c Author: yncxcw File: TestJobHistoryEventHandler.java License: Apache License 2.0

@Override
protected EventWriter createEventWriter(Path historyFilePath)
    throws IOException {
  if (mockHistoryProcessing) {
    this.eventWriter = mock(EventWriter.class);
  } else {
    this.eventWriter = super.createEventWriter(historyFilePath);
  }
  return this.eventWriter;
}
Example #28
Source Project: hadoop Author: naver File: KeyValueTextInputFormat.java License: Apache License 2.0

@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
      new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }
  return codec instanceof SplittableCompressionCodec;
}
Example #29
Source Project: flink Author: apache File: WordCount.java License: Apache License 2.0

public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }

  final String inputPath = args[0];
  final String outputPath = args[1];

  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Set up the Hadoop Input Format
  Job job = Job.getInstance();
  HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
      new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
  TextInputFormat.addInputPath(job, new Path(inputPath));

  // Create a Flink job with it
  DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

  // Tokenize the line and convert from Writable "Text" to String for better handling
  DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

  // Sum up the words
  DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

  // Convert String back to Writable "Text" for use with Hadoop Output Format
  DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

  // Set up Hadoop Output Format
  HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
      new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
  hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
  hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
  TextOutputFormat.setOutputPath(job, new Path(outputPath));

  // Output & Execute
  hadoopResult.output(hadoopOutputFormat);
  env.execute("Word Count");
}
Example #30
Source Project: incubator-pinot Author: apache File: DeleteExtraPushedSegmentsTest.java License: Apache License 2.0

@Test
public void checkDeleteWithRefresh() {
  List<String> allSegmentsInCluster = new ArrayList<>();
  allSegmentsInCluster.add("mytable_0");
  allSegmentsInCluster.add("mytable_1");
  allSegmentsInCluster.add("mytable_2");

  List<Path> currentSegments = new ArrayList<>();
  currentSegments.add(new Path("mytable_0"));

  SegmentTarPushJob segmentTarPushJob = new SegmentTarPushJob(_defaultProperties);
  List<String> segmentsToDelete = segmentTarPushJob.getSegmentsToDelete(allSegmentsInCluster, currentSegments);
  Assert.assertEquals(segmentsToDelete.size(), 2);
  Assert.assertFalse(segmentsToDelete.contains("mytable_0"));
}