org.apache.hadoop.fs.Path Java Examples
The following examples show how to use org.apache.hadoop.fs.Path.
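Before the project examples, here is a minimal, self-contained sketch of typical Path usage: building a child Path from a parent, resolving the owning FileSystem from the Path, writing a file, and listing a directory. The directory /tmp/path-demo, the file data.txt, and the class name PathBasics are illustrative choices only, not taken from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathBasics {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();

    // Paths are hierarchical: a child Path can be built from a parent Path and a name.
    Path baseDir = new Path("/tmp/path-demo");       // illustrative directory
    Path dataFile = new Path(baseDir, "data.txt");   // /tmp/path-demo/data.txt

    // A Path resolves to a concrete FileSystem (local, HDFS, S3, ...) via its URI scheme.
    FileSystem fs = dataFile.getFileSystem(conf);
    fs.mkdirs(baseDir);

    // Write a small file at the Path.
    try (FSDataOutputStream out = fs.create(dataFile, true)) {
      out.writeBytes("hello\n");
    }

    // List the directory and print fully qualified child paths.
    for (FileStatus status : fs.listStatus(baseDir)) {
      System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
    }

    fs.delete(baseDir, true); // clean up
  }
}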
Example #1
Source File: ConsistentListingAspect.java From s3mper with Apache License 2.0
/**
 * Check the metastore listing against the s3 listing and return any paths
 * missing from s3.
 *
 * @param metastoreListing
 * @param s3Listing
 * @return
 */
private List<Path> checkListing(List<FileInfo> metastoreListing, FileStatus[] s3Listing) {
  Map<String, FileStatus> s3paths = new HashMap<String, FileStatus>();

  if (s3Listing != null) {
    for (FileStatus fileStatus : s3Listing) {
      s3paths.put(fileStatus.getPath().toUri().normalize().getSchemeSpecificPart(), fileStatus);
    }
  }

  List<Path> missingPaths = new ArrayList<Path>();

  for (FileInfo f : metastoreListing) {
    if (f.isDeleted()) {
      continue;
    }

    if (!s3paths.containsKey(f.getPath().toUri().normalize().getSchemeSpecificPart())) {
      missingPaths.add(f.getPath());
    }
  }

  return missingPaths;
}
Example #2
Source File: TestHadoopDirTreeGenerator.java From hadoop-ozone with Apache License 2.0
private int traverseToLeaf(FileSystem fs, Path dirPath, int depth,
    int expectedDepth, int expectedSpanCnt, int expectedFileCnt,
    int perFileSizeInBytes) throws IOException {
  FileStatus[] fileStatuses = fs.listStatus(dirPath);
  // check the num of peer directories except root and leaf as both
  // has less dirs.
  if (depth < expectedDepth - 1) {
    verifyActualSpan(expectedSpanCnt, fileStatuses);
  }
  int actualNumFiles = 0;
  for (FileStatus fileStatus : fileStatuses) {
    if (fileStatus.isDirectory()) {
      ++depth;
      return traverseToLeaf(fs, fileStatus.getPath(), depth, expectedDepth,
          expectedSpanCnt, expectedFileCnt, perFileSizeInBytes);
    } else {
      Assert.assertEquals("Mismatches file len",
          perFileSizeInBytes, fileStatus.getLen());
      actualNumFiles++;
    }
  }
  Assert.assertEquals("Mismatches files count in a directory",
      expectedFileCnt, actualNumFiles);
  return depth;
}
Example #3
Source File: UtilsForTests.java From hadoop with Apache License 2.0
/**
 * Configure a waiting job
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
    Path outputPath, int numMaps, int numRed, String jobName,
    String mapSignalFilename, String redSignalFilename) throws IOException {
  jobConf.setJobName(jobName);
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
Example #4
Source File: SimpleFileIODatasetRuntimeTest.java From components with Apache License 2.0
@Test
public void testGetSampleWithSpecialPath() throws Exception {
  RecordSet rs = getSimpleTestData(0);
  writeRandomCsvFile(mini.getFs(), "/user/test/Marketing Customer Contacts US.CSV", rs, "UTF-8");
  String fileSpec = mini.getFs().getUri()
      .resolve(new Path("/user/test/Marketing Customer Contacts US.CSV").toUri()).toString();
  // the resolve call above escapes the spaces; undo that here because the customer
  // sets the path directly and would not provide an escaped one
  fileSpec = fileSpec.replace("%20", " ");

  // Configure the component.
  SimpleFileIODatasetProperties props = createDatasetProperties();
  props.format.setValue(SimpleFileIOFormat.CSV);
  props.path.setValue(fileSpec);

  final List<IndexedRecord> actual = getSample(props, Integer.MAX_VALUE);
  assertThat(actual, hasSize(10));
}
Example #5
Source File: HiveTargetPathHelper.java From incubator-gobblin with Apache License 2.0
public HiveTargetPathHelper(HiveDataset dataset) {
  this.dataset = dataset;
  this.relocateDataFiles = Boolean
      .valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));
  this.targetTableRoot = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)
      ? Optional.of(resolvePath(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
          this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()))
      : Optional.<Path> absent();

  this.targetTablePrefixTobeReplaced =
      this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)
          ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)))
          : Optional.<Path> absent();

  this.targetTablePrefixReplacement =
      this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)
          ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)))
          : Optional.<Path> absent();
}
Example #6
Source File: TestRubixCaching.java From presto with Apache License 2.0
private FileSystem getCachingFileSystem(HdfsContext context, Path path)
    throws IOException {
  HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of());
  HiveHdfsConfiguration configuration = new HiveHdfsConfiguration(
      configurationInitializer,
      ImmutableSet.of(
          rubixConfigInitializer,
          (dynamicConfig, ignoredContext, ignoredUri) -> {
            dynamicConfig.set("fs.file.impl", CachingLocalFileSystem.class.getName());
            dynamicConfig.setBoolean("fs.gs.lazy.init.enable", true);
            dynamicConfig.set("fs.azure.account.key", "Zm9vCg==");
            dynamicConfig.set("fs.adl.oauth2.client.id", "test");
            dynamicConfig.set("fs.adl.oauth2.refresh.url", "http://localhost");
            dynamicConfig.set("fs.adl.oauth2.credential", "password");
          }));
  HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
  return environment.getFileSystem(context, path);
}
Example #7
Source File: TestStoreFileInfo.java From hbase with Apache License 2.0
@Test
public void testOpenErrorMessageHFileLink() throws IOException, IllegalStateException {
  // Test file link exception
  // Try to open nonsense hfilelink. Make sure exception is from HFileLink.
  Path p = new Path("/hbase/test/0123/cf/testtb=4567-abcd");
  try (FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration())) {
    StoreFileInfo sfi = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, p, true);
    try {
      ReaderContext context = sfi.createReaderContext(false, 1000, ReaderType.PREAD);
      sfi.createReader(context, null);
      throw new IllegalStateException();
    } catch (FileNotFoundException fnfe) {
      assertTrue(fnfe.getMessage().contains(HFileLink.class.getSimpleName()));
    }
  }
}
Example #8
Source File: BenchmarkThroughput.java From hadoop with Apache License 2.0
private void writeAndReadFile(FileSystem fs, String name, Configuration conf,
    long size) throws IOException {
  Path f = null;
  try {
    f = writeFile(fs, name, conf, size);
    readFile(fs, f, name, conf);
  } finally {
    try {
      if (f != null) {
        fs.delete(f, true);
      }
    } catch (IOException ie) {
      // IGNORE
    }
  }
}
Example #9
Source File: ParquetRecordWriter.java From Bats with Apache License 2.0
@Override
public void abort() throws IOException {
  List<String> errors = Lists.newArrayList();
  for (Path location : cleanUpLocations) {
    try {
      if (fs.exists(location)) {
        fs.delete(location, true);
        logger.info("Aborting writer. Location [{}] on file system [{}] is deleted.",
            location.toUri().getPath(), fs.getUri());
      }
    } catch (IOException e) {
      errors.add(location.toUri().getPath());
      logger.error("Failed to delete location [{}] on file system [{}].",
          location, fs.getUri(), e);
    }
  }
  if (!errors.isEmpty()) {
    throw new IOException(String.format("Failed to delete the following locations %s on file system [%s]"
        + " during aborting writer", errors, fs.getUri()));
  }
}
Example #10
Source File: TestHftpFileSystem.java From RDFS with Apache License 2.0
public void readHftpFile(boolean strictContentLength, boolean sendContentLength)
    throws IOException, URISyntaxException {
  int bufSize = 128 * 1024;
  byte[] buf = DFSTestUtil.generateSequentialBytes(0, bufSize);
  final ByteArrayInputStream inputStream = new ByteArrayInputStream(buf);
  final long contentLength = bufSize + 1;
  Configuration conf = new Configuration();

  conf.setBoolean(HftpFileSystem.STRICT_CONTENT_LENGTH, strictContentLength);

  HftpFileSystem fileSystem =
      new MockHftpFileSystem(sendContentLength ? contentLength : null, inputStream, conf);
  FSDataInputStream dataInputStream = fileSystem.open(new Path("dont-care"));
  byte[] readBuf = new byte[1024];

  while (dataInputStream.read(readBuf) > -1) {
    // nothing
  }

  dataInputStream.close();
}
Example #11
Source File: TestHadoopArchives.java From hadoop with Apache License 2.0
@Test
public void testGlobFiles() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  final Path sub2 = new Path(inputPath, "dir2");
  fs.mkdirs(sub1);
  String fileName = "a";
  createFile(inputPath, fs, sub1.getName(), fileName);
  createFile(inputPath, fs, sub2.getName(), fileName);
  createFile(inputPath, fs, sub1.getName(), "b"); // not part of result

  final String glob = "dir{1,2}/a";
  final FsShell shell = new FsShell(conf);
  final List<String> originalPaths = lsr(shell, inputPath.toString(),
      inputPath + "/" + glob);
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(inputPath, glob);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr,
      fullHarPathStr + "/" + glob);
  Assert.assertEquals(originalPaths, harPaths);
}
Example #12
Source File: TestHadoopArchives.java From big-c with Apache License 2.0
@Test
public void testSingleFile() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  fs.mkdirs(sub1);
  String singleFileName = "a";
  createFile(inputPath, fs, sub1.getName(), singleFileName);
  final FsShell shell = new FsShell(conf);

  final List<String> originalPaths = lsr(shell, sub1.toString());
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(sub1, singleFileName);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr);
  Assert.assertEquals(originalPaths, harPaths);
}
Example #13
Source File: TestJobCleanup.java From big-c with Apache License 2.0
@BeforeClass
public static void setUp() throws IOException {
  JobConf conf = new JobConf();
  fileSys = FileSystem.get(conf);
  fileSys.delete(new Path(TEST_ROOT_DIR), true);
  conf.set("mapred.job.tracker.handler.count", "1");
  conf.set("mapred.job.tracker", "127.0.0.1:0");
  conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
  conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
  conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR + "/intermediate");
  conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
      .SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "true");

  mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
  inDir = new Path(TEST_ROOT_DIR, "test-input");
  String input = "The quick brown fox\n"
      + "has many silly\n"
      + "red fox sox\n";
  DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
  file.writeBytes(input);
  file.close();
  emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
  fileSys.mkdirs(emptyInDir);
}
Example #14
Source File: ProviderUtils.java From hadoop with Apache License 2.0
/**
 * Convert a nested URI to decode the underlying path. The translation takes
 * the authority and parses it into the underlying scheme and authority.
 * For example, "myscheme://hdfs@nn/my/path" is converted to
 * "hdfs://nn/my/path".
 * @param nestedUri the URI from the nested URI
 * @return the unnested path
 */
public static Path unnestUri(URI nestedUri) {
  String[] parts = nestedUri.getAuthority().split("@", 2);
  StringBuilder result = new StringBuilder(parts[0]);
  result.append("://");
  if (parts.length == 2) {
    result.append(parts[1]);
  }
  result.append(nestedUri.getPath());
  if (nestedUri.getQuery() != null) {
    result.append("?");
    result.append(nestedUri.getQuery());
  }
  if (nestedUri.getFragment() != null) {
    result.append("#");
    result.append(nestedUri.getFragment());
  }
  return new Path(result.toString());
}
Example #15
Source File: TestOzoneFileSystem.java From hadoop-ozone with Apache License 2.0
private void testDeleteCreatesFakeParentDir() throws Exception {
  Path grandparent = new Path("/testDeleteCreatesFakeParentDir");
  Path parent = new Path(grandparent, "parent");
  Path child = new Path(parent, "child");
  ContractTestUtils.touch(fs, child);
  rootItemCount++; // grandparent

  // Verify that parent dir key does not exist
  // Creating a child should not add parent keys to the bucket
  try {
    getKey(parent, true);
  } catch (IOException ex) {
    assertKeyNotFoundException(ex);
  }

  // Delete the child key
  fs.delete(child, false);

  // Deleting the only child should create the parent dir key if it does
  // not exist
  String parentKey = o3fs.pathToKey(parent) + "/";
  OzoneKeyDetails parentKeyInfo = getKey(parent, true);
  assertEquals(parentKey, parentKeyInfo.getName());
}
Example #16
Source File: HadoopIgfs20FileSystemAbstractSelfTest.java From ignite with Apache License 2.0
/** @throws Exception If failed. */
@Test
public void testSetPermissionIfOutputStreamIsNotClosed() throws Exception {
  Path fsHome = new Path(primaryFsUri);
  Path file = new Path(fsHome, "myFile");

  FsPermission perm = new FsPermission((short)123);

  FSDataOutputStream os = fs.create(file, EnumSet.noneOf(CreateFlag.class),
      Options.CreateOpts.perms(FsPermission.getDefault()));

  fs.setPermission(file, perm);

  os.close();

  assertEquals(perm, fs.getFileStatus(file).getPermission());
}
Example #17
Source File: NativeS3FileSystem.java From RDFS with Apache License 2.0
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {
  if (exists(f) && !overwrite) {
    throw new IOException("File already exists:" + f);
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  return new FSDataOutputStream(
      new NativeS3FsOutputStream(getConf(), store, key, progress, bufferSize),
      statistics);
}
Example #18
Source File: TestDataJoin.java From RDFS with Apache License 2.0
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
Example #19
Source File: HDFSResourceStore.java From kylin-on-parquet-v2 with Apache License 2.0
private Path getRealHDFSPath(String resourcePath) {
  if (resourcePath.equals("/"))
    return this.hdfsMetaPath;

  if (resourcePath.startsWith("/") && resourcePath.length() > 1)
    resourcePath = resourcePath.substring(1, resourcePath.length());

  return new Path(this.hdfsMetaPath, resourcePath);
}
Example #20
Source File: IOUtilFunctions.java From systemds with Apache License 2.0
public static Path[] getSequenceFilePaths(FileSystem fs, Path file)
    throws IOException {
  Path[] ret = null;

  // Note on object stores: Since the object store file system implementations
  // only emulate a file system, the directory of a multi-part file does not
  // exist physically and hence the isDirectory call returns false. Furthermore,
  // listStatus call returns all files with the given directory as prefix, which
  // includes the mtd file which needs to be ignored accordingly.

  if (fs.isDirectory(file) || IOUtilFunctions.isObjectStoreFileScheme(file)) {
    LinkedList<Path> tmp = new LinkedList<>();
    FileStatus[] dStatus = fs.listStatus(file);
    for (FileStatus fdStatus : dStatus)
      if (!fdStatus.getPath().getName().startsWith("_") // skip internal files
          && !fdStatus.getPath().toString().equals(file.toString() + ".mtd")) // mtd file
        tmp.add(fdStatus.getPath());
    ret = tmp.toArray(new Path[0]);
  }
  else {
    ret = new Path[]{ file };
  }

  return ret;
}
Example #21
Source File: IngestJobTest.java From hadoop-solr with Apache License 2.0
@Test
public void testRegex() throws Exception {
  String regex1 = "regex" + File.separator + "regex-small.txt";
  File regexFile1 = new File(ClassLoader.getSystemClassLoader().getResource(regex1).getPath());
  assertTrue(regex1 + " does not exist: " + regexFile1.getAbsolutePath(), regexFile1.exists());
  Path input1 = new Path(tempDir, regex1);
  addContentToFS(input1, Files.toByteArray(regexFile1));

  String regex2 = "regex" + File.separator + "regex-small-2.txt";
  File regexFile2 = new File(ClassLoader.getSystemClassLoader().getResource(regex2).getPath());
  assertTrue(regex2 + " does not exist: " + regexFile2.getAbsolutePath(), regexFile2.exists());
  Path input2 = new Path(tempDir, regex2);
  addContentToFS(input2, Files.toByteArray(regexFile2));

  String jobName = "testRegex";
  String[] args = new JobArgs().withJobName(jobName).withClassname(RegexIngestMapper.class.getName())
      .withCollection(DEFAULT_COLLECTION).withZkString(getBaseUrl())
      .withInput(tempDir.toUri().toString() + File.separator + "regex" + File.separator + "regex-small*")
      .withDArgs("-D" + RegexIngestMapper.REGEX + "=\\w+",
          "-D" + RegexIngestMapper.GROUPS_TO_FIELDS + "=0=match")
      .getJobArgs();

  int val = ToolRunner.run(conf, new IngestJob(), args);
  assertEquals(0, val);

  MockRecordWriter mockRecordWriter = IngestJobMockMapRedOutFormat.writers.get(jobName);
  Assert.assertNotNull(mockRecordWriter);
  assertEquals(2, mockRecordWriter.map.size());
}
Example #22
Source File: TestEmptyJob.java From RDFS with Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path share = new Path(conf.get("share"));
  FileSystem fs = FileSystem.get(conf);

  while (true) {
    if (fs.exists(share)) {
      break;
    }
    UtilsForTests.waitFor(100);
  }
  super.commitJob(context);
}
Example #23
Source File: HdfsClient.java From tunnel with Apache License 2.0
public void append(HdfsConfig config, HdfsRule rule, Event event) {
  try {
    Configuration hadoopConfig = new Configuration();
    FileSystem fileSystem = FileSystem.get(URI.create(this.address), hadoopConfig);
    Path hdfsPath = new Path(fileName);
    FSDataOutputStream fileOutputStream = null;
    try {
      if (fileSystem.exists(hdfsPath)) {
        fileOutputStream = fileSystem.append(hdfsPath);
      } else {
        fileOutputStream = fileSystem.create(hdfsPath);
      }
      fileOutputStream.writeUTF(JSON.toJSONString(event));
    } finally {
      if (fileSystem != null) {
        fileSystem.close();
      }
      if (fileOutputStream != null) {
        fileOutputStream.close();
      }
    }
  } catch (IOException e) {
  }
}
Example #24
Source File: HoodieRealtimeRecordReaderUtils.java From hudi with Apache License 2.0
/**
 * Reads the schema from the base file.
 */
public static Schema readSchema(Configuration conf, Path filePath) {
  try {
    HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
    return storageReader.getSchema();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to read schema from " + filePath, e);
  }
}
Example #25
Source File: ReadsSparkSinkUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License
private void assertSingleShardedWritingWorks(GATKPath inputBam, String referenceFile, String outputPath,
    String outputPartsPath, boolean writeBai, boolean writeSbi, long sbiGranularity) throws IOException {
  JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

  final GATKPath referencePath = referenceFile == null ? null : new GATKPath(referenceFile);

  ReadsSparkSource readSource = new ReadsSparkSource(ctx);
  JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, referencePath);
  SAMFileHeader header = readSource.getHeader(inputBam, referencePath);

  ReadsSparkSink.writeReads(ctx, outputPath, referencePath, rddParallelReads, header,
      ReadsWriteFormat.SINGLE, 0, outputPartsPath, writeBai, writeSbi, true, sbiGranularity);

  // check that a bai file is created
  if (new GATKPath(outputPath).isBam() && writeBai) {
    Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + FileExtensions.BAI_INDEX)));
  }
  // check that a splitting bai file is created with correct granularity
  if (new GATKPath(outputPath).isBam() && writeSbi) {
    final java.nio.file.Path sbiPath = IOUtils.getPath(outputPath + FileExtensions.SBI);
    Assert.assertTrue(Files.exists(sbiPath));
    final SBIIndex sbi = SBIIndex.load(sbiPath);
    Assert.assertEquals(sbi.getGranularity(), sbiGranularity);
  }

  JavaRDD<GATKRead> rddParallelReads2 = readSource.getParallelReads(new GATKPath(outputPath), referencePath);
  final List<GATKRead> writtenReads = rddParallelReads2.collect();

  assertReadsAreSorted(header, writtenReads);
  Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
}
Example #26
Source File: BlurUtilsTest.java From incubator-retired-blur with Apache License 2.0
@Test
public void testValidateShardCountExtraDir() throws IOException {
  File file = new File(TMPDIR, "ValidateShardCount-test");
  rm(file);
  Path path = new Path(file.toURI());
  Configuration conf = new Configuration();
  FileSystem fileSystem = path.getFileSystem(conf);
  fileSystem.mkdirs(path);
  int shardCount = 10;
  createShardDirs(shardCount, fileSystem, path);
  fileSystem.mkdirs(new Path(path, "logs"));
  BlurUtil.validateShardCount(shardCount, fileSystem, path);
}
Example #27
Source File: TestJobHistoryEventHandler.java From big-c with Apache License 2.0
@Override
protected EventWriter createEventWriter(Path historyFilePath) throws IOException {
  if (mockHistoryProcessing) {
    this.eventWriter = mock(EventWriter.class);
  } else {
    this.eventWriter = super.createEventWriter(historyFilePath);
  }
  return this.eventWriter;
}
Example #28
Source File: KeyValueTextInputFormat.java From hadoop with Apache License 2.0
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
      new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }
  return codec instanceof SplittableCompressionCodec;
}
Example #29
Source File: WordCount.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }

  final String inputPath = args[0];
  final String outputPath = args[1];

  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Set up the Hadoop Input Format
  Job job = Job.getInstance();
  HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
      new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
  TextInputFormat.addInputPath(job, new Path(inputPath));

  // Create a Flink job with it
  DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

  // Tokenize the line and convert from Writable "Text" to String for better handling
  DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

  // Sum up the words
  DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

  // Convert String back to Writable "Text" for use with Hadoop Output Format
  DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

  // Set up Hadoop Output Format
  HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
      new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
  hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
  hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
  TextOutputFormat.setOutputPath(job, new Path(outputPath));

  // Output & Execute
  hadoopResult.output(hadoopOutputFormat);
  env.execute("Word Count");
}
Example #30
Source File: DeleteExtraPushedSegmentsTest.java From incubator-pinot with Apache License 2.0
@Test
public void checkDeleteWithRefresh() {
  List<String> allSegmentsInCluster = new ArrayList<>();
  allSegmentsInCluster.add("mytable_0");
  allSegmentsInCluster.add("mytable_1");
  allSegmentsInCluster.add("mytable_2");

  List<Path> currentSegments = new ArrayList<>();
  currentSegments.add(new Path("mytable_0"));
  SegmentTarPushJob segmentTarPushJob = new SegmentTarPushJob(_defaultProperties);
  List<String> segmentsToDelete = segmentTarPushJob.getSegmentsToDelete(allSegmentsInCluster, currentSegments);
  Assert.assertEquals(segmentsToDelete.size(), 2);
  Assert.assertFalse(segmentsToDelete.contains("mytable_0"));
}