org.apache.hadoop.io.SequenceFile Java Examples
The following examples show how to use
org.apache.hadoop.io.SequenceFile.
Each example is drawn from an open-source project; the source file, project, and license are noted above the code.
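Before the project-specific examples, here is a minimal, self-contained round-trip sketch using the option-based createWriter/Reader API that several of the examples below also use. The class name SequenceFileRoundTrip and the file name demo.seq are illustrative, and the sketch assumes a Hadoop 2.x or later client on the classpath:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileRoundTrip {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path("demo.seq"); // illustrative path

    // Write a few key-value pairs.
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(path),
          SequenceFile.Writer.keyClass(IntWritable.class),
          SequenceFile.Writer.valueClass(Text.class));
      for (int i = 0; i < 3; i++) {
        writer.append(new IntWritable(i), new Text("value_" + i));
      }
    } finally {
      IOUtils.closeStream(writer);
    }

    // Read them back.
    SequenceFile.Reader reader = null;
    try {
      reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      IOUtils.closeStream(reader);
    }
  }
}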
Example #1
Source File: TestUniformSizeInputFormat.java From big-c with Apache License 2.0
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
  long lastEnd = 0;

  // Verify that each split's start matches the previous split's end and
  // that we are not missing anything
  for (InputSplit split : splits) {
    FileSplit fileSplit = (FileSplit) split;
    long start = fileSplit.getStart();
    Assert.assertEquals(lastEnd, start);
    lastEnd = start + fileSplit.getLength();
  }

  // Verify there is nothing more to read from the input file
  SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
      SequenceFile.Reader.file(listFile));
  try {
    reader.seek(lastEnd);
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
  } finally {
    IOUtils.closeStream(reader);
  }
}
Example #2
Source File: SequenceWriter.java From nutchpy with Apache License 2.0
public static void write_seq() throws IOException {
  String uri = "test_file.seq";
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(URI.create(uri), conf);
  Path path = new Path(uri);

  IntWritable key = new IntWritable();
  Text value = new Text();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
    for (int i = 0; i < 100; i++) {
      key.set(100 - i);
      value.set(DATA[i % DATA.length]);
      System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
      writer.append(key, value);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
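As an aside, the five-argument createWriter(fs, conf, path, keyClass, valueClass) overload used above is deprecated in newer Hadoop releases. A hedged sketch of the equivalent option-based call, in the same style that Examples #14, #24, and #26 below use:

// A sketch, not part of nutchpy: the option-based equivalent of the
// deprecated createWriter call in write_seq() above (assumes Hadoop 2.x+).
writer = SequenceFile.createWriter(conf,
    SequenceFile.Writer.file(path),
    SequenceFile.Writer.keyClass(IntWritable.class),
    SequenceFile.Writer.valueClass(Text.class));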
Example #3
Source File: TestSequenceFileLoader.java From spork with Apache License 2.0
@Override
public void setUp() throws Exception {
  pigServer = new PigServer(LOCAL);
  File tmpFile = File.createTempFile("test", ".txt");
  tmpFileName = tmpFile.getAbsolutePath();
  System.err.println("fileName: " + tmpFileName);
  Path path = new Path("file:///" + tmpFileName);
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.get(path.toUri(), conf);

  IntWritable key = new IntWritable();
  Text value = new Text();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
    for (int i = 0; i < DATA.length; i++) {
      key.set(i);
      value.set(DATA[i]);
      writer.append(key, value);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
Example #4
Source File: DistributedFSCheck.java From hadoop with Apache License 2.0
private void createInputFile(String rootName) throws IOException {
  cleanup(); // clean up if previous run failed

  Path inputFile = new Path(MAP_INPUT_DIR, "in_file");
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, fsConfig, inputFile,
      Text.class, LongWritable.class, CompressionType.NONE);

  try {
    nrFiles = 0;
    listSubtree(new Path(rootName), writer);
  } finally {
    writer.close();
  }

  LOG.info("Created map input files.");
}
Example #5
Source File: TestJoinDatamerge.java From hadoop with Apache License 2.0
private static int countProduct(IntWritable key, Path[] src,
    Configuration conf) throws IOException {
  int product = 1;
  for (Path p : src) {
    int count = 0;
    SequenceFile.Reader r = new SequenceFile.Reader(
        cluster.getFileSystem(), p, conf);
    IntWritable k = new IntWritable();
    IntWritable v = new IntWritable();
    while (r.next(k, v)) {
      if (k.equals(key)) {
        count++;
      }
    }
    r.close();
    if (count != 0) {
      product *= count;
    }
  }
  return product;
}
Example #6
Source File: KMeansReducer.java From mapreduce-kmeans with Apache License 2.0
@SuppressWarnings("deprecation") @Override protected void cleanup(Context context) throws IOException, InterruptedException { super.cleanup(context); Configuration conf = context.getConfiguration(); Path outPath = new Path(conf.get("centroid.path")); FileSystem fs = FileSystem.get(conf); fs.delete(outPath, true); try (SequenceFile.Writer out = SequenceFile.createWriter(fs, context.getConfiguration(), outPath, ClusterCenter.class, IntWritable.class)) { final IntWritable value = new IntWritable(0); for (ClusterCenter center : centers) { out.append(center, value); } } }
Example #7
Source File: SeqCombiner.java From compiler with Apache License 2.0
public static long readAndAppendAst(Configuration conf, FileSystem fileSystem,
    MapFile.Writer writer, String fileName, long lastKey) throws IOException {
  long newLastKey = lastKey;
  SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, new Path(fileName), conf);
  LongWritable longKey = new LongWritable();
  BytesWritable value = new BytesWritable();
  try {
    while (r.next(longKey, value)) {
      newLastKey = longKey.get() + lastKey;
      writer.append(new LongWritable(newLastKey), value);
    }
  } catch (Exception e) {
    System.err.println(fileName);
    e.printStackTrace();
  } finally {
    r.close();
  }
  return newLastKey;
}
Example #8
Source File: ProxiedFileSystemWrapper.java From incubator-gobblin with Apache License 2.0
/**
 * Get token from the token sequence file.
 * @param authPath
 * @param proxyUserName
 * @return Token for proxyUserName if it exists.
 * @throws IOException
 */
private static Optional<Token<?>> getTokenFromSeqFile(String authPath,
    String proxyUserName) throws IOException {
  try (Closer closer = Closer.create()) {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    SequenceFile.Reader tokenReader =
        closer.register(new SequenceFile.Reader(localFs, new Path(authPath), localFs.getConf()));
    Text key = new Text();
    Token<?> value = new Token<>();
    while (tokenReader.next(key, value)) {
      LOG.info("Found token for " + key);
      if (key.toString().equals(proxyUserName)) {
        return Optional.<Token<?>> of(value);
      }
    }
  }
  return Optional.absent();
}
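For context, a token file in the layout this method expects (a Text user name as key and a Hadoop Token as value; Token implements Writable) could be produced by a matching writer. The following is a hedged sketch under that assumption, not code from incubator-gobblin:

// Hypothetical counterpart to getTokenFromSeqFile(...): writes one
// user -> token entry in the Text/Token layout the reader above expects.
private static void writeTokenFile(String authPath, String userName, Token<?> token)
    throws IOException {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(localFs, conf,
      new Path(authPath), Text.class, Token.class);
  try {
    writer.append(new Text(userName), token);
  } finally {
    IOUtils.closeStream(writer);
  }
}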
Example #9
Source File: 1000021_TestCDbwEvaluator_t.java From coming with MIT License
private void checkRefPoints(int numIterations) throws IOException {
  for (int i = 0; i <= numIterations; i++) {
    Path out = new Path(getTestTempDirPath("output"), "representativePoints-" + i);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus file : fs.listStatus(out)) {
      if (!file.getPath().getName().startsWith(".")) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        try {
          Writable clusterId = new IntWritable(0);
          VectorWritable point = new VectorWritable();
          while (reader.next(clusterId, point)) {
            System.out.println("\tC-" + clusterId + ": "
                + AbstractCluster.formatVector(point.get(), null));
          }
        } finally {
          reader.close();
        }
      }
    }
  }
}
Example #10
Source File: TestSequenceFileWriter.java From suro with Apache License 2.0
private int checkFileContents(String filePath, String message) throws IOException {
  SequenceFile.Reader r = new SequenceFile.Reader(
      FileSystem.get(new Configuration()),
      new Path(filePath),
      new Configuration());

  Text routingKey = new Text();
  MessageWritable value = new MessageWritable();
  StringSerDe serde = new StringSerDe();

  int i = 0;
  while (r.next(routingKey, value)) {
    assertEquals(routingKey.toString(), "routingKey");
    assertEquals(serde.deserialize(value.getMessage().getPayload()), message + i);
    ++i;
  }
  r.close();

  return i;
}
Example #11
Source File: DistributedFSCheck.java From big-c with Apache License 2.0
private void createInputFile(String rootName) throws IOException {
  cleanup(); // clean up if previous run failed

  Path inputFile = new Path(MAP_INPUT_DIR, "in_file");
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, fsConfig, inputFile,
      Text.class, LongWritable.class, CompressionType.NONE);

  try {
    nrFiles = 0;
    listSubtree(new Path(rootName), writer);
  } finally {
    writer.close();
  }

  LOG.info("Created map input files.");
}
Example #12
Source File: TestMapRed.java From RDFS with Apache License 2.0
public void reduce(WritableComparable key, Iterator values,
    OutputCollector output, Reporter reporter) throws IOException {
  if (first) {
    first = false;
    MapOutputFile mapOutputFile = new MapOutputFile(taskId.getJobID());
    mapOutputFile.setConf(conf);
    Path input = mapOutputFile.getInputFile(0, taskId);
    FileSystem fs = FileSystem.get(conf);
    assertTrue("reduce input exists " + input, fs.exists(input));
    SequenceFile.Reader rdr = new SequenceFile.Reader(fs, input, conf);
    assertEquals("is reduce input compressed " + input,
        compressInput, rdr.isCompressed());
    rdr.close();
  }
}
Example #13
Source File: TensorReaderBinaryBlockParallel.java From systemds with Apache License 2.0
@Override
public Object call() throws Exception {
  TensorBlock value = new TensorBlock();
  TensorIndexes key = new TensorIndexes();

  // directly read from sequence files (individual partfiles)
  try (SequenceFile.Reader reader =
      new SequenceFile.Reader(_job, SequenceFile.Reader.file(_path))) {
    // note: next(key, value) does not yet exploit the given serialization classes,
    // record reader does but is generally slower.
    while (reader.next(key, value)) {
      if (value.isEmpty(false))
        continue;
      int[] lower = new int[_dims.length];
      int[] upper = new int[lower.length];
      UtilFunctions.getBlockBounds(key, value.getLongDims(), _blen, lower, upper);
      _dest.copy(lower, upper, value);
    }
  }
  return null;
}
Example #14
Source File: SequenceFileUtil.java From alchemy with Apache License 2.0
public static void writeSequenceFile(String path) throws Exception {
  Writer.Option filePath = Writer.file(new Path(path));
  Writer.Option keyClass = Writer.keyClass(IntWritable.class);
  Writer.Option valueClass = Writer.valueClass(Text.class);
  Writer.Option compression = Writer.compression(CompressionType.NONE);
  Writer writer = SequenceFile.createWriter(configuration, filePath, keyClass,
      valueClass, compression);

  IntWritable key = new IntWritable();
  Text value = new Text("");
  for (int i = 0; i < 100; i++) {
    key.set(i);
    value.set("value_" + i);
    writer.append(key, value);
  }
  // hflush forces buffered data out to the underlying stream before close
  writer.hflush();
  writer.close();
}
Example #15
Source File: TestGlobbedCopyListing.java From big-c with Apache License 2.0
private void verifyContents(Path listingPath) throws Exception {
  SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(),
      listingPath, new Configuration());
  Text key = new Text();
  CopyListingFileStatus value = new CopyListingFileStatus();
  Map<String, String> actualValues = new HashMap<String, String>();
  while (reader.next(key, value)) {
    if (value.isDirectory() && key.toString().equals("")) {
      // ignore root with empty relPath, which is an entry to be
      // used for preserving root attributes etc.
      continue;
    }
    actualValues.put(value.getPath().toString(), key.toString());
  }

  Assert.assertEquals(expectedValues.size(), actualValues.size());
  for (Map.Entry<String, String> entry : actualValues.entrySet()) {
    Assert.assertEquals(entry.getValue(), expectedValues.get(entry.getKey()));
  }
}
Example #16
Source File: BlurInputFormatTest.java From incubator-retired-blur with Apache License 2.0
private void walkOutput(Path output, Configuration conf, ResultReader resultReader)
    throws IOException {
  FileSystem fileSystem = output.getFileSystem(conf);
  FileStatus fileStatus = fileSystem.getFileStatus(output);
  if (fileStatus.isDir()) {
    FileStatus[] listStatus = fileSystem.listStatus(output, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        return !path.getName().startsWith("_");
      }
    });
    for (FileStatus fs : listStatus) {
      walkOutput(fs.getPath(), conf, resultReader);
    }
  } else {
    Reader reader = new SequenceFile.Reader(fileSystem, output, conf);
    Text rowId = new Text();
    TableBlurRecord tableBlurRecord = new TableBlurRecord();
    while (reader.next(rowId, tableBlurRecord)) {
      resultReader.read(rowId, tableBlurRecord);
    }
    reader.close();
  }
}
Example #17
Source File: TestCopyListing.java From hadoop with Apache License 2.0
@Test
public void testFailOnCloseError() throws IOException {
  File inFile = File.createTempFile("TestCopyListingIn", null);
  inFile.deleteOnExit();
  File outFile = File.createTempFile("TestCopyListingOut", null);
  outFile.deleteOnExit();
  List<Path> srcs = new ArrayList<Path>();
  srcs.add(new Path(inFile.toURI()));

  Exception expectedEx = new IOException("boom");
  SequenceFile.Writer writer = mock(SequenceFile.Writer.class);
  doThrow(expectedEx).when(writer).close();

  SimpleCopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);
  DistCpOptions options = new DistCpOptions(srcs, new Path(outFile.toURI()));
  Exception actualEx = null;
  try {
    listing.doBuildListing(writer, options);
  } catch (Exception e) {
    actualEx = e;
  }
  Assert.assertNotNull("close writer didn't fail", actualEx);
  Assert.assertEquals(expectedEx, actualEx);
}
Example #18
Source File: HadoopSequenceFileFormatter.java From CloverETL-Engine with GNU Lesser General Public License v2.1
@Override
public void setDataTarget(Object outputDataTarget) throws IOException {
  if (outputDataTarget instanceof SequenceFile.Writer) {
    writer = (SequenceFile.Writer) outputDataTarget;
    return;
  }

  if (outputDataTarget instanceof URI) {
    URI targetURI = (URI) outputDataTarget;
    targetURI = HadoopSequenceFileParser.sandboxToFileURI(targetURI);

    ClassLoader formerContextClassloader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
    try {
      if (fs == null) {
        fs = HadoopSequenceFileParser.getFileSystem(targetURI, graph, user, config, this);
      }
      writer = SequenceFile.createWriter(fs, config,
          new Path(targetURI.getPath()),   // Path to new file on fileSystem
          keyCopy.getValueClass(),         // Key Data Type
          valCopy.getValueClass(),         // Value Data Type
          SequenceFile.CompressionType.NONE);
    } catch (ComponentNotReadyException e) {
      throw new IOException("Failed to create Hadoop sequence file writer", e);
    } finally {
      Thread.currentThread().setContextClassLoader(formerContextClassloader);
    }
  } else {
    throw new IOException("Unsupported data target type: "
        + outputDataTarget.getClass().getName());
  }
}
Example #19
Source File: SeqFileReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public static Object getFirstValue(String filename) throws IOException {
  Reader r = null;
  try {
    // read from local filesystem
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    r = new SequenceFile.Reader(fs, new Path(filename), conf);
    Object key = ReflectionUtils.newInstance(r.getKeyClass(), conf);
    Object val = ReflectionUtils.newInstance(r.getValueClass(), conf);
    LOG.info("Reading value of type " + r.getValueClassName()
        + " from SequenceFile " + filename);
    r.next(key);
    r.getCurrentValue(val);
    LOG.info("Value as string: " + val.toString());
    return val;
  } finally {
    if (null != r) {
      try {
        r.close();
      } catch (IOException ioe) {
        LOG.warn("IOException during close: " + ioe.toString());
      }
    }
  }
}
Example #20
Source File: DistBlockIntegrityMonitor.java From RDFS with Apache License 2.0
/**
 * Determines which files have failed for a given job.
 */
private HashMap<String, String> getFailedFiles(Job job) throws IOException {
  HashMap<String, String> failedFiles = new HashMap<String, String>();

  Path outDir = SequenceFileOutputFormat.getOutputPath(job);
  FileSystem fs = outDir.getFileSystem(getConf());
  if (!fs.getFileStatus(outDir).isDir()) {
    throw new IOException(outDir.toString() + " is not a directory");
  }

  FileStatus[] files = fs.listStatus(outDir);
  for (FileStatus f : files) {
    Path fPath = f.getPath();
    if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
      LOG.info("opening " + fPath.toString());
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, fPath, getConf());
      Text key = new Text();
      Text value = new Text();
      while (reader.next(key, value)) {
        LOG.info("key: " + key.toString() + " , value: " + value.toString());
        failedFiles.put(key.toString(), value.toString());
      }
      reader.close();
    }
  }
  return failedFiles;
}
Example #21
Source File: SequenceFileOutputFormat.java From hadoop with Apache License 2.0
/** Open the output generated by this format. */
public static SequenceFile.Reader[] getReaders(Configuration conf, Path dir)
    throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));

  // sort names, so that hash partitioning works
  Arrays.sort(names);

  SequenceFile.Reader[] parts = new SequenceFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new SequenceFile.Reader(fs, names[i], conf);
  }
  return parts;
}
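A hedged aside: the (fs, path, conf) Reader constructor above is deprecated in newer Hadoop releases. The same loop could use the option-based constructor shown in Examples #1 and #13 (a sketch, not the shipped Hadoop code):

// Sketch: the same loop with the non-deprecated, option-based constructor.
for (int i = 0; i < names.length; i++) {
  parts[i] = new SequenceFile.Reader(conf, SequenceFile.Reader.file(names[i]));
}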
Example #22
Source File: TestDataJoin.java From hadoop-gpu with Apache License 2.0
private static Path[] writeSimpleSrc(Path testdir, JobConf conf,
    int srcs) throws IOException {
  SequenceFile.Writer[] out = null;
  Path[] src = new Path[srcs];
  try {
    out = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    Text key = new Text();
    key.set("ignored");
    Text val = new Text();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        val.set(Integer.toString(k % srcs == 0 ? k * srcs : k * srcs + i)
            + "\t" + Integer.toString(10 * k + i));
        out[i].append(key, val);
        if (i == k) {
          // add duplicate key
          out[i].append(key, val);
        }
      }
    }
  } finally {
    if (out != null) {
      for (int i = 0; i < srcs; ++i) {
        if (out[i] != null) out[i].close();
      }
    }
  }
  return src;
}
Example #23
Source File: MapTask.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") private void writeSkippedRec(K key, V value) throws IOException{ if(skipWriter==null) { Path skipDir = SkipBadRecords.getSkipOutputPath(conf); Path skipFile = new Path(skipDir, getTaskID().toString()); skipWriter = SequenceFile.createWriter( skipFile.getFileSystem(conf), conf, skipFile, (Class<K>) createKey().getClass(), (Class<V>) createValue().getClass(), CompressionType.BLOCK, getTaskReporter()); } skipWriter.append(key, value); }
Example #24
Source File: SimpleCopyListing.java From big-c with Apache License 2.0
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  return SequenceFile.createWriter(getConf(),
      SequenceFile.Writer.file(pathToListFile),
      SequenceFile.Writer.keyClass(Text.class),
      SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
      SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
Example #25
Source File: HdfsExtractor.java From sqoop-on-spark with Apache License 2.0
/**
 * Returns true if the given file is a sequence file.
 * @param file
 * @return boolean
 */
@SuppressWarnings("deprecation")
private boolean isSequenceFile(Path file) {
  SequenceFile.Reader filereader = null;
  try {
    filereader = new SequenceFile.Reader(file.getFileSystem(conf), file, conf);
    filereader.close();
  } catch (IOException e) {
    return false;
  }
  return true;
}
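The check above works because the Reader constructor validates the SequenceFile header, which begins with the three magic bytes 'S', 'E', 'Q' followed by a version byte, and throws an IOException otherwise. A cheaper alternative would inspect only the magic bytes; the following looksLikeSequenceFile is a hypothetical helper, not part of the sqoop-on-spark code, and it skips the version and codec validation a full Reader performs:

// Hypothetical helper (not sqoop-on-spark code): peek at the SequenceFile
// magic header directly. Assumes the same 'conf' field the method above uses.
private boolean looksLikeSequenceFile(Path file) {
  try (FSDataInputStream in = file.getFileSystem(conf).open(file)) {
    byte[] magic = new byte[3];
    in.readFully(magic);
    return magic[0] == 'S' && magic[1] == 'E' && magic[2] == 'Q';
  } catch (IOException e) {
    return false;
  }
}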
Example #26
Source File: CubeStatsWriter.java From kylin-on-parquet-v2 with Apache License 2.0
private static void writeCuboidStatisticsInner(Configuration conf, Path outputFilePath,
    Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber,
    double mapperOverlapRatio, long sourceRecordCount) throws IOException {
  List<Long> allCuboids = Lists.newArrayList();
  allCuboids.addAll(cuboidHLLMap.keySet());
  Collections.sort(allCuboids);

  ByteBuffer valueBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
  SequenceFile.Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(outputFilePath),
      SequenceFile.Writer.keyClass(LongWritable.class),
      SequenceFile.Writer.valueClass(BytesWritable.class));
  try {
    // mapper overlap ratio at key -1
    writer.append(new LongWritable(-1), new BytesWritable(Bytes.toBytes(mapperOverlapRatio)));

    // mapper number at key -2
    writer.append(new LongWritable(-2), new BytesWritable(Bytes.toBytes(mapperNumber)));

    // sampling percentage at key 0
    writer.append(new LongWritable(0L), new BytesWritable(Bytes.toBytes(samplingPercentage)));

    // flat table source_count at key -3
    writer.append(new LongWritable(-3), new BytesWritable(Bytes.toBytes(sourceRecordCount)));

    for (long i : allCuboids) {
      valueBuf.clear();
      cuboidHLLMap.get(i).writeRegisters(valueBuf);
      valueBuf.flip();
      writer.append(new LongWritable(i), new BytesWritable(valueBuf.array(), valueBuf.limit()));
    }
  } finally {
    IOUtils.closeQuietly(writer);
  }
}
Example #27
Source File: TestMapRed.java From hadoop with Apache License 2.0
private static void printSequenceFile(FileSystem fs, Path p,
    Configuration conf) throws IOException {
  SequenceFile.Reader r = new SequenceFile.Reader(fs, p, conf);
  Object key = null;
  Object value = null;
  while ((key = r.next(key)) != null) {
    value = r.getCurrentValue(value);
    System.out.println("  Row: " + key + ", " + value);
  }
  r.close();
}
Example #28
Source File: HdfsMrsPyramidOutputFormatTest.java From mrgeo with Apache License 2.0
@Before
public void setup() throws Exception {
  defaultCodec = DefaultCodec.class.newInstance();
  defaultCompressionType = SequenceFile.CompressionType.BLOCK;
  mockWriter = mock(MapFile.Writer.class);
  outputPath = new Path(outputPathString);
}
Example #29
Source File: HadoopUtil.java From kylin-on-parquet-v2 with Apache License 2.0
@SuppressWarnings("deprecation") public static List<String> readDistinctColumnValues(Configuration conf, String inputPath) throws IOException { try (SequenceFile.Reader reader = new SequenceFile.Reader(HadoopUtil.getWorkingFileSystem(conf), new Path(inputPath), conf)) { List<String> values = Lists.newArrayList(); NullWritable key = (NullWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { values.add(value.toString()); } return values; } }
Example #30
Source File: HdfsSequenceWriter.java From sqoop-on-spark with Apache License 2.0
@SuppressWarnings("deprecation") public void initialize(Path filepath, Configuration conf, CompressionCodec codec) throws IOException { if (codec != null) { filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class, NullWritable.class, SequenceFile.CompressionType.BLOCK, codec); } else { filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class, NullWritable.class, SequenceFile.CompressionType.NONE); } text = new Text(); }