org.apache.hadoop.mapred.MultiFileSplit Java Examples
The following examples show how to use org.apache.hadoop.mapred.MultiFileSplit, an InputSplit from the old mapred API that groups several files into a single split so that many small files can be processed by one map task. The source project and license for each snippet are noted above it.
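Before the examples, a minimal sketch of the class itself. In a real job these splits are built for you by MultiFileInputFormat.getSplits(); constructing one by hand (the paths and lengths below are made-up placeholders) just shows the shape of the API:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MultiFileSplit;

public class MultiFileSplitSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();

    // Placeholder files and lengths; MultiFileInputFormat normally
    // computes these when it packs small files into splits.
    Path[] files   = { new Path("input/a.txt"), new Path("input/b.txt") };
    long[] lengths = { 1024L, 2048L };

    MultiFileSplit split = new MultiFileSplit(job, files, lengths);

    System.out.println("paths in split: " + split.getPaths().length);
    System.out.println("total length:   " + split.getLength());
  }
}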
Example #1
Source File: WarcFileRecordReader.java From wikireverse with MIT License
public WarcFileRecordReader(Configuration conf, InputSplit split) throws IOException {
  if (split instanceof FileSplit) {
    this.filePathList = new Path[1];
    this.filePathList[0] = ((FileSplit) split).getPath();
  } else if (split instanceof MultiFileSplit) {
    this.filePathList = ((MultiFileSplit) split).getPaths();
  } else {
    throw new IOException("InputSplit is not a file split or a multi-file split - aborting");
  }

  // Use FileSystem.get to open Common Crawl URIs using the S3 protocol.
  URI uri = filePathList[0].toUri();
  this.fs = FileSystem.get(uri, conf);

  // Get the total size of all files in the split.
  for (int i = 0; i < filePathList.length; i++) {
    totalFileSize += fs.getFileStatus(filePathList[i]).getLen();
  }

  Class<? extends CompressionCodec> codecClass = null;
  try {
    codecClass = conf.getClassByName("org.apache.hadoop.io.compress.GzipCodec")
        .asSubclass(CompressionCodec.class);
    compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  } catch (ClassNotFoundException cnfEx) {
    compressionCodec = null;
    LOG.info("!!! ClassNotFoundException thrown setting Gzip codec");
  }

  openNextFile();
}
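The constructor ends with a call to openNextFile(), whose body is not part of this example. A hedged reconstruction of what such a helper plausibly does, given the fields set above (currentFileIndex, currentStream, and compressedStream are hypothetical names, not taken from WarcFileRecordReader):

// Hypothetical sketch -- not the actual wikireverse implementation.
private int currentFileIndex = -1;
private FSDataInputStream currentStream;
private InputStream compressedStream;

private boolean openNextFile() throws IOException {
  if (++currentFileIndex >= filePathList.length) {
    return false;  // no files left in this split
  }
  currentStream = fs.open(filePathList[currentFileIndex]);
  // Wrap the raw stream in the Gzip codec when one was resolved above.
  compressedStream = (compressionCodec != null)
      ? compressionCodec.createInputStream(currentStream)
      : currentStream;
  return true;
}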
Example #2
Source File: MultiFileWordCount.java From RDFS with Apache License 2.0
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split) throws IOException {
  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // Open the first file.
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Example #3
Source File: MultiFileWordCount.java From hadoop-book with Apache License 2.0
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split) throws IOException {
  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // Open the first file.
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Example #4
Source File: MultiFileWordCount.java From hadoop-gpu with Apache License 2.0
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split) throws IOException {
  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // Open the first file.
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
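Examples #2 through #4 are the same constructor picked up from three different forks of Hadoop's MultiFileWordCount example. The companion piece in such a reader is next(), which must roll over to the next path in the split when the current file is exhausted. A simplified sketch of that pattern, using a plain LongWritable key instead of the example's WordOffset type (field names follow the constructor above):

// Simplified sketch; the upstream example uses a custom WordOffset key.
public boolean next(LongWritable key, Text value) throws IOException {
  while (true) {
    String line = currentReader.readLine();
    if (line != null) {
      key.set(offset);
      value.set(line);
      offset += line.length() + 1;  // account for the stripped newline
      return true;
    }
    // Current file is exhausted: close it and advance to the next path.
    currentReader.close();
    if (++count >= paths.length) {
      return false;  // no more files in this split
    }
    currentStream = fs.open(paths[count]);
    currentReader = new BufferedReader(new InputStreamReader(currentStream));
  }
}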
Example #5
Source File: MultiFileWordCount.java From RDFS with Apache License 2.0
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Example #6
Source File: MultiFileWordCount.java From hadoop-book with Apache License 2.0
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Example #7
Source File: MultiFileWordCount.java From hadoop-gpu with Apache License 2.0
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
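Examples #5 through #7 are the matching MultiFileInputFormat.getRecordReader overrides from the same three forks. Wiring such an input format into a job only takes one extra driver call; a minimal sketch, assuming MyInputFormat is the enclosing MultiFileInputFormat subclass and that input/output paths arrive as program arguments:

JobConf job = new JobConf(MultiFileWordCount.class);
job.setJobName("multifile-wordcount");

// Register the custom format; MultiFileInputFormat.getSplits() then
// produces the MultiFileSplits handed to getRecordReader above.
job.setInputFormat(MyInputFormat.class);

FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

JobClient.runJob(job);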