Java Code Examples for org.apache.hadoop.io.MapFile#Reader

The following examples show how to use org.apache.hadoop.io.MapFile#Reader. Each example is taken from an open-source project; the originating project, source file, and license are noted in the heading above each snippet.
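
Before the project examples, here is a minimal read pattern as a sketch. The path "/tmp/example.map" and the Text key and value types are illustrative assumptions, and the usual Hadoop imports (org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.Path, org.apache.hadoop.io.*) are omitted, just as they are in the examples below. A MapFile is a directory holding a sorted "data" SequenceFile and an "index" file; Reader#get() returns the value for a key or null if the key is absent.

// Minimal sketch (assumed path and key/value types): open a MapFile,
// look up one key, then close the reader.
Configuration conf = new Configuration();
MapFile.Reader reader = new MapFile.Reader(new Path("/tmp/example.map"), conf);
try {
    Text value = new Text();
    // get() fills 'value' and returns non-null if the key exists
    if (reader.get(new Text("some-key"), value) != null) {
        System.out.println("found: " + value);
    }
} finally {
    reader.close();
}
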
Example 1
Source File: TestCodec.java    From hadoop with Apache License 2.0
private void codecTestMapFile(Class<? extends CompressionCodec> clazz,
    CompressionType type, int records) throws Exception {
  
  FileSystem fs = FileSystem.get(conf);
  LOG.info("Creating MapFiles with " + records  + 
          " records using codec " + clazz.getSimpleName());
  Path path = new Path(new Path(
      System.getProperty("test.build.data", "/tmp")),
    clazz.getSimpleName() + "-" + type + "-" + records);

  LOG.info("Writing " + path);
  createMapFile(conf, fs, path, clazz.newInstance(), type, records);
  MapFile.Reader reader = new MapFile.Reader(path, conf);
  Text key1 = new Text("002");
  assertNotNull(reader.get(key1, new Text()));
  Text key2 = new Text("004");
  assertNotNull(reader.get(key2, new Text()));
}
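
Example 1 relies on a createMapFile helper that is not shown above. The following is a hypothetical sketch of such a helper, written against the MapFile.Writer options API; the actual helper in TestCodec.java may differ. Keys are zero-padded so that their lexicographic order matches the sorted order a MapFile requires, which is why the test can look up "002" and "004".

// Hypothetical createMapFile-style helper (a sketch, not the original TestCodec code).
private static void createMapFile(Configuration conf, FileSystem fs, Path path,
    CompressionCodec codec, CompressionType type, int records) throws IOException {
  // fs is kept to match the call site in Example 1; the writer resolves the
  // filesystem from conf and path itself.
  MapFile.Writer writer = new MapFile.Writer(conf, path,
      MapFile.Writer.keyClass(Text.class),
      MapFile.Writer.valueClass(Text.class),
      MapFile.Writer.compression(type, codec));
  try {
    Text key = new Text();
    for (int j = 0; j < records; j++) {
      key.set(String.format("%03d", j)); // zero-padded keys sort correctly as Text
      writer.append(key, key);
    }
  } finally {
    writer.close();
  }
}
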
 
Example 2
Source File: MapFileReader.java    From deeplearning4j with Apache License 2.0
public MapFileReader(List<String> paths, IndexToKey indexToKey, Class<? extends Writable> recordClass)
                throws IOException {

    this.indexToKey = indexToKey;
    this.recordClass = recordClass;
    this.readers = new MapFile.Reader[paths.size()];

    SequenceFile.Reader.Option[] opts = new SequenceFile.Reader.Option[0];

    Configuration config = new Configuration();
    for (int i = 0; i < paths.size(); i++) {
        readers[i] = new MapFile.Reader(new Path(paths.get(i)), config, opts);
        if (readers[i].getValueClass() != recordClass) {
            throw new UnsupportedOperationException("MapFile record class: " + readers[i].getValueClass()
                            + ", but got class " + recordClass + ", path = " + paths.get(i));
        }
    }

    recordIndexesEachReader = indexToKey.initialize(readers, recordClass);
}
 
Example 3
Source File: MapFileOutputFormat.java    From RDFS with Apache License 2.0
/** Open the output generated by this format. */
public static MapFile.Reader[] getReaders(FileSystem ignored, Path dir,
                                          Configuration conf)
  throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));

  // sort names, so that hash partitioning works
  Arrays.sort(names);
  
  MapFile.Reader[] parts = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return parts;
}
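
The sort is what lets callers treat the returned array as indexed by partition: reducer outputs are named part-00000, part-00001, and so on, so after sorting, parts[i] is the MapFile written by partition i. getEntry (Example 4) depends on this when it computes the partition number with the same Partitioner used at write time and indexes directly into the readers array.
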
 
Example 4
Source File: MapFileOutputFormat.java    From hadoop with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable, V extends Writable>
Writable getEntry(MapFile.Reader[] readers,
                                Partitioner<K, V> partitioner,
                                K key,
                                V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
 
Example 5
Source File: LinkDumper.java    From anthelion with Apache License 2.0
public static void main(String[] args)
  throws Exception {
  
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
    webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
    new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
      + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
 
Example 6
Source File: MapFileOutputFormat.java    From hadoop-gpu with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable, V extends Writable>
Writable getEntry(MapFile.Reader[] readers,
                                Partitioner<K, V> partitioner,
                                K key,
                                V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
 
Example 7
Source File: SegmentHandler.java    From anthelion with Apache License 2.0
/** Open the output generated by this format. */
private MapFile.Reader[] getReaders(String subDir) throws IOException {
  Path dir = new Path(segmentDir, subDir);
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir, SegmentPathFilter.INSTANCE));

  // sort names, so that hash partitioning works
  Arrays.sort(names);
  
  MapFile.Reader[] parts = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return parts;
}
 
Example 8
Source File: TestSegmentMergerCrawlDatums.java    From nutch-htmlunit with Apache License 2.0
/**
 * Checks the merged segment and removes the test directory afterwards.
 *
 * @param testDir the test directory
 * @param mergedSegment the merged segment
 * @return the final fetch status
 */
protected byte checkMergedSegment(Path testDir, Path mergedSegment) throws Exception  {
  // Get a MapFile reader for the <Text,CrawlDatum> pairs
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(mergedSegment, CrawlDatum.FETCH_DIR_NAME), conf);
  
  Text key = new Text();
  CrawlDatum value = new CrawlDatum();
  byte finalStatus = 0x0;
  
  for (MapFile.Reader reader : readers) {
    while (reader.next(key, value)) {
      LOG.info("Reading status for: " + key.toString() + " > " + CrawlDatum.getStatusName(value.getStatus()));
      
      // Only consider fetch status
      if (CrawlDatum.hasFetchStatus(value) && key.toString().equals("http://nutch.apache.org/")) {
        finalStatus = value.getStatus();
      }
    }
    
    // Close the reader again
    reader.close();
  }

  // Remove the test directory again
  fs.delete(testDir, true);
  
  LOG.info("Final fetch status for: http://nutch.apache.org/ > " + CrawlDatum.getStatusName(finalStatus));

  // Return the final status
  return finalStatus;
}
 
Example 9
Source File: MapFileOutputFormat.java    From big-c with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
 
Example 10
Source File: MapFileOutputFormat.java    From big-c with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable, V extends Writable>
Writable getEntry(MapFile.Reader[] readers,
                                Partitioner<K, V> partitioner,
                                K key,
                                V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
 
Example 11
Source File: FSUtils.java    From nutch-htmlunit with Apache License 2.0
/**
 * Closes a group of MapFile readers.
 * 
 * @param readers The MapFile readers to close.
 * @throws IOException If an error occurs while closing a reader.
 */
public static void closeReaders(MapFile.Reader[] readers)
  throws IOException {
  
  // loop through the readers closing one by one
  if (readers != null) {
    for (int i = 0; i < readers.length; i++) {
      MapFile.Reader reader = readers[i];
      if (reader != null) {
        reader.close();
      }
    }
  }
}
 
Example 12
Source File: LinkDumper.java    From nutch-htmlunit with Apache License 2.0
public static void main(String[] args)
  throws Exception {
  
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
    webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
    new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
      + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
 
Example 13
Source File: TestSequenceFile.java    From compiler with Apache License 2.0
private static void closeMap(MapFile.Reader map) {
	if (map != null)
		try {
			map.close();
		} catch (final IOException e) {
			e.printStackTrace();
		}
	map = null;
}
 
Example 14
Source File: FileSplit.java    From mrgeo with Apache License 2.0
public void generateSplits(Path parent, Configuration conf) throws IOException
{
  List<FileSplitInfo> list = new ArrayList<>();

  // get a Hadoop file system handle
  FileSystem fs = getFileSystem(parent);

  // get the list of paths of the subdirectories of the parent
  Path[] paths = FileUtil.stat2Paths(fs.listStatus(parent));

  Arrays.sort(paths);

  int partition = 0;
  // look inside each subdirectory for a data dir and keep track
  for (Path p : paths)
  {
    Path mapfile = null;
    FileStatus[] dirFiles = fs.listStatus(p);
    for (FileStatus dirFile : dirFiles)
    {
      if (dirFile.getPath().getName().equals("data"))
      {
        mapfile = dirFile.getPath().getParent();
        break;
      }
    }

    if (mapfile != null)
    {
      RasterWritable val = new RasterWritable();
      MapFile.Reader reader = createMapFileReader(conf, mapfile);
      TileIdWritable firstKey = (TileIdWritable) reader.getClosest(new TileIdWritable(0), val);
      TileIdWritable lastKey = (TileIdWritable) reader.getClosest(new TileIdWritable(Long.MAX_VALUE), val, true);
      if (firstKey != null && lastKey != null)
      {
        list.add(new FileSplitInfo(firstKey.get(), lastKey.get(), mapfile.getName(), partition));
      }

      partition++;
    }
  }

  splits = list.toArray(new FileSplitInfo[list.size()]);
}
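
The two getClosest calls bracket each MapFile's key range: the two-argument form positions at the smallest key greater than or equal to the given key, while the three-argument form with true as the final argument returns the largest key less than or equal to it. Probing with 0 and Long.MAX_VALUE therefore yields the first and last tile IDs of each map file, and both calls return null on an empty file, which is why a split is only added when both keys are non-null.
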
 
Example 15
Source File: FileSplitTest.java    From mrgeo with Apache License 2.0
@Test
@Category(UnitTest.class)
public void testGenerateSplitsFromPath() throws Exception
{
  // Setup a mock directory structure
  Path rootPath = new Path(FileSplitTest.class.getName() + "-testRootPath");
  Path path1 = new Path(rootPath, FileSplitTest.class.getName() + "-testPath1");
  Path path2 = new Path(rootPath, FileSplitTest.class.getName() + "-testPath2");
  Path path3 = new Path(rootPath, FileSplitTest.class.getName() + "-testPath3");
  Path path1_1 = new Path(path1, "notDataDir");
  Path path1_2 = new Path(path1, "data");
  Path path2_1 = new Path(path2, "data");
  Path path3_1 = new Path(path3, "notDataDir");

  // Setup the FileSystem
  FileSystem mockFS = new FileSystemBuilder()
      .fileStatus(rootPath, new FileStatusBuilder().path(path1).build())
      .fileStatus(rootPath, new FileStatusBuilder().path(path2).build())
      .fileStatus(rootPath, new FileStatusBuilder().path(path3).build())
      .fileStatus(path1, new FileStatusBuilder().path(path1_1).build())
      .fileStatus(path1, new FileStatusBuilder().path(path1_2).build())
      .fileStatus(path2, new FileStatusBuilder().path(path2_1).build())
      .fileStatus(path3, new FileStatusBuilder().path(path3_1).build())
      .build();

  // setup map file readers for each of the data directories
  RasterWritable mockValue = new RasterWritable();
  TileIdWritable[] path1Keys = {new TileIdWritable(2L), new TileIdWritable(4L), new TileIdWritable(6L)};
  RasterWritable[] path1Values = {mockValue, mockValue, mockValue};
  TileIdWritable[] path2Keys = {new TileIdWritable(5L), new TileIdWritable(6L), new TileIdWritable(7L)};
  RasterWritable[] path2Values = {mockValue, mockValue, mockValue};
  MapFile.Reader mockMapFileReaderPath1 = new MapFileReaderBuilder()
      .keyClass(TileIdWritable.class)
      .valueClass(RasterWritable.class)
      .keys(path1Keys)
      .values(path1Values)
      .build();

  MapFile.Reader mockMapFileReaderPath2 = new MapFileReaderBuilder()
      .keyClass(TileIdWritable.class)
      .valueClass(RasterWritable.class)
      .keys(path2Keys)
      .values(path2Values)
      .build();

  // Setup a Configuration
  Configuration mockConfiguration = new ConfigurationBuilder().build();


  FileSplit spySubject = new FileSplit();
  subject = spy(spySubject);
  doReturn(mockFS).when(subject).getFileSystem(rootPath);
  doReturn(mockMapFileReaderPath1).when(subject).createMapFileReader(mockConfiguration, path1);
  doReturn(mockMapFileReaderPath2).when(subject).createMapFileReader(mockConfiguration, path2);
  subject.generateSplits(rootPath, mockConfiguration);

  // Verify we got splits for path 1 and 2
  SplitInfo[] splits = subject.getSplits();
  Assert.assertEquals(2, splits.length);
  verifySplit(path1, path1Keys, splits, 0);
  verifySplit(path2, path2Keys, splits, 1);
}
 
Example 16
Source File: MapFileReader.java    From DataVec with Apache License 2.0
@Override
public void close() throws IOException {
    for (MapFile.Reader r : readers) {
        r.close();
    }
}
 
Example 17
Source File: LongIndexToKey.java    From deeplearning4j with Apache License 2.0
@Override
public List<Pair<Long, Long>> initialize(MapFile.Reader[] readers, Class<? extends Writable> valueClass)
                throws IOException {

    List<Pair<Long, Long>> l = new ArrayList<>(readers.length);
    for (MapFile.Reader r : readers) {
        //Get the first and last keys:
        long first = -1;
        long last = -1;

        //First key: no method for this for some inexplicable reason :/
        LongWritable k = new LongWritable();
        Writable v = ReflectionUtils.newInstance(valueClass, null);
        boolean hasNext = r.next(k, v);
        if(!hasNext){
            //This map file is empty - no data
            l.add(new Pair<>(-1L, -1L));
            continue;
        }
        first = k.get();

        //Last key: easy
        r.reset();
        r.finalKey(k);
        last = k.get();

        l.add(new Pair<>(first, last));
    }

    //Check that things are actually contiguous:
    List<Pair<Long, Long>> sorted = new ArrayList<>(l.size());
    for(Pair<Long,Long> p : l){
        if(p.getLeft() >= 0){
            sorted.add(p);
        }
    }
    Collections.sort(sorted, new Comparator<Pair<Long, Long>>() {
        @Override
        public int compare(Pair<Long, Long> o1, Pair<Long, Long> o2) {
            return Long.compare(o1.getFirst(), o2.getFirst());
        }
    });

    if (sorted.size() == 0){
        throw new IllegalStateException("Map file is empty - no data available");
    }
    if (sorted.get(0).getFirst() != 0L) {
        throw new UnsupportedOperationException("Minimum key value is not 0: got " + sorted.get(0).getFirst());
    }

    for (int i = 0; i < sorted.size() - 1; i++) {
        long currLast = sorted.get(i).getSecond();
        long nextFirst = sorted.get(i + 1).getFirst();

        if(nextFirst == -1){
            //Skip empty map file
            continue;
        }

        if (currLast + 1 != nextFirst) {
            throw new IllegalStateException(
                            "Keys are not contiguous between readers: first/last indices (inclusive) " + "are "
                                            + sorted
                                            + ".\n LongIndexKey assumes unique and contiguous LongWritable keys");
        }
    }

    readerIndices = l;
    return readerIndices;
}
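
A note on the first/last key retrieval above: MapFile.Reader has no firstKey() method, so the first key is read with next(); reset() then repositions the reader before the first entry so that later sequential reads start from the beginning, and finalKey() fills in the last key without leaving the reader repositioned.
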
 
Example 18
Source File: SegmentHandler.java    From nutch-htmlunit with Apache License 2.0
private Writable getEntry(MapFile.Reader[] readers, Text url,
                          Writable entry) throws IOException {
  return MapFileOutputFormat.getEntry(readers, PARTITIONER, url, entry);
}
 
Example 19
Source File: FileSplit.java    From mrgeo with Apache License 2.0
protected MapFile.Reader createMapFileReader(Configuration conf, Path mapfile) throws IOException
{
  return new MapFile.Reader(mapfile, conf);
}
 
Example 20
Source File: SegmentHandler.java    From anthelion with Apache License 2.0
private void closeReaders(MapFile.Reader[] readers) throws IOException {
  for (int i = 0; i < readers.length; i++) {
    readers[i].close();
  }
}