org.apache.hadoop.mapred.SplitLocationInfo Java Examples

The following examples show how to use org.apache.hadoop.mapred.SplitLocationInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FileSplit.java (from the hadoop project, Apache License 2.0; 6 votes)
/** Constructs a split with host and cached-blocks information
 *
 * @param file the file name
 * @param start the position of the first byte in the file to process
 * @param length the number of bytes in the file to process
 * @param hosts the list of hosts containing the block
 * @param inMemoryHosts the list of hosts containing the block in memory
 */
public FileSplit(Path file, long start, long length, String[] hosts,
    String[] inMemoryHosts) {
  this(file, start, length, hosts);
  hostInfos = new SplitLocationInfo[hosts.length];
  for (int i = 0; i < hosts.length; i++) {
    // because N will be tiny, scanning is probably faster than a HashSet
    boolean inMemory = false;
    for (String inMemoryHost : inMemoryHosts) {
      if (inMemoryHost.equals(hosts[i])) {
        inMemory = true;
        break;
      }
    }
    hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory);
  }
}
 
Example #2
Source File: TestFileInputFormat.java (from the hadoop project, Apache License 2.0; 6 votes)
/**
 * Checks that the first split computed for {@code test:///a1/a2} reports two
 * locations, and that the "localhost" entry is flagged both on-disk and
 * in-memory while the other entry is flagged on-disk only.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  Job job = Job.getInstance(conf);
  TextInputFormat inputFormat = new TextInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(job);
  InputSplit firstSplit = splits.get(0);

  String[] hostNames = firstSplit.getLocations();
  Assert.assertEquals(2, hostNames.length);
  SplitLocationInfo[] details = firstSplit.getLocationInfo();
  Assert.assertEquals(2, details.length);

  // The order of the two hosts is not fixed, so pick each entry by name.
  SplitLocationInfo localhostInfo;
  if (hostNames[0].equals("localhost")) {
    localhostInfo = details[0];
  } else {
    localhostInfo = details[1];
  }
  SplitLocationInfo otherhostInfo;
  if (hostNames[0].equals("otherhost")) {
    otherhostInfo = details[0];
  } else {
    otherhostInfo = details[1];
  }

  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
 
Example #3
Source File: FileSplit.java (from the big-c project, Apache License 2.0; 6 votes)
/** Constructs a split with host and cached-blocks information
 *
 * @param file the file name
 * @param start the position of the first byte in the file to process
 * @param length the number of bytes in the file to process
 * @param hosts the list of hosts containing the block
 * @param inMemoryHosts the list of hosts containing the block in memory
 */
public FileSplit(Path file, long start, long length, String[] hosts,
    String[] inMemoryHosts) {
  this(file, start, length, hosts);
  hostInfos = new SplitLocationInfo[hosts.length];
  for (int i = 0; i < hosts.length; i++) {
    // because N will be tiny, scanning is probably faster than a HashSet
    boolean inMemory = false;
    for (String inMemoryHost : inMemoryHosts) {
      if (inMemoryHost.equals(hosts[i])) {
        inMemory = true;
        break;
      }
    }
    hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory);
  }
}
 
Example #4
Source File: TestFileInputFormat.java (from the big-c project, Apache License 2.0; 6 votes)
/**
 * Checks that the first split computed for {@code test:///a1/a2} reports two
 * locations, and that the "localhost" entry is flagged both on-disk and
 * in-memory while the other entry is flagged on-disk only.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  Job job = Job.getInstance(conf);
  TextInputFormat inputFormat = new TextInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(job);
  InputSplit firstSplit = splits.get(0);

  String[] hostNames = firstSplit.getLocations();
  Assert.assertEquals(2, hostNames.length);
  SplitLocationInfo[] details = firstSplit.getLocationInfo();
  Assert.assertEquals(2, details.length);

  // The order of the two hosts is not fixed, so pick each entry by name.
  SplitLocationInfo localhostInfo;
  if (hostNames[0].equals("localhost")) {
    localhostInfo = details[0];
  } else {
    localhostInfo = details[1];
  }
  SplitLocationInfo otherhostInfo;
  if (hostNames[0].equals("otherhost")) {
    otherhostInfo = details[0];
  } else {
    otherhostInfo = details[1];
  }

  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
 
Example #5
Source File: HDFSFunctions.java (from the vxquery project, Apache License 2.0; 6 votes)
/**
 * Groups the indices of {@code this.splits} by the host name of each split's
 * first reported location.
 *
 * <p>Indices follow the iteration order of {@code this.splits}, starting at 0.
 *
 * @return a map from host name to the indices of the splits whose first
 *         location is that host
 * @throws IOException if a split's location info cannot be read, or if a split
 *         reports no location info at all (null or empty array)
 */
public HashMap<String, ArrayList<Integer>> getLocationsOfSplits() throws IOException {
    HashMap<String, ArrayList<Integer>> splitsMap = new HashMap<>();
    int i = 0;
    for (InputSplit s : this.splits) {
        SplitLocationInfo[] info = s.getLocationInfo();
        // getLocationInfo() may legitimately return null (or an empty array);
        // fail with a descriptive checked exception instead of an unchecked
        // NullPointerException / ArrayIndexOutOfBoundsException on info[0].
        if (info == null || info.length == 0) {
            throw new IOException("No location info available for split " + i);
        }
        String hostname = info[0].getLocation();
        // Single lookup: create the index list the first time a host is seen.
        ArrayList<Integer> indices = splitsMap.get(hostname);
        if (indices == null) {
            indices = new ArrayList<>();
            splitsMap.put(hostname, indices);
        }
        indices.add(i);
        i++;
    }

    return splitsMap;
}
 
Example #6
Source File: FileSplit.java (from the hadoop project, Apache License 2.0; 4 votes)
/**
 * Returns the per-host location details held in {@code hostInfos}.
 *
 * @return the {@code hostInfos} array itself; no copy is made
 * @throws IOException declared by the signature; this body does not throw it
 */
@Override
@Evolving
public SplitLocationInfo[] getLocationInfo() throws IOException {
  return this.hostInfos;
}
 
Example #7
Source File: FileSplit.java (from the big-c project, Apache License 2.0; 4 votes)
/**
 * Returns the per-host location details held in {@code hostInfos}.
 *
 * @return the {@code hostInfos} array itself; no copy is made
 * @throws IOException declared by the signature; this body does not throw it
 */
@Override
@Evolving
public SplitLocationInfo[] getLocationInfo() throws IOException {
  return this.hostInfos;
}
 
Example #8
Source File: InputSplit.java (from the hadoop project, Apache License 2.0; 2 votes)
/**
 * Gets info about which nodes the input split is stored on and how it is
 * stored at each location.
 *
 * <p>This implementation always returns {@code null}.
 *
 * @return list of {@code SplitLocationInfo}s describing how the split
 *    data is stored at each location. A null value indicates that all the
 *    locations have the data stored on disk.
 * @throws IOException declared by the signature; this implementation itself
 *    never throws it
 */
@Evolving
public SplitLocationInfo[] getLocationInfo() throws IOException {
  return null;
}
 
Example #9
Source File: InputSplit.java (from the big-c project, Apache License 2.0; 2 votes)
/**
 * Gets info about which nodes the input split is stored on and how it is
 * stored at each location.
 *
 * <p>This implementation always returns {@code null}.
 *
 * @return list of {@code SplitLocationInfo}s describing how the split
 *    data is stored at each location. A null value indicates that all the
 *    locations have the data stored on disk.
 * @throws IOException declared by the signature; this implementation itself
 *    never throws it
 */
@Evolving
public SplitLocationInfo[] getLocationInfo() throws IOException {
  return null;
}