Java Code Examples for org.apache.hadoop.mapreduce.InputSplit#getLocations()

The following examples show how to use org.apache.hadoop.mapreduce.InputSplit#getLocations(). The originating project, source file, and license are noted above each example.
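For orientation: getLocations() returns the names of the nodes where a split's data resides, and the framework treats these purely as scheduling hints when placing tasks. The sketch below is a minimal, illustrative custom split; it is not taken from any of the projects on this page, and the class and field names are assumptions.

import java.io.IOException;

import org.apache.hadoop.mapreduce.InputSplit;

// Illustrative split that simply stores its length and preferred hosts.
// A real split would usually also implement org.apache.hadoop.io.Writable
// so the framework can serialize it between the client and the tasks.
public class HostHintSplit extends InputSplit {
    private final long length;
    private final String[] hosts;

    public HostHintSplit(long length, String[] hosts) {
        this.length = length;
        this.hosts = hosts;
    }

    @Override
    public long getLength() throws IOException, InterruptedException {
        return length;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
        // Locality hints only; an empty array means "no preference".
        return hosts;
    }
}
 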
Example 1
Source File: TabletSplitSplit.java    From datawave with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 *
 * @return the union of the child splits' location hints
 * @throws IOException if a child split cannot report its locations
 * @throws InterruptedException if interrupted while collecting locations
 */
public String[] getLocations() throws IOException, InterruptedException {
    HashSet<String> hosts = new HashSet<>();
    for (InputSplit s : splits) {
        String[] hints = s.getLocations();
        if (hints != null && hints.length > 0) {
            Collections.addAll(hosts, hints);
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
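The location hints aggregated by a composite split like the one above are what the framework consults when assigning tasks to nodes. As a usage illustration only (this helper is hypothetical and not part of datawave or Hadoop), one might tally how many splits prefer each host:

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.mapreduce.InputSplit;

// Hypothetical helper: count how many splits name each host as a preferred location.
public final class LocalityHistogram {
    public static Map<String, Integer> hostPreferences(List<InputSplit> splits)
            throws IOException, InterruptedException {
        Map<String, Integer> counts = new HashMap<>();
        for (InputSplit split : splits) {
            String[] hints = split.getLocations();
            if (hints == null) {
                continue; // some splits report no locality information
            }
            for (String host : hints) {
                counts.merge(host, 1, Integer::sum);
            }
        }
        return counts;
    }
}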
 
Example 2
Source File: RedisHashRecordReader.java    From Redis-4.x-Cookbook with MIT License
public void initialize(InputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    // The split's single location is the Redis host that owns this hash.
    host = split.getLocations()[0];
    prefix = ((RedisHashInputSplit) split).getPrefix();
    key = ((RedisHashInputSplit) split).getKey();
    String hashKey = prefix + ":" + key;

    jedis = new Jedis(host);
    log.info("Connect to " + host);
    jedis.connect();
    jedis.getClient().setTimeoutInfinite();

    // Read the whole hash up front and iterate over its entries.
    totalKVs = jedis.hlen(hashKey);
    keyValueMapIter = jedis.hgetAll(hashKey).entrySet().iterator();
}
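The reader above depends on a companion RedisHashInputSplit whose getLocations() carries the Redis host, and which exposes getPrefix() and getKey(). That class is not shown on this page; the following is only a hypothetical sketch of its likely shape, inferred from how the reader uses it:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;

// Hypothetical sketch; the real RedisHashInputSplit from the cookbook may differ.
public class RedisHashInputSplitSketch extends InputSplit implements Writable {
    private String host;   // Redis instance holding the hash
    private String prefix; // hash key prefix
    private String key;    // hash key suffix

    public RedisHashInputSplitSketch() {
        // No-arg constructor required for Writable deserialization.
    }

    public RedisHashInputSplitSketch(String host, String prefix, String key) {
        this.host = host;
        this.prefix = prefix;
        this.key = key;
    }

    @Override
    public long getLength() {
        return 0; // the number of hash entries is not known up front
    }

    @Override
    public String[] getLocations() {
        // The single "location" is the Redis host, which the reader above
        // retrieves via split.getLocations()[0].
        return new String[] { host };
    }

    public String getPrefix() {
        return prefix;
    }

    public String getKey() {
        return key;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(host);
        out.writeUTF(prefix);
        out.writeUTF(key);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        host = in.readUTF();
        prefix = in.readUTF();
        key = in.readUTF();
    }
}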
 
Example 3
Source File: CompositeInputSplit.java    From hadoop with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 */
public String[] getLocations() throws IOException, InterruptedException {
  HashSet<String> hosts = new HashSet<String>();
  for (InputSplit s : splits) {
    String[] hints = s.getLocations();
    if (hints != null && hints.length > 0) {
      for (String host : hints) {
        hosts.add(host);
      }
    }
  }
  return hosts.toArray(new String[hosts.size()]);
}
 
Example 4
Source File: JobSplit.java    From hadoop with Apache License 2.0
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
  try {
    this.locations = split.getLocations();
    this.inputDataLength = split.getLength();
    this.startOffset = startOffset;
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
 
Example 5
Source File: CombineDocumentSplit.java    From marklogic-contentpump with Apache License 2.0
public CombineDocumentSplit(List<FileSplit> splits)
        throws IOException, InterruptedException {
    this.splits = splits;
    locations = new HashSet<String>();
    for (InputSplit split : splits) {
        length += split.getLength();
        // HashSet.add() already ignores duplicates, so no contains() check is needed.
        for (String loc : split.getLocations()) {
            locations.add(loc);
        }
    }
}
 
Example 6
Source File: CompositeInputSplit.java    From big-c with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 */
public String[] getLocations() throws IOException, InterruptedException {
  HashSet<String> hosts = new HashSet<String>();
  for (InputSplit s : splits) {
    String[] hints = s.getLocations();
    if (hints != null && hints.length > 0) {
      for (String host : hints) {
        hosts.add(host);
      }
    }
  }
  return hosts.toArray(new String[hosts.size()]);
}
 
Example 7
Source File: JobSplit.java    From big-c with Apache License 2.0
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
  try {
    this.locations = split.getLocations();
    this.inputDataLength = split.getLength();
    this.startOffset = startOffset;
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
 
Example 8
Source File: PigSplit.java    From spork with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public String[] getLocations() throws IOException, InterruptedException {
    if (locations == null) {
        HashMap<String, Long> locMap = new HashMap<String, Long>();
        Long lenInMap;
        for (InputSplit split : wrappedSplits)
        {
            String[] locs = split.getLocations();
            for (String loc : locs)
            {
                if ((lenInMap = locMap.get(loc)) == null)
                    locMap.put(loc, split.getLength());
                else
                    locMap.put(loc, lenInMap + split.getLength());
            }
        }
        Set<Map.Entry<String, Long>> entrySet = locMap.entrySet();
        Map.Entry<String, Long>[] hostSize =
            entrySet.toArray(new Map.Entry[entrySet.size()]);
        Arrays.sort(hostSize, new Comparator<Map.Entry<String, Long>>() {

          @Override
          public int compare(Entry<String, Long> o1, Entry<String, Long> o2) {
            long diff = o1.getValue() - o2.getValue();
            if (diff < 0) return 1;
            if (diff > 0) return -1;
            return 0;
          }
        });
        // maximum 5 locations are in list: refer to PIG-1648 for more details
        int nHost = Math.min(hostSize.length, 5);
        locations = new String[nHost];
        for (int i = 0; i < nHost; ++i) {
          locations[i] = hostSize[i].getKey();
        }
    }
    return locations;
}
 
Example 9
Source File: TestCombineFileInputFormat.java    From hadoop with Apache License 2.0
@Test
public void testNodeDistribution() throws IOException, InterruptedException {
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 60;
  long totLength = 0;
  long blockSize = 100;
  int numNodes = 10;

  long minSizeNode = 50;
  long minSizeRack = 50;
  int maxSplitSize = 200; // 2 blocks per split.

  String[] locations = new String[numNodes];
  for (int i = 0; i < numNodes; i++) {
    locations[i] = "h" + i;
  }
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];

  int hostCountBase = 0;
  // Generate block list. Replication 3 per block.
  for (int i = 0; i < numBlocks; i++) {
    int localHostCount = hostCountBase;
    String[] blockHosts = new String[3];
    for (int j = 0; j < 3; j++) {
      int hostNum = localHostCount % numNodes;
      blockHosts[j] = "h" + hostNum;
      localHostCount++;
    }
    hostCountBase++;
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
  Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);
  
  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSplitSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSplitSize);
  assertEquals(expectedSplitCount, splits.size());

  // Ensure 90+% of the splits have node local blocks.
  // 100% locality may not always be achieved.
  int numLocalSplits = 0;
  for (InputSplit inputSplit : splits) {
    assertEquals(maxSplitSize, inputSplit.getLength());
    if (inputSplit.getLocations().length == 1) {
      numLocalSplits++;
    }
  }
  assertTrue(numLocalSplits >= 0.9 * splits.size());
}
 
Example 10
Source File: JobSplit.java    From hadoop with Apache License 2.0
public TaskSplitMetaInfo(InputSplit split, long startOffset) 
throws InterruptedException, IOException {
  this(new TaskSplitIndex("", startOffset), split.getLocations(), 
      split.getLength());
}
 
Example 11
Source File: TestCombineFileInputFormat.java    From big-c with Apache License 2.0
@Test
public void testNodeDistribution() throws IOException, InterruptedException {
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 60;
  long totLength = 0;
  long blockSize = 100;
  int numNodes = 10;

  long minSizeNode = 50;
  long minSizeRack = 50;
  int maxSplitSize = 200; // 2 blocks per split.

  String[] locations = new String[numNodes];
  for (int i = 0; i < numNodes; i++) {
    locations[i] = "h" + i;
  }
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];

  int hostCountBase = 0;
  // Generate block list. Replication 3 per block.
  for (int i = 0; i < numBlocks; i++) {
    int localHostCount = hostCountBase;
    String[] blockHosts = new String[3];
    for (int j = 0; j < 3; j++) {
      int hostNum = localHostCount % numNodes;
      blockHosts[j] = "h" + hostNum;
      localHostCount++;
    }
    hostCountBase++;
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
  Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);
  
  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSplitSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSplitSize);
  assertEquals(expectedSplitCount, splits.size());

  // Ensure 90+% of the splits have node local blocks.
  // 100% locality may not always be achieved.
  int numLocalSplits = 0;
  for (InputSplit inputSplit : splits) {
    assertEquals(maxSplitSize, inputSplit.getLength());
    if (inputSplit.getLocations().length == 1) {
      numLocalSplits++;
    }
  }
  assertTrue(numLocalSplits >= 0.9 * splits.size());
}
 
Example 12
Source File: JobSplit.java    From big-c with Apache License 2.0
public TaskSplitMetaInfo(InputSplit split, long startOffset) 
throws InterruptedException, IOException {
  this(new TaskSplitIndex("", startOffset), split.getLocations(), 
      split.getLength());
}
 
Example 13
Source File: CqlRecordReader.java    From stratio-cassandra with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException
{
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                  ? (int) this.split.getLength()
                  : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try
    {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
      throw new RuntimeException("Can't create connection session");

    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) ||
                                             StringUtils.isNotEmpty(userDefinedWhereClauses)))
    {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}