Java Code Examples for org.apache.hadoop.mapreduce.InputSplit#getLocations()

The following examples show how to use org.apache.hadoop.mapreduce.InputSplit#getLocations(). The originating project, source file, and license are noted above each example.
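For orientation: getLocations() returns the names of the nodes where a split's data resides, and the framework treats these purely as scheduling hints when placing tasks. The sketch below is a minimal, illustrative custom split; it is not taken from any of the projects on this page, and the class and field names are assumptions.

import java.io.IOException;

import org.apache.hadoop.mapreduce.InputSplit;

// Illustrative split that simply stores its length and preferred hosts.
// A real split would usually also implement org.apache.hadoop.io.Writable
// so the framework can serialize it between the client and the tasks.
public class HostHintSplit extends InputSplit {
    private final long length;
    private final String[] hosts;

    public HostHintSplit(long length, String[] hosts) {
        this.length = length;
        this.hosts = hosts;
    }

    @Override
    public long getLength() throws IOException, InterruptedException {
        return length;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
        // Locality hints only; an empty array means "no preference".
        return hosts;
    }
}
 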
Example 1
Source File: TabletSplitSplit.java    From datawave with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 *
 * @return the union of the child splits' location hints
 * @throws IOException if a child split cannot report its locations
 * @throws InterruptedException if interrupted while collecting locations
 */
public String[] getLocations() throws IOException, InterruptedException {
    HashSet<String> hosts = new HashSet<>();
    for (InputSplit s : splits) {
        String[] hints = s.getLocations();
        if (hints != null && hints.length > 0) {
            Collections.addAll(hosts, hints);
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
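The location hints aggregated by a composite split like the one above are what the framework consults when assigning tasks to nodes. As a usage illustration only (this helper is hypothetical and not part of datawave or Hadoop), one might tally how many splits prefer each host:

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.mapreduce.InputSplit;

// Hypothetical helper: count how many splits name each host as a preferred location.
public final class LocalityHistogram {
    public static Map<String, Integer> hostPreferences(List<InputSplit> splits)
            throws IOException, InterruptedException {
        Map<String, Integer> counts = new HashMap<>();
        for (InputSplit split : splits) {
            String[] hints = split.getLocations();
            if (hints == null) {
                continue; // some splits report no locality information
            }
            for (String host : hints) {
                counts.merge(host, 1, Integer::sum);
            }
        }
        return counts;
    }
}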
 
Example 2
Source File: RedisHashRecordReader.java    From Redis-4.x-Cookbook with MIT License
public void initialize(InputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    // The split's single location is the Redis host that owns this hash.
    host = split.getLocations()[0];
    prefix = ((RedisHashInputSplit) split).getPrefix();
    key = ((RedisHashInputSplit) split).getKey();
    String hashKey = prefix + ":" + key;

    jedis = new Jedis(host);
    log.info("Connect to " + host);
    jedis.connect();
    jedis.getClient().setTimeoutInfinite();

    // Read the whole hash up front and iterate over its entries.
    totalKVs = jedis.hlen(hashKey);
    keyValueMapIter = jedis.hgetAll(hashKey).entrySet().iterator();
}
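The reader above depends on a companion RedisHashInputSplit whose getLocations() carries the Redis host, and which exposes getPrefix() and getKey(). That class is not shown on this page; the following is only a hypothetical sketch of its likely shape, inferred from how the reader uses it:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;

// Hypothetical sketch; the real RedisHashInputSplit from the cookbook may differ.
public class RedisHashInputSplitSketch extends InputSplit implements Writable {
    private String host;   // Redis instance holding the hash
    private String prefix; // hash key prefix
    private String key;    // hash key suffix

    public RedisHashInputSplitSketch() {
        // No-arg constructor required for Writable deserialization.
    }

    public RedisHashInputSplitSketch(String host, String prefix, String key) {
        this.host = host;
        this.prefix = prefix;
        this.key = key;
    }

    @Override
    public long getLength() {
        return 0; // the number of hash entries is not known up front
    }

    @Override
    public String[] getLocations() {
        // The single "location" is the Redis host, which the reader above
        // retrieves via split.getLocations()[0].
        return new String[] { host };
    }

    public String getPrefix() {
        return prefix;
    }

    public String getKey() {
        return key;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(host);
        out.writeUTF(prefix);
        out.writeUTF(key);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        host = in.readUTF();
        prefix = in.readUTF();
        key = in.readUTF();
    }
}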
 
Example 3
Source File: CompositeInputSplit.java    From hadoop with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 */
public String[] getLocations() throws IOException, InterruptedException {
  HashSet<String> hosts = new HashSet<String>();
  for (InputSplit s : splits) {
    String[] hints = s.getLocations();
    if (hints != null && hints.length > 0) {
      for (String host : hints) {
        hosts.add(host);
      }
    }
  }
  return hosts.toArray(new String[hosts.size()]);
}
 
Example 4
Source File: JobSplit.java    From hadoop with Apache License 2.0
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
  try {
    this.locations = split.getLocations();
    this.inputDataLength = split.getLength();
    this.startOffset = startOffset;
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
 
Example 5
Source File: CombineDocumentSplit.java    From marklogic-contentpump with Apache License 2.0
public CombineDocumentSplit(List<FileSplit> splits)
        throws IOException, InterruptedException {
    this.splits = splits;
    locations = new HashSet<String>();
    for (InputSplit split : splits) {
        length += split.getLength();
        // HashSet.add() already ignores duplicates, so no contains() check is needed.
        for (String loc : split.getLocations()) {
            locations.add(loc);
        }
    }
}
 
Example 6
Source File: CompositeInputSplit.java    From big-c with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 */
public String[] getLocations() throws IOException, InterruptedException {
  HashSet<String> hosts = new HashSet<String>();
  for (InputSplit s : splits) {
    String[] hints = s.getLocations();
    if (hints != null && hints.length > 0) {
      for (String host : hints) {
        hosts.add(host);
      }
    }
  }
  return hosts.toArray(new String[hosts.size()]);
}
 
Example 7
Source File: JobSplit.java    From big-c with Apache License 2.0
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
  try {
    this.locations = split.getLocations();
    this.inputDataLength = split.getLength();
    this.startOffset = startOffset;
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
 
Example 8
Source File: PigSplit.java    From spork with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public String[] getLocations() throws IOException, InterruptedException {
    if (locations == null) {
        HashMap<String, Long> locMap = new HashMap<String, Long>();
        Long lenInMap;
        for (InputSplit split : wrappedSplits)
        {
            String[] locs = split.getLocations();
            for (String loc : locs)
            {
                if ((lenInMap = locMap.get(loc)) == null)
                    locMap.put(loc, split.getLength());
                else
                    locMap.put(loc, lenInMap + split.getLength());
            }
        }
        Set<Map.Entry<String, Long>> entrySet = locMap.entrySet();
        Map.Entry<String, Long>[] hostSize =
            entrySet.toArray(new Map.Entry[entrySet.size()]);
        Arrays.sort(hostSize, new Comparator<Map.Entry<String, Long>>() {

          @Override
          public int compare(Entry<String, Long> o1, Entry<String, Long> o2) {
            long diff = o1.getValue() - o2.getValue();
            if (diff < 0) return 1;
            if (diff > 0) return -1;
            return 0;
          }
        });
        // maximum 5 locations are in list: refer to PIG-1648 for more details
        int nHost = Math.min(hostSize.length, 5);
        locations = new String[nHost];
        for (int i = 0; i < nHost; ++i) {
          locations[i] = hostSize[i].getKey();
        }
    }
    return locations;
}
 
Example 9
Source File: TestCombineFileInputFormat.java    From hadoop with Apache License 2.0
@Test
public void testNodeDistribution() throws IOException, InterruptedException {
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 60;
  long totLength = 0;
  long blockSize = 100;
  int numNodes = 10;

  long minSizeNode = 50;
  long minSizeRack = 50;
  int maxSplitSize = 200; // 2 blocks per split.

  String[] locations = new String[numNodes];
  for (int i = 0; i < numNodes; i++) {
    locations[i] = "h" + i;
  }
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];

  int hostCountBase = 0;
  // Generate block list. Replication 3 per block.
  for (int i = 0; i < numBlocks; i++) {
    int localHostCount = hostCountBase;
    String[] blockHosts = new String[3];
    for (int j = 0; j < 3; j++) {
      int hostNum = localHostCount % numNodes;
      blockHosts[j] = "h" + hostNum;
      localHostCount++;
    }
    hostCountBase++;
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
  Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);
  
  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSplitSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSplitSize);
  assertEquals(expectedSplitCount, splits.size());

  // Ensure 90+% of the splits have node local blocks.
  // 100% locality may not always be achieved.
  int numLocalSplits = 0;
  for (InputSplit inputSplit : splits) {
    assertEquals(maxSplitSize, inputSplit.getLength());
    if (inputSplit.getLocations().length == 1) {
      numLocalSplits++;
    }
  }
  assertTrue(numLocalSplits >= 0.9 * splits.size());
}
 
Example 10
Source File: JobSplit.java    From hadoop with Apache License 2.0
public TaskSplitMetaInfo(InputSplit split, long startOffset) 
throws InterruptedException, IOException {
  this(new TaskSplitIndex("", startOffset), split.getLocations(), 
      split.getLength());
}
 
Example 11
Source File: TestCombineFileInputFormat.java    From big-c with Apache License 2.0
@Test
public void testNodeDistribution() throws IOException, InterruptedException {
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 60;
  long totLength = 0;
  long blockSize = 100;
  int numNodes = 10;

  long minSizeNode = 50;
  long minSizeRack = 50;
  int maxSplitSize = 200; // 2 blocks per split.

  String[] locations = new String[numNodes];
  for (int i = 0; i < numNodes; i++) {
    locations[i] = "h" + i;
  }
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];

  int hostCountBase = 0;
  // Generate block list. Replication 3 per block.
  for (int i = 0; i < numBlocks; i++) {
    int localHostCount = hostCountBase;
    String[] blockHosts = new String[3];
    for (int j = 0; j < 3; j++) {
      int hostNum = localHostCount % numNodes;
      blockHosts[j] = "h" + hostNum;
      localHostCount++;
    }
    hostCountBase++;
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
  Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);
  
  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSplitSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSplitSize);
  assertEquals(expectedSplitCount, splits.size());

  // Ensure 90+% of the splits have node local blocks.
  // 100% locality may not always be achieved.
  int numLocalSplits = 0;
  for (InputSplit inputSplit : splits) {
    assertEquals(maxSplitSize, inputSplit.getLength());
    if (inputSplit.getLocations().length == 1) {
      numLocalSplits++;
    }
  }
  assertTrue(numLocalSplits >= 0.9 * splits.size());
}
 
Example 12
Source File: JobSplit.java    From big-c with Apache License 2.0
public TaskSplitMetaInfo(InputSplit split, long startOffset) 
throws InterruptedException, IOException {
  this(new TaskSplitIndex("", startOffset), split.getLocations(), 
      split.getLength());
}
 
Example 13
Source File: CqlRecordReader.java    From stratio-cassandra with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException
{
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                  ? (int) this.split.getLength()
                  : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try
    {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
      throw new RuntimeException("Can't create connection session");

    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) ||
                                             StringUtils.isNotEmpty(userDefinedWhereClauses)))
    {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}