Java Code Examples for org.apache.kafka.common.PartitionInfo#replicas()

The following examples show how to use org.apache.kafka.common.PartitionInfo#replicas(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TopicReplicationFactorAnomalyFinder.java    From cruise-control with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Detect topics whose partitions do not all carry the expected number of replicas.
 *
 * <p>Topics matching {@code _topicExcludedFromCheck} are skipped. Every remaining topic with at least one partition
 * whose replica count differs from {@code _targetReplicationFactor} becomes a candidate. The candidates are then
 * passed to {@code maybeRetrieveAndCacheTopicMinISR} and evaluated by {@code populateBadTopicsByReplicationFactor}.
 *
 * @return A singleton set holding one anomaly built from all bad topics, or an empty set if there is none.
 */
@Override
public Set<TopicAnomaly> topicAnomalies() {
  LOG.info("Start to detect topic replication factor anomaly.");
  Cluster cluster = _kafkaCruiseControl.kafkaCluster();
  Set<String> candidateTopics = new HashSet<>();
  for (String topic : cluster.topics()) {
    boolean excluded = _topicExcludedFromCheck.matcher(topic).matches();
    if (!excluded
        && cluster.partitionsForTopic(topic).stream()
                  .anyMatch(partition -> partition.replicas().length != _targetReplicationFactor)) {
      candidateTopics.add(topic);
    }
  }
  // The cache refresh runs on every invocation, even when there is no candidate topic.
  refreshTopicMinISRCache();
  if (candidateTopics.isEmpty()) {
    return Collections.emptySet();
  }
  maybeRetrieveAndCacheTopicMinISR(candidateTopics);
  Map<Short, Set<TopicReplicationFactorAnomalyEntry>> badTopicsByReplicationFactor =
      populateBadTopicsByReplicationFactor(candidateTopics, cluster);
  if (badTopicsByReplicationFactor.isEmpty()) {
    return Collections.emptySet();
  }
  return Collections.singleton(createTopicReplicationFactorAnomaly(badTopicsByReplicationFactor, _targetReplicationFactor));
}
 
Example 2
Source File: TopicReplicationFactorAnomalyFinder.java    From cruise-control with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Find the topics that have at least one partition whose replica count differs from the topic's target replication
 * factor. For each topic, the target is the maximum of the value of
 * {@link #SELF_HEALING_TARGET_TOPIC_REPLICATION_FACTOR_CONFIG} and the topic's minISR plus the value of
 * {@link #TOPIC_REPLICATION_FACTOR_MARGIN_CONFIG}. Topics without an entry in the minISR cache are skipped.
 *
 * @param topicsToCheck Set of topics to check.
 * @param cluster Cluster used to look up the partitions (and partition count) of each topic.
 * @return Map of detected topic replication factor anomaly entries by target replication factor. Each entry is
 *         created with the topic name and the fraction of the topic's partitions that violate the target.
 */
private Map<Short, Set<TopicReplicationFactorAnomalyEntry>> populateBadTopicsByReplicationFactor(Set<String> topicsToCheck, Cluster cluster) {
  Map<Short, Set<TopicReplicationFactorAnomalyEntry>> badTopics = new HashMap<>();
  for (String topic : topicsToCheck) {
    if (!_cachedTopicMinISR.containsKey(topic)) {
      continue;
    }
    short minISR = _cachedTopicMinISR.get(topic).minISR();
    short expectedReplicationFactor = (short) Math.max(_targetReplicationFactor, minISR + _topicReplicationFactorMargin);
    int numViolations = 0;
    for (PartitionInfo partition : cluster.partitionsForTopic(topic)) {
      numViolations += partition.replicas().length == expectedReplicationFactor ? 0 : 1;
    }
    if (numViolations == 0) {
      continue;
    }
    double violationRatio = (double) numViolations / cluster.partitionCountForTopic(topic);
    badTopics.computeIfAbsent(expectedReplicationFactor, rf -> new HashSet<>())
             .add(new TopicReplicationFactorAnomalyEntry(topic, violationRatio));
  }
  return badTopics;
}
 
Example 3
Source File: MonitorUtils.java    From cruise-control with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Fill in the replication bytes out rate of a leader replica, because Kafka currently does not report this metric.
 * The rate of each window is set to the leader bytes in rate of that window multiplied by the number of followers
 * (replica count minus one), on the assumption that all followers eventually keep up with the leader.
 *
 * This is done only while creating the cluster model because the replication factor may have changed since the
 * time the PartitionMetricSample was created.
 *
 * @param aggregatedMetricValues the {@link AggregatedMetricValues} for the leader replica; it is modified in place.
 * @param info the partition info for the partition.
 * @return The same {@link AggregatedMetricValues} instance with the replication bytes out rate filled in.
 */
private static AggregatedMetricValues fillInReplicationBytesOut(AggregatedMetricValues aggregatedMetricValues,
                                                                PartitionInfo info) {
  int followerCount = info.replicas().length - 1;
  short bytesInId = KafkaMetricDef.commonMetricDefId(KafkaMetricDef.LEADER_BYTES_IN);
  short bytesOutId = KafkaMetricDef.commonMetricDefId(KafkaMetricDef.REPLICATION_BYTES_OUT_RATE);

  MetricValues bytesIn = aggregatedMetricValues.valuesFor(bytesInId);
  MetricValues bytesOut = aggregatedMetricValues.valuesFor(bytesOutId);
  int numValues = bytesIn.length();
  if (bytesOut == null) {
    // Nothing reported yet: register a fresh series with one slot per leader bytes in value.
    bytesOut = new MetricValues(numValues);
    aggregatedMetricValues.add(bytesOutId, bytesOut);
  }
  // Any previously reported values are overwritten as well.
  int position = 0;
  while (position < numValues) {
    bytesOut.set(position, bytesIn.get(position) * followerCount);
    position++;
  }

  return aggregatedMetricValues;
}
 
Example 4
Source File: KafkaClusterManager.java    From doctorkafka with Apache License 2.0 6 votes vote down vote up
/**
 * Remove the under-replicated partitions that are in the middle of partition reassignment.
 *
 * <p>A partition whose replica count does not exceed its topic's replication factor is always kept. A partition
 * with more replicas than the replication factor (which can happen after a failed partition reassignment) is kept
 * only if fewer than replication-factor many of its replica brokers answer {@code OperatorUtil.pingKafkaBroker}
 * on port 9092.
 *
 * @param urps the under-replicated partitions to filter.
 * @param replicationFactors replication factor by topic name; it must contain every topic found in {@code urps}.
 * @return the partitions that are kept, in their original order.
 */
public List<PartitionInfo> filterOutInReassignmentUrps(List<PartitionInfo> urps,
                                                       Map<String, Integer> replicationFactors) {
  List<PartitionInfo> kept = new ArrayList<>();
  for (PartitionInfo urp : urps) {
    Node[] replicas = urp.replicas();
    int replicationFactor = replicationFactors.get(urp.topic());
    if (replicas.length > replicationFactor) {
      // Collect ids rather than counting so that a broker is counted at most once.
      Set<Integer> reachableBrokerIds = new HashSet<>();
      for (Node replica : replicas) {
        String host = replica.host();
        if (host != null && OperatorUtil.pingKafkaBroker(host, 9092, 5000)) {
          reachableBrokerIds.add(replica.id());
        }
      }
      if (reachableBrokerIds.size() >= replicationFactor) {
        // Enough reachable replicas: drop this partition from the result.
        continue;
      }
    }
    kept.add(urp);
  }
  return kept;
}
 
Example 5
Source File: ClusterPartitionState.java    From cruise-control with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Sort the partitions of the Kafka cluster into the given sets: under replicated, offline, under minIsr, with
 * offline replicas, and (if verbose) other partitions.
 *
 * <p>A partition is under minIsr when it has fewer in-sync replicas than {@code minInsyncReplicas(topic)}; this is
 * checked for every partition. The remaining classification only applies to partitions that are under replicated
 * (in-sync replica count differs from replica count) or to all partitions when {@code verbose} is set.
 *
 * @param underReplicatedPartitions state of under replicated partitions.
 * @param offlinePartitions state of offline partitions, i.e. partitions without any in-sync replica.
 * @param otherPartitions state of partitions other than offline or urp.
 * @param partitionsWithOfflineReplicas state of partitions with offline replicas.
 * @param underMinIsrPartitions state of under min isr partitions.
 * @param verbose true if requested to gather state of partitions other than offline or urp.
 * @param topicPattern regex of topic to filter partition states by, is null if no filter is to be applied
 */
protected void populateKafkaPartitionState(Set<PartitionInfo> underReplicatedPartitions,
    Set<PartitionInfo> offlinePartitions,
    Set<PartitionInfo> otherPartitions,
    Set<PartitionInfo> partitionsWithOfflineReplicas,
    Set<PartitionInfo> underMinIsrPartitions,
    boolean verbose,
    Pattern topicPattern) {
  for (String topic : _kafkaCluster.topics()) {
    if (topicPattern != null && !topicPattern.matcher(topic).matches()) {
      continue;
    }
    int minIsr = minInsyncReplicas(topic);
    for (PartitionInfo partition : _kafkaCluster.partitionsForTopic(topic)) {
      int isrCount = partition.inSyncReplicas().length;
      boolean underReplicated = isrCount != partition.replicas().length;
      if (isrCount < minIsr) {
        underMinIsrPartitions.add(partition);
      }
      if (!underReplicated && !verbose) {
        continue;
      }
      if (partition.offlineReplicas().length != 0) {
        partitionsWithOfflineReplicas.add(partition);
      }
      // Offline takes precedence over under replicated; fully in-sync partitions only get here in verbose mode.
      if (isrCount == 0) {
        offlinePartitions.add(partition);
      } else if (underReplicated) {
        underReplicatedPartitions.add(partition);
      } else {
        otherPartitions.add(partition);
      }
    }
  }
}
 
Example 6
Source File: MonitorUtils.java    From cruise-control with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Check whether the ordered replica list differs between two partition infos.
 *
 * @param prevPartInfo the earlier partition info.
 * @param currPartInfo the later partition info.
 * @return True if the replica counts differ or the broker ids differ at any position, false otherwise.
 */
private static boolean replicaListChanged(PartitionInfo prevPartInfo, PartitionInfo currPartInfo) {
  int replicaCount = prevPartInfo.replicas().length;
  if (replicaCount != currPartInfo.replicas().length) {
    return true;
  }
  // Same length: compare broker ids position by position, so a reordering counts as a change.
  int position = 0;
  while (position < replicaCount) {
    if (prevPartInfo.replicas()[position].id() != currPartInfo.replicas()[position].id()) {
      return true;
    }
    position++;
  }
  return false;
}
 
Example 7
Source File: HolderUtils.java    From cruise-control with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Check if a broker raw metric is reasonable to be missing. As of now, it looks that only the following metrics
 * might be missing:
 * <ul>
 *   <li>BROKER_FOLLOWER_FETCH_REQUEST_RATE (with additional constraints)</li>
 *   <li>BROKER_LOG_FLUSH_RATE</li>
 *   <li>BROKER_LOG_FLUSH_TIME_MS_MEAN</li>
 *   <li>BROKER_LOG_FLUSH_TIME_MS_MAX</li>
 *   <li>BROKER_LOG_FLUSH_TIME_MS_50TH</li>
 *   <li>BROKER_LOG_FLUSH_TIME_MS_999TH</li>
 *   <li>BROKER_PRODUCE_REQUEST_RATE</li>
 *   <li>BROKER_CONSUMER_FETCH_REQUEST_RATE</li>
 * </ul>
 * When these raw metrics are missing, broker load is expected to use {@link #MISSING_BROKER_METRIC_VALUE} as the value.
 *
 * @param cluster The Kafka cluster.
 * @param brokerId The id of the broker whose raw metric is missing
 * @param rawMetricType The raw metric type that is missing.
 * @return True if the missing is allowed, false otherwise.
 */
static boolean allowMissingBrokerMetric(Cluster cluster, int brokerId, RawMetricType rawMetricType) {
  switch (rawMetricType) {
    case BROKER_LOG_FLUSH_RATE:
    case BROKER_LOG_FLUSH_TIME_MS_MEAN:
    case BROKER_LOG_FLUSH_TIME_MS_MAX:
    case BROKER_LOG_FLUSH_TIME_MS_50TH:
    case BROKER_LOG_FLUSH_TIME_MS_999TH:
    case BROKER_PRODUCE_REQUEST_RATE:
    case BROKER_CONSUMER_FETCH_REQUEST_RATE:
      return true;
    case BROKER_FOLLOWER_FETCH_REQUEST_RATE:
      // The broker must report BrokerFollowerFetchRequestRate as soon as it leads at least one partition that
      // has more than one replica, so the metric may only be missing when no such partition exists.
      // Only the replica count is examined here; whether the other replicas are alive is not checked.
      return cluster.partitionsForNode(brokerId).stream().noneMatch(
          partition -> partition.replicas().length > 1
                       && partition.leader() != null
                       && partition.leader().id() == brokerId);
    default:
      return false;
  }
}
 
Example 8
Source File: KafkaUtils.java    From doctorkafka with Apache License 2.0 5 votes vote down vote up
/**
 * Get the replica brokers of a partition that are not in its in-sync replica list.
 *
 * <p>When the in-sync replica count equals the replica count, the lists are not compared and an empty set is
 * returned.
 *
 * @param partitionInfo the partition to inspect.
 * @return a new mutable set with the replicas that are not in sync; empty if the counts match.
 */
public static Set<Node> getNotInSyncBrokers(PartitionInfo partitionInfo) {
  Node[] inSync = partitionInfo.inSyncReplicas();
  Node[] assigned = partitionInfo.replicas();
  Set<Node> notInSync = new HashSet<>();
  if (inSync.length != assigned.length) {
    notInSync.addAll(Arrays.asList(assigned));
    notInSync.removeAll(Arrays.asList(inSync));
  }
  return notInSync;
}
 
Example 9
Source File: OutOfSyncReplica.java    From doctorkafka with Apache License 2.0 5 votes vote down vote up
/**
 * Get the ids of the replica brokers of a partition that are not in its in-sync replica list.
 *
 * <p>When the in-sync replica count equals the replica count, the lists are not compared and an empty set is
 * returned.
 *
 * @param partitionInfo the partition to inspect.
 * @return the broker ids of the out-of-sync replicas; empty if the counts match.
 */
public static Set<Integer> getOutOfSyncReplicas(PartitionInfo partitionInfo) {
  Node[] inSync = partitionInfo.inSyncReplicas();
  Node[] assigned = partitionInfo.replicas();
  if (inSync.length == assigned.length) {
    return new HashSet<>();
  }
  Set<Node> lagging = new HashSet<>(Arrays.asList(assigned));
  lagging.removeAll(Arrays.asList(inSync));
  return lagging.stream().map(Node::id).collect(Collectors.toSet());
}
 
Example 10
Source File: MonitorUtils.java    From cruise-control with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Create replicas of the partition with the given (1) identifier and (2) load information to populate the given cluster model.
 * If partition with the given identifier does not exist in the given cluster, do nothing.
 *
 * For every replica of the partition, in replica-list order, this resolves the capacity of the hosting broker, passes it
 * to {@code clusterModel.handleDeadBroker}, and then creates the replica and sets its load in the cluster model. If the
 * partition has no leader, the replica creation and load steps are skipped for each replica.
 *
 * @param cluster Kafka cluster.
 * @param clusterModel The cluster model to populate load information.
 * @param tp Topic partition that identifies the partition to populate the load for.
 * @param valuesAndExtrapolations The values and extrapolations of the leader replica.
 * @param replicaPlacementInfo The distribution of replicas over broker logdirs if available, {@code null} otherwise.
 * @param brokerCapacityConfigResolver The resolver for retrieving broker capacities.
 * @param allowCapacityEstimation whether allow capacity estimation in cluster model if the underlying live broker capacity is unavailable.
 * @throws TimeoutException If the capacity of a broker that is present in {@code cluster.nodes()} cannot be resolved.
 */
static void populatePartitionLoad(Cluster cluster,
                                  ClusterModel clusterModel,
                                  TopicPartition tp,
                                  ValuesAndExtrapolations valuesAndExtrapolations,
                                  Map<TopicPartition, Map<Integer, String>> replicaPlacementInfo,
                                  BrokerCapacityConfigResolver brokerCapacityConfigResolver,
                                  boolean allowCapacityEstimation)
    throws TimeoutException {
  PartitionInfo partitionInfo = cluster.partition(tp);
  // If partition info does not exist, the topic may have been deleted.
  if (partitionInfo != null) {
    // A broker is treated as alive when its id is among the nodes reported by the cluster.
    Set<Integer> aliveBrokers = cluster.nodes().stream().mapToInt(Node::id).boxed().collect(Collectors.toSet());
    // True only for the first replica whose load is set; cleared right after that call below.
    boolean needToAdjustCpuUsage = true;
    // Collected so that a single summary line can be logged after the loop.
    Set<Integer> deadBrokersWithUnknownCapacity = new HashSet<>();
    for (int index = 0; index < partitionInfo.replicas().length; index++) {
      Node replica = partitionInfo.replicas()[index];
      String rack = getRackHandleNull(replica);
      BrokerCapacityInfo brokerCapacity;
      try {
        // Do not allow capacity estimation for dead brokers.
        brokerCapacity = brokerCapacityConfigResolver.capacityForBroker(rack, replica.host(), replica.id(), BROKER_CAPACITY_FETCH_TIMEOUT_MS,
                                                                        aliveBrokers.contains(replica.id()) && allowCapacityEstimation);
      } catch (TimeoutException | BrokerCapacityResolutionException e) {
        // Capacity resolver may not be able to return the capacity information of dead brokers.
        if (!aliveBrokers.contains(replica.id())) {
          brokerCapacity = new BrokerCapacityInfo(EMPTY_BROKER_CAPACITY);
          deadBrokersWithUnknownCapacity.add(replica.id());
        } else {
          // The original exception is only logged; the rethrown TimeoutException carries the message alone.
          String errorMessage = String.format("Unable to retrieve capacity for broker %d. This may be caused by churn in "
                                              + "the cluster, please retry.", replica.id());
          LOG.warn(errorMessage, e);
          throw new TimeoutException(errorMessage);
        }
      }
      // NOTE(review): called for every replica's broker, not only dead ones; presumably the cluster model decides
      // whether the broker is dead -- confirm against ClusterModel.
      clusterModel.handleDeadBroker(rack, replica.id(), brokerCapacity);
      boolean isLeader;
      if (partitionInfo.leader() == null) {
        // The skip happens after capacity resolution and handleDeadBroker have already run for this replica,
        // and the warning is logged once per replica of the leaderless partition.
        LOG.warn("Detected offline partition {}-{}, skipping", partitionInfo.topic(), partitionInfo.partition());
        continue;
      } else {
        isLeader = replica.id() == partitionInfo.leader().id();
      }
      boolean isOffline = Arrays.stream(partitionInfo.offlineReplicas())
                                .anyMatch(offlineReplica -> offlineReplica.id() == replica.id());

      // NOTE(review): replicaPlacementInfo.get(tp) is dereferenced without a null check -- confirm that a non-null
      // map always has an entry for tp.
      String logdir = replicaPlacementInfo == null ? null : replicaPlacementInfo.get(tp).get(replica.id());
      // If the replica's logdir is null, it is either because replica placement information is not populated for the cluster
      // model or this replica is hosted on a dead disk and is not considered for intra-broker replica operations.
      clusterModel.createReplica(rack, replica.id(), tp, index, isLeader, isOffline, logdir, false);
      // NOTE(review): cluster.partition(tp) is looked up again here instead of reusing partitionInfo.
      clusterModel.setReplicaLoad(rack,
                                  replica.id(),
                                  tp,
                                  getAggregatedMetricValues(valuesAndExtrapolations,
                                                            cluster.partition(tp),
                                                            isLeader,
                                                            needToAdjustCpuUsage),
                                  valuesAndExtrapolations.windows());
      needToAdjustCpuUsage = false;
    }
    if (!deadBrokersWithUnknownCapacity.isEmpty()) {
      LOG.info("Assign empty capacity to brokers {} because they are dead and capacity resolver is unable to fetch their capacity.",
               deadBrokersWithUnknownCapacity);
    }
  }
}
 
Example 11
Source File: KafkaClusterManager.java    From doctorkafka with Apache License 2.0 4 votes vote down vote up
/**
 * Call the kafka api to get the list of under-replicated partitions.
 * When a topic partition loses all of its replicas, it will not have a leader broker.
 * We need to handle this special case in detecting under replicated topic partitions.
 *
 * <p>A partition returned by the consumer is reported when it has fewer in-sync replicas than replicas and fewer
 * in-sync replicas than the topic's replication factor. A partition that the consumer does not return at all is
 * reported with a null leader, the replicas from {@code partitionAssignments}, and no in-sync replicas.
 *
 * @param zkUrl passed to {@code KafkaUtils.getKafkaConsumer} to obtain the consumer.
 * @param securityProtocol passed to {@code KafkaUtils.getKafkaConsumer} to obtain the consumer.
 * @param consumerConfigs passed to {@code KafkaUtils.getKafkaConsumer} to obtain the consumer.
 * @param topics the topics to inspect; topics for which no partition info can be fetched are logged and skipped.
 * @param partitionAssignments replica broker ids by partition id, by topic; used for partitions without a leader.
 * @param replicationFactors replication factor by topic; it must contain every inspected topic.
 * @param partitionCounts partition count by topic; it must contain every inspected topic.
 * @return the under-replicated partitions of all inspected topics.
 */
public static List<PartitionInfo> getUnderReplicatedPartitions(
    String zkUrl, SecurityProtocol securityProtocol, Map<String, String> consumerConfigs,
    List<String> topics,
    scala.collection.mutable.Map<String, scala.collection.Map<Object, Seq<Object>>> partitionAssignments,
    Map<String, Integer> replicationFactors,
    Map<String, Integer> partitionCounts) {
  List<PartitionInfo> underReplicated = new ArrayList<>();
  // NOTE(review): the consumer is not closed here; presumably KafkaUtils owns and reuses it -- confirm.
  KafkaConsumer<?, ?> kafkaConsumer = KafkaUtils.getKafkaConsumer(zkUrl, securityProtocol, consumerConfigs);
  for (String topic : topics) {
    List<PartitionInfo> partitionInfoList = kafkaConsumer.partitionsFor(topic);
    if (partitionInfoList == null) {
      LOG.error("Failed to get partition info for {}", topic);
      continue;
    }
    int numPartitions = partitionCounts.get(topic);

    // when a partition loses all replicas and does not have a live leader,
    // kafkaconsumer.partitionsFor(...) will not return info for that partition.
    // the hasLeader array (all false initially) is used to detect partitions that have no leaders
    boolean[] hasLeader = new boolean[numPartitions];
    for (PartitionInfo info : partitionInfoList) {
      if (info.inSyncReplicas().length < info.replicas().length &&
          replicationFactors.get(info.topic()) > info.inSyncReplicas().length) {
        underReplicated.add(info);
      }
      hasLeader[info.partition()] = true;
    }

    // deal with the partitions that do not have leaders
    for (int partitionId = 0; partitionId < numPartitions; partitionId++) {
      if (!hasLeader[partitionId]) {
        Seq<Object> seq = partitionAssignments.get(topic).get().get(partitionId).get();
        // Only the broker ids are known for these replicas, so host and port are placeholders.
        Node[] nodes = JavaConverters.seqAsJavaList(seq).stream()
            .map(val -> new Node((Integer) val, "", -1)).toArray(Node[]::new);
        PartitionInfo partitionInfo =
            new PartitionInfo(topic, partitionId, null, nodes, new Node[0]);
        underReplicated.add(partitionInfo);
      }
    }
  }
  return underReplicated;
}
 
Example 12
Source File: KafkaCruiseControlUtils.java    From cruise-control with BSD 2-Clause "Simplified" License 2 votes vote down vote up
/**
 * Check if the partition is currently under replicated, i.e. its in-sync replica count differs from its replica count.
 * NOTE(review): the result of {@code cluster.partition(tp)} is dereferenced without a null check -- confirm callers
 * only pass partitions that exist in the given cluster.
 *
 * @param cluster The current cluster state.
 * @param tp The topic partition to check.
 * @return True if the partition is currently under replicated.
 */
public static boolean isPartitionUnderReplicated(Cluster cluster, TopicPartition tp) {
  PartitionInfo info = cluster.partition(tp);
  int inSyncCount = info.inSyncReplicas().length;
  int replicaCount = info.replicas().length;
  return inSyncCount != replicaCount;
}
 
Example 13
Source File: ExecutionProposal.java    From cruise-control with BSD 2-Clause "Simplified" License 2 votes vote down vote up
/**
 * Check whether the inter-broker replica movement from this proposal has completed: the current ordered replicas
 * of the partition match the new replicas of this proposal, and the partition has as many in-sync replicas as
 * replicas.
 *
 * @param partitionInfo Current partition state.
 * @return True if successfully completed, false otherwise.
 */
public boolean isInterBrokerMovementCompleted(PartitionInfo partitionInfo) {
  if (!brokerOrderMatched(partitionInfo.replicas(), _newReplicas)) {
    return false;
  }
  int replicaCount = partitionInfo.replicas().length;
  int inSyncCount = partitionInfo.inSyncReplicas().length;
  return replicaCount == inSyncCount;
}