/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.kareldb.server.leader;

import org.apache.kafka.clients.GroupRebalanceConfig;
import org.apache.kafka.clients.consumer.internals.AbstractCoordinator;
import org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient;
import org.apache.kafka.common.message.JoinGroupRequestData;
import org.apache.kafka.common.message.JoinGroupResponseData;
import org.apache.kafka.common.metrics.Metrics;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Timer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

/**
 * This class manages the coordination process with the Kafka group coordinator on the broker for
 * coordinating group members.
 */
final class KarelDbCoordinator extends AbstractCoordinator implements Closeable {
    private static final Logger LOG = LoggerFactory.getLogger(KarelDbCoordinator.class);

    // Sub-protocol version name, judging by the constant's name; its use is not visible in this
    // view -- TODO confirm where it is referenced.
    public static final String KDB_SUBPROTOCOL_V0 = "v0";

    // This node's identity; metadata() serializes it via KarelDbProtocol.serializeMetadata.
    private final KarelDbIdentity identity;
    // Most recent assignment stored by onJoinComplete(). It starts out null, and while it is null
    // rejoinNeededOrPending() returns true.
    private KarelDbProtocol.Assignment assignmentSnapshot;
    // Callback that onJoinComplete() invokes (onAssigned) with each newly decoded assignment.
    private final KarelDbRebalanceListener listener;

    /**
     * Initialize the coordination manager.
     */
    // Constructor: takes the Kafka client plumbing and group timing settings together with this
    // node's identity and the listener to be told about assignments.
    // NOTE(review): in this view the signature is followed directly by a dangling
    // "new GroupRebalanceConfig(" expression. The superclass constructor call, the argument list
    // of GroupRebalanceConfig and the constructor's closing brace are not visible here -- confirm
    // against the complete file before changing anything in this block.
    public KarelDbCoordinator(
        LogContext logContext,
        ConsumerNetworkClient client,
        String groupId,
        int rebalanceTimeoutMs,
        int sessionTimeoutMs,
        int heartbeatIntervalMs,
        Metrics metrics,
        String metricGrpPrefix,
        Time time,
        long retryBackoffMs,
        KarelDbIdentity identity,
        KarelDbRebalanceListener listener) {
            new GroupRebalanceConfig(
        // Keep the identity; metadata() serializes it later.
        this.identity = identity;
        // No assignment is known yet; onJoinComplete() stores the first one.
        this.assignmentSnapshot = null;
        // Keep the listener; onJoinComplete() calls onAssigned on it.
        this.listener = listener;

    public String protocolType() {
        return "kdb";

    // Drives network I/O through `client` for up to `timeout` milliseconds, as measured with the
    // `time` source. The clock is re-read after each step so the remaining budget stays accurate.
    // NOTE(review): in this view neither `if` block below is closed and each contains only a clock
    // refresh, and the method's own closing brace is absent. The calls that presumably make the
    // coordinator ready / rejoin the group are not visible here -- confirm against the complete
    // file before editing.
    public void poll(long timeout) {
        // poll for io until the timeout expires
        final long start = time.milliseconds();
        long now = start;
        long remaining;

        // do-while: the body runs at least once, even when timeout is zero or negative.
        do {
            if (coordinatorUnknown()) {
                now = time.milliseconds();

            if (rejoinNeededOrPending()) {
                now = time.milliseconds();


            long elapsed = now - start;
            remaining = timeout - elapsed;

            // Note that because the network client is shared with the background heartbeat thread,
            // we do not want to block in poll longer than the time to the next heartbeat.
            client.poll(time.timer(Math.min(Math.max(0, remaining), timeToNextHeartbeat(now))));

            // Recompute the remaining budget after the client poll returns.
            now = time.milliseconds();
            elapsed = now - start;
            remaining = timeout - elapsed;
        } while (remaining > 0);

    // Builds the protocol collection returned for this member: the node's identity is serialized
    // by KarelDbProtocol.serializeMetadata and wrapped in a single-element list holding one
    // JoinGroupRequestProtocol entry.
    // NOTE(review): the expression below is cut off in this view -- the calls that configure the
    // protocol entry, the closing parentheses and the method's closing brace are not visible.
    public JoinGroupRequestData.JoinGroupRequestProtocolCollection metadata() {
        ByteBuffer metadata = KarelDbProtocol.serializeMetadata(identity);
        return new JoinGroupRequestData.JoinGroupRequestProtocolCollection(
            Collections.singletonList(new JoinGroupRequestData.JoinGroupRequestProtocol()

    protected void onJoinComplete(
        int generation,
        String memberId,
        String protocol,
        ByteBuffer memberAssignment
    ) {
        assignmentSnapshot = KarelDbProtocol.deserializeAssignment(memberAssignment);
        listener.onAssigned(assignmentSnapshot, generation);

    protected Map<String, ByteBuffer> performAssignment(
        String kafkaLeaderId, // Kafka group "leader" who does assignment, *not* the cluster leader
        String protocol,
        List<JoinGroupResponseData.JoinGroupResponseMember> allMemberMetadata
    ) {
        LOG.debug("Performing assignment");

        Map<String, KarelDbIdentity> memberConfigs = new HashMap<>();
        for (JoinGroupResponseData.JoinGroupResponseMember entry : allMemberMetadata) {
            KarelDbIdentity identity
                = KarelDbProtocol.deserializeMetadata(ByteBuffer.wrap(entry.metadata()));
            memberConfigs.put(entry.memberId(), identity);

        LOG.debug("Member information: {}", memberConfigs);

        // Compute the leader as the leader-eligible member with the "smallest" (lexicographically) ID.
        // This doesn't guarantee a member will stay leader until it leaves the group, but should
        // usually keep the leader assigned to the same member across rebalances.
        KarelDbIdentity leaderIdentity = null;
        String leaderKafkaId = null;
        Set<String> urls = new HashSet<>();
        for (Map.Entry<String, KarelDbIdentity> entry : memberConfigs.entrySet()) {
            String kafkaMemberId = entry.getKey();
            KarelDbIdentity memberIdentity = entry.getValue();
            boolean eligible = memberIdentity.getLeaderEligibility();
            boolean smallerIdentity = leaderIdentity == null
                || memberIdentity.getUrl().compareTo(leaderIdentity.getUrl()) < 0;
            if (eligible && smallerIdentity) {
                leaderKafkaId = kafkaMemberId;
                leaderIdentity = memberIdentity;
        short error = KarelDbProtocol.Assignment.NO_ERROR;

        // Validate that group members aren't trying to use the same URL
        if (urls.size() != memberConfigs.size()) {
            LOG.error("Found duplicate URLs for group members. This indicates a "
                + "misconfiguration and is common when executing in containers. Use the host.name "
                + "configuration to set each instance's advertised host name to a value that is "
                + "routable from all other group members.");
            error = KarelDbProtocol.Assignment.DUPLICATE_URLS;

        // All members currently receive the same assignment information since it is just the leader ID
        Map<String, ByteBuffer> groupAssignment = new HashMap<>();
        KarelDbProtocol.Assignment assignment
            = new KarelDbProtocol.Assignment(error, leaderKafkaId, leaderIdentity);
        LOG.debug("Assignment: {}", assignment);
        for (String member : memberConfigs.keySet()) {
            groupAssignment.put(member, KarelDbProtocol.serializeAssignment(assignment));
        return groupAssignment;

    // Logs the assignment that is about to be revoked and then checks whether one is held at all.
    // Neither `generation` nor `memberId` is used in the lines visible here.
    // NOTE(review): the body of the null check below and the closing braces are not visible in
    // this view; presumably the listener is told about the revocation -- confirm against the
    // complete file.
    protected void onJoinPrepare(int generation, String memberId) {
        LOG.debug("Revoking previous assignment {}", assignmentSnapshot);
        if (assignmentSnapshot != null) {

    protected synchronized boolean ensureCoordinatorReady(Timer timer) {
        return super.ensureCoordinatorReady(timer);

    protected boolean rejoinNeededOrPending() {
        return super.rejoinNeededOrPending() || assignmentSnapshot == null;