Java source code of AnomalyDetector

cruise-control-master
- .circleci
  - config.yml
- kafka-cruise-control-start.sh
- cruise-control-metrics-reporter
  - src
    - main
      - java
        com
        linkedin
        kafka
        cruisecontrol
        metricsreporter
        exception
        UnknownVersionException.java
        CruiseControlMetricsReporterException.java
        CruiseControlMetricsReporter.java
        metric
        TopicMetric.java
        CruiseControlMetric.java
        YammerMetricProcessor.java
        PartitionMetric.java
        RawMetricType.java
        BrokerMetric.java
        MetricSerde.java
        MetricsUtils.java
        CruiseControlMetricsUtils.java
        CruiseControlMetricsReporterConfig.java
    - test
      - resources
        log4j.properties
      - java
        com
        linkedin
        kafka
        cruisecontrol
        metricsreporter
        utils
        CCEmbeddedBrokerBuilder.java
        CCKafkaIntegrationTestHarness.java
        CCEmbeddedZookeeper.java
        CCKafkaClientsIntegrationTestHarness.java
        CCKafkaTestUtils.java
        CCAbstractZookeeperTestHarness.java
        CCEmbeddedBroker.java
        CruiseControlMetricsReporterTest.java
        CruiseControlMetricsReporterSslTest.java
        metric
        MetricSerdeTest.java
        CruiseControlMetricsReporterAutoCreateTopicTest.java
- build_api_wiki.sh
- gradle.properties
- gradle
  - wrapper
    - gradle-wrapper.properties
  - findbugs-exclude.xml
- gradlew.bat
- LICENSE
- gradlew
- semantic-build-versioning.gradle
- CONTRIBUTING.md
- config
  - capacityJBOD.json
  - log4j.properties
  - cruise_control_jaas.conf_template
  - capacity.json
  - cruisecontrol.properties
  - clusterConfigs.json
  - capacityCores.json
- build.gradle
- buildSrc
  - src
    - main
      - groovy
        com
        linkedin
        gradle
        build
        DistributeTask.groovy
  - build.gradle
- checkstyle
  - checkstyle.xml
  - suppressions.xml
- README.md
- cruise-control-core
  - src
    - main
      - java
        com
        linkedin
        cruisecontrol
        metricdef
        MetricInfo.java
        ValueComputingStrategy.java
        AggregationFunction.java
        MetricDef.java
        common
        utils
        Utils.java
        Generationed.java
        WindowIndexedArrays.java
        config
        AbstractConfig.java
        ConfigValue.java
        ConfigException.java
        Config.java
        types
        Password.java
        ConfigDef.java
        LongGenerationed.java
        CruiseControlConfigurable.java
        detector
        Anomaly.java
        metricanomaly
        MetricAnomalyFinder.java
        PercentileMetricAnomalyFinder.java
        MetricAnomaly.java
        PercentileMetricAnomalyFinderConfig.java
        PercentileMetricAnomalyFinderUtils.java
        AnomalyType.java
        config
        CruiseControlConfig.java
        exception
        NotEnoughValidWindowsException.java
        CruiseControlException.java
        monitor
        sampling
        aggregator
        MetricSampleCompleteness.java
        MetricValues.java
        Extrapolation.java
        AggregatedMetricValues.java
        WindowState.java
        MetricSampleAggregatorState.java
        AggregationOptions.java
        RawMetricValues.java
        MetricSampleAggregator.java
        MetricSampleAggregationResult.java
        ValuesAndExtrapolations.java
        MetricSample.java
        model
        Entity.java
        servlet
        parameters
        CruiseControlParameters.java
        response
        CruiseControlResponse.java
        EndPoint.java
        handler
        Request.java
        EndpointType.java
        CruiseControlUtils.java
    - test
      - resources
        log4j.properties
      - java
        com
        linkedin
        cruisecontrol
        CruiseControlUnitTestUtils.java
        monitor
        sampling
        aggregator
        AggregatedMetricValuesTest.java
        MetricSampleAggregatorTest.java
        RawMetricValuesTest.java
        IntegerEntity.java
- settings.gradle
- NOTICE
- cruise-control
  - src
    - main
      - scala
        com
        linkedin
        kafka
        cruisecontrol
        executor
        ExecutorUtils.scala
      - java
        com
        linkedin
        kafka
        cruisecontrol
        KafkaCruiseControlUtils.java
        executor
        ExecutorNotifier.java
        ExecutionTaskTracker.java
        ReplicationThrottleHelper.java
        ExecutorState.java
        ExecutionTaskManager.java
        ExecutionTask.java
        ExecutorAdminUtils.java
        ExecutionTaskPlanner.java
        Executor.java
        ExecutorNoopNotifier.java
        ExecutionProposal.java
        strategy
        PrioritizeSmallReplicaMovementStrategy.java
        ReplicaMovementStrategy.java
        PrioritizeLargeReplicaMovementStrategy.java
        AbstractReplicaMovementStrategy.java
        PostponeUrpReplicaMovementStrategy.java
        BaseReplicaMovementStrategy.java
        common
        Resource.java
        MetadataClient.java
        Statistic.java
        KafkaCruiseControlThreadFactory.java
        NetworkClientProvider.java
        KafkaNetworkClientProvider.java
        KafkaCruiseControlApp.java
        detector
        MeanTimeBetweenAnomaliesMs.java
        AnomalyDetector.java
        MetricAnomalyDetector.java
        AnomalyMetrics.java
        AnomalyState.java
        AnomalyUtils.java
        AnomalyDetectionStatus.java
        TopicAnomalyDetector.java
        KafkaMetricAnomalyFinder.java
        TopicReplicationFactorAnomaly.java
        AnomalyDetails.java
        TopicAnomalyFinder.java
        SlowBrokerFinder.java
        TopicPartitionSizeAnomaly.java
        notifier
        SlackMessage.java
        KafkaAnomalyType.java
        NoopNotifier.java
        AnomalyNotificationResult.java
        AnomalyNotifier.java
        SelfHealingNotifier.java
        SlackSelfHealingNotifier.java
        KafkaAnomaly.java
        GoalViolations.java
        AnomalyDetectorState.java
        SelfHealingEnabledRatio.java
        GoalViolationDetector.java
        KafkaMetricAnomaly.java
        AnomalyDetectorUtils.java
        BrokerFailureDetector.java
        TopicReplicationFactorAnomalyFinder.java
        SlowBrokers.java
        BrokerFailures.java
        DiskFailureDetector.java
        NoopTopicAnomalyFinder.java
        PartitionSizeAnomalyFinder.java
        TopicAnomaly.java
        NoopMetricAnomalyFinder.java
        DiskFailures.java
        config
        KafkaCruiseControlConfig.java
        EnvConfigProvider.java
        RequestParameterWrapper.java
        constants
        CruiseControlRequestConfig.java
        CruiseControlParametersConfig.java
        ExecutorConfig.java
        AnomalyDetectorConfig.java
        WebServerConfig.java
        UserTaskManagerConfig.java
        AnalyzerConfig.java
        MonitorConfig.java
        BrokerCapacityInfo.java
        KafkaCruiseControlConfigUtils.java
        BrokerCapacityConfigResolver.java
        BrokerCapacityConfigFileResolver.java
        KafkaTopicConfigProvider.java
        TopicConfigProvider.java
        exception
        MetricSamplingException.java
        OngoingExecutionException.java
        BrokerCapacityResolutionException.java
        KafkaCruiseControlException.java
        OptimizationFailureException.java
        monitor
        LoadMonitorState.java
        task
        TrainingTask.java
        BootstrapTask.java
        SamplingTask.java
        LoadMonitorTaskRunner.java
        SampleLoadingTask.java
        metricdefinition
        KafkaMetricDef.java
        sampling
        CruiseControlMetricsProcessor.java
        holder
        BrokerEntity.java
        HolderUtils.java
        BrokerMetricSample.java
        RawMetricsHolder.java
        PartitionEntity.java
        BrokerLoad.java
        ValueAndCount.java
        ValueHolder.java
        ValueAndTime.java
        ValueMax.java
        PartitionMetricSample.java
        SampleStore.java
        CruiseControlMetricsReporterSampler.java
        TrainingFetcher.java
        ReadOnlyKafkaSampleStore.java
        MetricSampler.java
        aggregator
        SampleExtrapolation.java
        KafkaBrokerMetricSampleAggregator.java
        KafkaPartitionMetricSampleAggregator.java
        SamplingUtils.java
        NoopSampleStore.java
        MetricFetcherManager.java
        SamplingFetcher.java
        KafkaSampleStore.java
        NoopSampler.java
        DefaultMetricSamplerPartitionAssignor.java
        MetricFetcher.java
        MetricSamplerPartitionAssignor.java
        ModelGeneration.java
        LoadMonitor.java
        MonitorUtils.java
        ModelCompletenessRequirements.java
        KafkaCruiseControl.java
        KafkaCruiseControlMain.java
        model
        Rack.java
        ClusterModelStats.java
        ReplicaPlacementInfo.java
        ReplicaSortFunctionFactory.java
        Broker.java
        Replica.java
        Load.java
        SortedReplicasHelper.java
        Partition.java
        DiskStats.java
        ClusterModelStatsMetaData.java
        RawAndDerivedResource.java
        LinearRegressionModelParameters.java
        ClusterModel.java
        ModelUtils.java
        SortedReplicas.java
        ClusterModelStatsValueHolder.java
        ModelParameters.java
        Host.java
        Disk.java
        ClusterModelStatsValue.java
        servlet
        security
        BasicSecurityProvider.java
        CruiseControlSecurityHandler.java
        jwt
        JwtLoginService.java
        JwtUserIdentity.java
        JwtSecurityProvider.java
        JwtAuthenticator.java
        JwtUserPrincipal.java
        trustedproxy
        TrustedProxyAuthorizationService.java
        TrustedProxyPrincipal.java
        TrustedProxyLoginService.java
        TrustedProxySecurityProvider.java
        SecurityUtils.java
        SecurityProvider.java
        DefaultRoleSecurityProvider.java
        UserStoreAuthorizationService.java
        spnego
        SpnegoLoginServiceWithAuthServiceLifecycle.java
        SpnegoSecurityProvider.java
        SpnegoUserStoreAuthorizationService.java
        parameters
        AdminParameters.java
        ReviewBoardParameters.java
        TopicConfigurationParameters.java
        GoalsAndRequirements.java
        RemoveBrokerParameters.java
        KafkaOptimizationParameters.java
        ReviewParameters.java
        CruiseControlStateParameters.java
        DropRecentBrokersParameters.java
        TopicReplicationFactorChangeParameters.java
        GoalBasedOptimizationParameters.java
        ParameterUtils.java
        TrainParameters.java
        AbstractParameters.java
        DemoteBrokerParameters.java
        StopProposalParameters.java
        FixOfflineReplicasParameters.java
        AddBrokerParameters.java
        ClusterLoadParameters.java
        PartitionLoadParameters.java
        RebalanceParameters.java
        UpdateSelfHealingParameters.java
        ChangeExecutionConcurrencyParameters.java
        BootstrapParameters.java
        AddedOrRemovedBrokerParameters.java
        ProposalsParameters.java
        KafkaClusterStateParameters.java
        UserTasksParameters.java
        PauseResumeParameters.java
        KafkaCruiseControlServletUtils.java
        response
        JsonResponseClass.java
        TrainResult.java
        KafkaClusterState.java
        BootstrapResult.java
        ReviewResult.java
        PartitionState.java
        ProgressResult.java
        ResponseUtils.java
        CruiseControlState.java
        PauseSamplingResult.java
        JsonResponseExternalFields.java
        AdminResult.java
        AbstractCruiseControlResponse.java
        OptimizationResult.java
        ClusterPartitionState.java
        UserTaskState.java
        JsonResponseField.java
        ClusterBrokerState.java
        stats
        SingleHostStats.java
        SingleBrokerStats.java
        BasicStats.java
        BrokerStats.java
        ResumeSamplingResult.java
        PartitionLoadState.java
        StopProposalResult.java
        SessionManager.java
        CruiseControlEndpointType.java
        UserTaskManager.java
        CruiseControlEndPoint.java
        UserRequestException.java
        handler
        AbstractRequest.java
        sync
        ReviewRequest.java
        StopProposalRequest.java
        AdminRequest.java
        PauseRequest.java
        TrainRequest.java
        KafkaClusterStateRequest.java
        BootstrapRequest.java
        ResumeRequest.java
        UserTasksRequest.java
        ReviewBoardRequest.java
        AbstractSyncRequest.java
        async
        RemoveBrokerRequest.java
        runnable
        FixOfflineReplicasRunnable.java
        PartitionLoadRunnable.java
        DemoteBrokerRunnable.java
        RebalanceRunnable.java
        OperationRunnable.java
        GetStateRunnable.java
        LoadRunnable.java
        AddBrokersRunnable.java
        ProposalsRunnable.java
        UpdateTopicConfigurationRunnable.java
        RemoveBrokersRunnable.java
        RunnableUtils.java
        GoalBasedOperationRunnable.java
        OperationFuture.java
        FixOfflineReplicasRequest.java
        TopicConfigurationRequest.java
        ProposalsRequest.java
        DemoteRequest.java
        RebalanceRequest.java
        AddBrokerRequest.java
        PartitionLoadRequest.java
        AbstractAsyncRequest.java
        CruiseControlStateRequest.java
        ClusterLoadRequest.java
        purgatory
        RequestInfo.java
        Purgatory.java
        ReviewStatus.java
        KafkaCruiseControlServlet.java
        async
        progress
        Pending.java
        RetrievingMetrics.java
        OptimizationForGoal.java
        StepProgress.java
        WaitingForOngoingExecutionToStop.java
        GeneratingClusterModel.java
        OperationStep.java
        WaitingForClusterModel.java
        OperationProgress.java
        AsyncKafkaCruiseControl.java
        analyzer
        DefaultOptimizationOptionsGenerator.java
        OptimizationOptions.java
        kafkaassigner
        KafkaAssignerEvenRackAwareGoal.java
        KafkaAssignerUtils.java
        KafkaAssignerDiskUsageDistributionGoal.java
        ActionType.java
        ActionAcceptance.java
        goals
        ReplicaDistributionAbstractGoal.java
        TopicReplicaDistributionGoal.java
        PreferredLeaderElectionGoal.java
        internals
        BrokerAndSortedReplicas.java
        PotentialNwOutGoal.java
        ResourceDistributionGoal.java
        RackAwareGoal.java
        NetworkOutboundCapacityGoal.java
        CpuCapacityGoal.java
        DiskCapacityGoal.java
        Goal.java
        DiskUsageDistributionGoal.java
        LeaderReplicaDistributionGoal.java
        IntraBrokerDiskCapacityGoal.java
        ReplicaCapacityGoal.java
        LeaderBytesInDistributionGoal.java
        IntraBrokerDiskUsageDistributionGoal.java
        ReplicaDistributionGoal.java
        CpuUsageDistributionGoal.java
        GoalUtils.java
        CapacityGoal.java
        NetworkInboundCapacityGoal.java
        NetworkInboundUsageDistributionGoal.java
        AbstractGoal.java
        NetworkOutboundUsageDistributionGoal.java
        BalancingAction.java
        OptimizerResult.java
        BalancingConstraint.java
        GoalOptimizer.java
        OptimizationOptionsGenerator.java
        AnalyzerState.java
        GoalReadinessRecord.java
        AnalyzerUtils.java
    - yaml
      - base.yaml
      - responses
        errorResponse.yaml
        executionProposal.yaml
        adminResult.yaml
        pauseSamplingResult.yaml
        brokerStats.yaml
        cruiseControlState.yaml
        analyzerState.yaml
        optimizationResult.yaml
        anomalyDetectorState.yaml
        executorState.yaml
        goalStatus.yaml
        userTaskState.yaml
        trainResult.yaml
        clusterModelStats.yaml
        stopProposalResult.yaml
        resumeSamplingResult.yaml
        partitionLoadState.yaml
        loadMonitorState.yaml
        statistic.yaml
        kafkaClusterState.yaml
        bootstrapResult.yaml
        reviewResult.yaml
        progressResult.yaml
      - endpoints
        stopProposalExecution.yaml
        demoteBroker.yaml
        train.yaml
        userTasks.yaml
        state.yaml
        partitionLoad.yaml
        fixOfflineReplicas.yaml
        load.yaml
        resumeSampling.yaml
        addBroker.yaml
        pauseSampling.yaml
        topicConfiguration.yaml
        bootstrap.yaml
        proposals.yaml
        review.yaml
        kafkaClusterState.yaml
        rebalance.yaml
        reviewBoard.yaml
        removeBroker.yaml
        admin.yaml
      - README.md
    - test
      - resources
        ssl_integration_test.keystore
        log4j.properties
        testCapacityConfigJBOD.json
        basic-auth.credentials
        auth.credentials
        testCapacityConfig.json
        DefaultCapacityConfig.json
        testCapacityConfigCores.json
        envConfigProviderTest.properties
        DefaultClusterConfigs.json
      - java
        com
        linkedin
        kafka
        cruisecontrol
        KafkaCruiseControlUnitTestUtils.java
        executor
        ExecutionProposalTest.java
        ExecutionTaskPlannerTest.java
        ReplicationThrottleHelperTest.java
        ExecutionTaskManagerTest.java
        ExecutorTest.java
        common
        DeterministicCluster.java
        TestConstants.java
        ClusterProperty.java
        KafkaCruiseControlTest.java
        detector
        SlowBrokerFinderTest.java
        AnomalyDetectorTest.java
        AnomalyDetectorTestUtils.java
        KafkaMetricAnomalyFinderTest.java
        notifier
        SelfHealingNotifierTest.java
        SlackSelfHealingNotifierTest.java
        SlackMessageTest.java
        BrokerFailureDetectorTest.java
        AnomalyUtilsTest.java
        TopicReplicationFactorAnomalyFinderTest.java
        config
        BrokerCapacityConfigFileResolverTest.java
        SecurityAndSslConfigTest.java
        EnvConfigProviderTest.java
        CaseInsensitiveGoalConfigTest.java
        monitor
        ModelCompletenessRequirementsTest.java
        task
        LoadMonitorTaskRunnerTest.java
        sampling
        holder
        BrokerMetricSampleTest.java
        PartitionMetricSampleTest.java
        aggregator
        KafkaPartitionMetricSampleAggregatorTest.java
        DefaultMetricSamplerPartitionAssignorTest.java
        SamplingUtilsTest.java
        CruiseControlMetricsProcessorTest.java
        MonitorUnitTestUtils.java
        MonitorUtilsTest.java
        LoadMonitorTest.java
        CruiseControlIntegrationTestHarness.java
        model
        LoadConsistencyTest.java
        LoadTest.java
        SortedReplicasTest.java
        RandomCluster.java
        servlet
        security
        jwt
        JwtLoginServiceTest.java
        JwtAuthenticatorTest.java
        JwtSecurityProviderIntegrationTest.java
        TokenGenerator.java
        AuthenticationIntegrationTest.java
        BasicAuthenticationIntegrationTest.java
        trustedproxy
        TrustedProxyLoginServiceTest.java
        TrustedProxyAuthorizationServiceTest.java
        TrustedProxySecurityProviderIntegrationTest.java
        SslConnectionIntegrationTest.java
        MiniKdc.java
        SecurityTestUtils.java
        spnego
        SpnegoSecurityProviderIntegrationTest.java
        SpnegoUserStoreAuthorizationServiceTest.java
        parameters
        RequestParameterTest.java
        SessionManagerTest.java
        UserTaskManagerTest.java
        response
        ResponseTest.java
        KafkaCruiseControlServletEndpointTest.java
        handler
        async
        runnable
        OperationFutureTest.java
        RunnableUtilsTest.java
        async
        progress
        OperationProgressTest.java
        analyzer
        kafkaassigner
        KafkaAssignerDiskUsageDistributionGoalTest.java
        OptimizationVerifier.java
        SelfHealingWithOptimizedGoalTest.java
        DeterministicClusterTest.java
        IntraBrokerRebalanceTest.java
        ExcludedTopicsTest.java
        AnalyzerUnitTestUtils.java
        RandomClusterLinearDistNewBrokerTest.java
        RandomClusterUniformDistNewBrokerTest.java
        ExcludedBrokersForReplicaMoveTest.java
        ReplicationFactorChangeTest.java
        RandomGoalTest.java
        PreferredLeaderElectionGoalTest.java
        RandomClusterExpDistNewBrokerTest.java
        ExcludedBrokersForLeadershipTest.java
        GoalOptimizerTest.java
        FixOfflineReplicaTest.java
        RandomClusterTest.java
        OfflineProposalGenerator.java
        RandomSelfHealingTest.java
- .gitignore
- docs
  - images
    - cc-logo.svg
  - pull_request_template.md
  - wiki
    - Python Client
      - cruise-control-client-Usage-in-Python-Applications.md
      - cccli-Command-Line-Usage.md
      - Getting-Started.md
    - User Guide
      - Change-topic-replication-factor-through-Cruise-Control.md
      - Pluggable-Components.md
      - Security.md
      - Configurations.md
      - REST-APIs.md
      - Sensors.md
      - Configure-Slack-notifications.md
      - 2-step-verification-for-POST-requests.md
      - Secure-zookeeper-configuration.md
    - Overview.md
    - Home.md
    - Developer Guide
      - Write-your-own-goals.md
- cruise-control-client
  - cruisecontrolclient
    - client
      - Query.py
      - Endpoint.py
      - cccli.py
      - Responder.py
      - __init__.py
      - CCParameter
        RegularExpressionParameter.py
        CommaSeparatedParameter.py
        BooleanParameter.py
        PositiveIntegerParameter.py
        NonNegativeIntegerParameter.py
        Parameter.py
        __init__.py
        TimeStampParameter.py
        SetOfChoicesParameter.py
      - ExecutionContext.py
    - util
      - print.py
      - __init__.py
    - __init__.py
  - setup.py
  - __init__.py
  - requirements.txt
- kafka-cruise-control-stop.sh

/*
 * Copyright 2017 LinkedIn Corp. Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information.
 */

package com.linkedin.kafka.cruisecontrol.detector;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import com.linkedin.cruisecontrol.detector.Anomaly;
import com.linkedin.cruisecontrol.detector.AnomalyType;
import com.linkedin.kafka.cruisecontrol.KafkaCruiseControl;
import com.linkedin.kafka.cruisecontrol.KafkaCruiseControlUtils;
import com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig;
import com.linkedin.kafka.cruisecontrol.common.KafkaCruiseControlThreadFactory;
import com.linkedin.kafka.cruisecontrol.config.constants.AnomalyDetectorConfig;
import com.linkedin.kafka.cruisecontrol.detector.notifier.AnomalyNotificationResult;
import com.linkedin.kafka.cruisecontrol.detector.notifier.AnomalyNotifier;
import com.linkedin.kafka.cruisecontrol.detector.notifier.KafkaAnomalyType;
import com.linkedin.kafka.cruisecontrol.exception.OptimizationFailureException;
import com.linkedin.kafka.cruisecontrol.executor.ExecutorState;
import com.linkedin.kafka.cruisecontrol.monitor.task.LoadMonitorTaskRunner;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.common.utils.SystemTime;
import org.apache.kafka.common.utils.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.linkedin.kafka.cruisecontrol.KafkaCruiseControlUtils.OPERATION_LOGGER;
import static com.linkedin.kafka.cruisecontrol.KafkaCruiseControlUtils.sanityCheckGoals;
import static com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorUtils.anomalyComparator;
import static com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorUtils.getSelfHealingGoalNames;
import static com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorUtils.SHUTDOWN_ANOMALY;
import static com.linkedin.kafka.cruisecontrol.detector.notifier.KafkaAnomalyType.*;


/**
 * The anomaly detector class that helps detect and handle anomalies.
 */
public class AnomalyDetector {
  // Name prefix under which the sensors of this class are registered in the metric registry.
  static final String METRIC_REGISTRY_NAME = "AnomalyDetector";
  // Exclusive upper bound (in ms) of the random jitter added to the initial delay of each periodic detector.
  private static final int INIT_JITTER_BOUND = 10000;
  // Maximum time (in ms) to wait for the detector scheduler to terminate during shutdown.
  private static final long SCHEDULER_SHUTDOWN_TIMEOUT_MS = 5000L;
  // For each anomaly type, one thread is needed to run corresponding anomaly detector.
  // One more thread is needed to run anomaly handler task.
  private static final int NUM_ANOMALY_DETECTION_THREADS = KafkaAnomalyType.cachedValues().size() + 1;
  // Initial capacity of the priority queue holding the detected anomalies.
  private static final int ANOMALY_QUEUE_INITIAL_CAPACITY = 10;
  private static final Logger LOG = LoggerFactory.getLogger(AnomalyDetector.class);
  // Dedicated logger used to record whether self-healing operations started successfully.
  private static final Logger OPERATION_LOG = LoggerFactory.getLogger(OPERATION_LOGGER);
  private final KafkaCruiseControl _kafkaCruiseControl;
  // Consulted for each anomaly to decide on the action to take; it also owns the self-healing on/off switches.
  private final AnomalyNotifier _anomalyNotifier;
  // Handed to the disk failure detector and closed upon shutdown.
  private final AdminClient _adminClient;
  // Detectors
  private final GoalViolationDetector _goalViolationDetector;
  private final BrokerFailureDetector _brokerFailureDetector;
  private final MetricAnomalyDetector _metricAnomalyDetector;
  private final DiskFailureDetector _diskFailureDetector;
  private final TopicAnomalyDetector _topicAnomalyDetector;
  // Runs the periodic detectors, the delayed broker failure checks, and the anomaly handler task.
  private final ScheduledExecutorService _detectorScheduler;
  // Detection interval (in ms) of the periodically scheduled detectors, keyed by the anomaly type they detect.
  private final Map<KafkaAnomalyType, Long> _anomalyDetectionIntervalMsByType;
  // Delay (in ms) before broker failure detection is retried when an anomaly cannot be handled right away.
  private final long _brokerFailureDetectionBackoffMs;
  // Queue shared with the detectors; the anomaly handler task takes anomalies from it.
  private final PriorityBlockingQueue<Anomaly> _anomalies;
  // Set to true (under _shutdownLock) once shutdown has been requested.
  private volatile boolean _shutdown;
  private final AnomalyDetectorState _anomalyDetectorState;
  // Goals whose completeness requirements must be met before a fix is attempted.
  private final List<String> _selfHealingGoals;
  // Executes the operation logging of self-healing attempts off the anomaly handler thread.
  private final ExecutorService _anomalyLoggerExecutor;
  // The anomaly currently being handled by the anomaly handler task; null if there is none.
  private volatile Anomaly _anomalyInProgress;
  // Number of times a delayed broker failure check has been scheduled by the anomaly handler.
  private final AtomicLong _numCheckedWithDelay;
  // Guards _shutdown so that starting a fix or scheduling a delayed check cannot interleave with a shutdown request.
  private final Object _shutdownLock;

  /**
   * Creates an anomaly detector that is configured from the config of the given Cruise Control instance.
   *
   * @param kafkaCruiseControl The Cruise Control instance providing the config and used by the detectors.
   * @param time The time object handed to the anomaly detector state.
   * @param dropwizardMetricRegistry The metric registry used to register the sensors.
   */
  public AnomalyDetector(KafkaCruiseControl kafkaCruiseControl,
                         Time time,
                         MetricRegistry dropwizardMetricRegistry) {
    // Anomalies of a higher priority type are handled first; anomalies of equal priority are handled in order of
    // their detection time.
    _anomalies = new PriorityBlockingQueue<>(ANOMALY_QUEUE_INITIAL_CAPACITY, anomalyComparator());
    KafkaCruiseControlConfig ccConfig = kafkaCruiseControl.config();
    _adminClient = KafkaCruiseControlUtils.createAdminClient(KafkaCruiseControlUtils.parseAdminClientConfigs(ccConfig));

    // Each periodic detector may have its own interval; the generic interval is the fallback when it is not set.
    Long fallbackIntervalMs = ccConfig.getLong(AnomalyDetectorConfig.ANOMALY_DETECTION_INTERVAL_MS_CONFIG);
    _anomalyDetectionIntervalMsByType = new HashMap<>(KafkaAnomalyType.cachedValues().size() - 1);
    _anomalyDetectionIntervalMsByType.put(
        GOAL_VIOLATION,
        detectionIntervalOrDefault(ccConfig.getLong(AnomalyDetectorConfig.GOAL_VIOLATION_DETECTION_INTERVAL_MS_CONFIG),
                                   fallbackIntervalMs));
    _anomalyDetectionIntervalMsByType.put(
        METRIC_ANOMALY,
        detectionIntervalOrDefault(ccConfig.getLong(AnomalyDetectorConfig.METRIC_ANOMALY_DETECTION_INTERVAL_MS_CONFIG),
                                   fallbackIntervalMs));
    _anomalyDetectionIntervalMsByType.put(
        TOPIC_ANOMALY,
        detectionIntervalOrDefault(ccConfig.getLong(AnomalyDetectorConfig.TOPIC_ANOMALY_DETECTION_INTERVAL_MS_CONFIG),
                                   fallbackIntervalMs));
    _anomalyDetectionIntervalMsByType.put(
        DISK_FAILURE,
        detectionIntervalOrDefault(ccConfig.getLong(AnomalyDetectorConfig.DISK_FAILURE_DETECTION_INTERVAL_MS_CONFIG),
                                   fallbackIntervalMs));
    _brokerFailureDetectionBackoffMs = ccConfig.getLong(AnomalyDetectorConfig.BROKER_FAILURE_DETECTION_BACKOFF_MS_CONFIG);

    _anomalyNotifier = ccConfig.getConfiguredInstance(AnomalyDetectorConfig.ANOMALY_NOTIFIER_CLASS_CONFIG,
                                                      AnomalyNotifier.class);
    _kafkaCruiseControl = kafkaCruiseControl;
    _selfHealingGoals = getSelfHealingGoalNames(ccConfig);
    sanityCheckGoals(_selfHealingGoals, false, ccConfig);

    // All detectors report into the same anomaly queue.
    _goalViolationDetector = new GoalViolationDetector(_anomalies, _kafkaCruiseControl);
    _brokerFailureDetector = new BrokerFailureDetector(_anomalies, _kafkaCruiseControl);
    _metricAnomalyDetector = new MetricAnomalyDetector(_anomalies, _kafkaCruiseControl);
    _diskFailureDetector = new DiskFailureDetector(_adminClient, _anomalies, _kafkaCruiseControl);
    _topicAnomalyDetector = new TopicAnomalyDetector(_anomalies, _kafkaCruiseControl);
    _detectorScheduler = Executors.newScheduledThreadPool(
        NUM_ANOMALY_DETECTION_THREADS, new KafkaCruiseControlThreadFactory(METRIC_REGISTRY_NAME, false, LOG));
    _shutdown = false;

    int recentAnomalyStatesToCache = ccConfig.getInt(AnomalyDetectorConfig.NUM_CACHED_RECENT_ANOMALY_STATES_CONFIG);
    _anomalyLoggerExecutor =
        Executors.newSingleThreadScheduledExecutor(new KafkaCruiseControlThreadFactory("AnomalyLogger", true, null));
    _anomalyInProgress = null;
    _numCheckedWithDelay = new AtomicLong();
    _shutdownLock = new Object();

    // The gauges only read the notifier and the goal violation detector, both of which are set at this point.
    registerGaugeSensors(dropwizardMetricRegistry);
    // Add anomaly detector state
    _anomalyDetectorState = new AnomalyDetectorState(time,
                                                     _anomalyNotifier.selfHealingEnabled(),
                                                     recentAnomalyStatesToCache,
                                                     dropwizardMetricRegistry);
  }

  /**
   * @param typeSpecificIntervalMs The detection interval configured for a specific anomaly type, or null if unset.
   * @param defaultIntervalMs The generic detection interval to fall back to.
   * @return The type-specific interval if it is set, the default interval otherwise.
   */
  private static Long detectionIntervalOrDefault(Long typeSpecificIntervalMs, Long defaultIntervalMs) {
    return typeSpecificIntervalMs != null ? typeSpecificIntervalMs : defaultIntervalMs;
  }

  /**
   * Package private constructor for unit test. Collaborators are injected, a single interval is used both for every
   * periodically scheduled detector and as the broker failure detection backoff, and no self-healing goals are set.
   */
  AnomalyDetector(PriorityBlockingQueue<Anomaly> anomalies,
                  AdminClient adminClient,
                  long anomalyDetectionIntervalMs,
                  KafkaCruiseControl kafkaCruiseControl,
                  AnomalyNotifier anomalyNotifier,
                  GoalViolationDetector goalViolationDetector,
                  BrokerFailureDetector brokerFailureDetector,
                  MetricAnomalyDetector metricAnomalyDetector,
                  DiskFailureDetector diskFailureDetector,
                  TopicAnomalyDetector topicAnomalyDetector,
                  ScheduledExecutorService detectorScheduler) {
    _anomalies = anomalies;
    _adminClient = adminClient;
    _kafkaCruiseControl = kafkaCruiseControl;
    _anomalyNotifier = anomalyNotifier;
    _detectorScheduler = detectorScheduler;

    // Broker failure detection is not scheduled with a fixed interval, hence it has no entry in the interval map.
    _anomalyDetectionIntervalMsByType = new HashMap<>(KafkaAnomalyType.cachedValues().size() - 1);
    for (KafkaAnomalyType type : KafkaAnomalyType.cachedValues()) {
      if (type != BROKER_FAILURE) {
        _anomalyDetectionIntervalMsByType.put(type, anomalyDetectionIntervalMs);
      }
    }
    _brokerFailureDetectionBackoffMs = anomalyDetectionIntervalMs;

    _goalViolationDetector = goalViolationDetector;
    _brokerFailureDetector = brokerFailureDetector;
    _metricAnomalyDetector = metricAnomalyDetector;
    _diskFailureDetector = diskFailureDetector;
    _topicAnomalyDetector = topicAnomalyDetector;

    _shutdown = false;
    _selfHealingGoals = Collections.emptyList();
    _anomalyLoggerExecutor =
        Executors.newSingleThreadScheduledExecutor(new KafkaCruiseControlThreadFactory("AnomalyLogger", true, null));
    _anomalyInProgress = null;
    _numCheckedWithDelay = new AtomicLong();
    _shutdownLock = new Object();
    // Add anomaly detector state
    _anomalyDetectorState = new AnomalyDetectorState(new SystemTime(),
                                                     new HashMap<>(KafkaAnomalyType.cachedValues().size()),
                                                     10,
                                                     null);
  }

  /**
   * Register gauge sensors: the balancedness score reported by the goal violation detector and, for each anomaly
   * type, a 1/0 gauge indicating whether self-healing is currently enabled for that type.
   *
   * @param dropwizardMetricRegistry The metric registry on which the gauges are registered.
   */
  private void registerGaugeSensors(MetricRegistry dropwizardMetricRegistry) {
    dropwizardMetricRegistry.register(MetricRegistry.name(METRIC_REGISTRY_NAME, "balancedness-score"),
                                      (Gauge<Double>) _goalViolationDetector::balancednessScore);

    // Self-Healing is turned on/off. 1/0 metric for each of the self-healing options.
    for (KafkaAnomalyType anomalyType : KafkaAnomalyType.cachedValues()) {
      // Lower-case with a fixed locale so that the sensor name does not depend on the default locale of the JVM
      // (e.g. a Turkish locale would otherwise map 'I' to a dotless 'i').
      String sensorName = String.format("%s-self-healing-enabled",
                                        anomalyType.toString().toLowerCase(java.util.Locale.ROOT));
      // Boolean.TRUE.equals(...) reports 0 rather than failing with a NullPointerException on unboxing if the
      // notifier has no entry for the anomaly type.
      dropwizardMetricRegistry.register(
          MetricRegistry.name(METRIC_REGISTRY_NAME, sensorName),
          (Gauge<Integer>) () -> Boolean.TRUE.equals(_anomalyNotifier.selfHealingEnabled().get(anomalyType)) ? 1 : 0);
    }
  }

  /**
   * Start each anomaly detector: the broker failure detector is started directly, the remaining detectors are
   * scheduled at a fixed rate, and finally the anomaly handler task is submitted.
   */
  public void startDetection() {
    LOG.info("Starting anomaly detector.");
    _brokerFailureDetector.startDetection();
    scheduleDetectorWithJitter(_goalViolationDetector, GOAL_VIOLATION,
                               "Starting goal violation detector with delay of {} ms");
    scheduleDetectorWithJitter(_metricAnomalyDetector, METRIC_ANOMALY,
                               "Starting metric anomaly detector with delay of {} ms");
    scheduleDetectorWithJitter(_topicAnomalyDetector, TOPIC_ANOMALY,
                               "Starting topic anomaly detector with delay of {} ms");
    scheduleDetectorWithJitter(_diskFailureDetector, DISK_FAILURE,
                               "Starting disk failure detector with delay of {} ms");
    _detectorScheduler.submit(new AnomalyHandlerTask());
  }

  /**
   * Schedule the given detector at the fixed rate configured for the given anomaly type. The initial delay is half of
   * the detection interval plus a random jitter below {@link #INIT_JITTER_BOUND} ms.
   *
   * @param detector The detector to run periodically.
   * @param anomalyType The anomaly type whose detection interval is used.
   * @param startupLogFormat The debug log format; its single placeholder receives the jitter in ms.
   */
  private void scheduleDetectorWithJitter(Runnable detector, KafkaAnomalyType anomalyType, String startupLogFormat) {
    int jitterMs = new Random().nextInt(INIT_JITTER_BOUND);
    long detectionIntervalMs = _anomalyDetectionIntervalMsByType.get(anomalyType);
    LOG.debug(startupLogFormat, jitterMs);
    _detectorScheduler.scheduleAtFixedRate(detector,
                                           detectionIntervalMs / 2 + jitterMs,
                                           detectionIntervalMs,
                                           TimeUnit.MILLISECONDS);
  }

  /**
   * Shutdown the anomaly detector.
   * Note that if a fix is being started as shutdown is requested, shutdown will wait until the fix is initiated.
   *
   * <p>If the calling thread is interrupted while waiting for the detector scheduler to terminate, the remaining
   * shutdown steps are still executed and the interrupt status of the thread is restored before returning.</p>
   */
  public void shutdown() {
    LOG.info("Shutting down anomaly detector.");
    // Acquiring the lock makes shutdown wait for a fix (or delayed check scheduling) that is being initiated.
    synchronized (_shutdownLock) {
      _shutdown = true;
    }
    // SHUTDOWN_ANOMALY is a broker failure with detection time set to 0ms. Here we expect it is added to the front of the
    // priority queue and notify anomaly handler immediately.
    _anomalies.add(SHUTDOWN_ANOMALY);
    _detectorScheduler.shutdown();
    KafkaCruiseControlUtils.closeAdminClientWithTimeout(_adminClient);
    boolean interrupted = false;
    try {
      try {
        _detectorScheduler.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS);
        if (!_detectorScheduler.isTerminated()) {
          LOG.warn("The anomaly detector scheduler failed to shutdown in {} ms.", SCHEDULER_SHUTDOWN_TIMEOUT_MS);
        }
      } catch (InterruptedException e) {
        LOG.warn("Interrupted while waiting for anomaly detector to shutdown.");
        // Remember the interrupt, but finish the remaining shutdown steps before restoring it.
        interrupted = true;
      }
      _brokerFailureDetector.shutdown();
      _anomalyLoggerExecutor.shutdownNow();
      LOG.info("Anomaly detector shutdown completed.");
    } finally {
      if (interrupted) {
        // Restore the interrupt status so that the caller can observe the interruption.
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Refresh the metrics of the anomaly detector state with the current self-healing enabled ratio of the anomaly
   * notifier and the current balancedness score of the goal violation detector, then return the state.
   *
   * @return Anomaly detector state.
   */
  public synchronized AnomalyDetectorState anomalyDetectorState() {
    _anomalyDetectorState.refreshMetrics(_anomalyNotifier.selfHealingEnabledRatio(), _goalViolationDetector.balancednessScore());
    return _anomalyDetectorState;
  }

  /**
   * Package private accessor that delegates to the anomaly detector state.
   *
   * @return Number of anomaly fixes started by the anomaly detector for self healing.
   */
  long numSelfHealingStarted() {
    return _anomalyDetectorState.numSelfHealingStarted();
  }

  /**
   * Package private accessor that delegates to the anomaly detector state.
   *
   * @return Number of anomaly fixes failed to start despite the anomaly in progress being ready to fix. This typically
   * indicates the need for expanding the cluster or relaxing the constraints of self-healing goals.
   */
  long numSelfHealingFailedToStart() {
    return _anomalyDetectorState.numSelfHealingFailedToStart();
  }

  /**
   * Delegates to the anomaly detector state.
   * See {@link AnomalyDetectorState#maybeClearOngoingAnomalyDetectionTimeMs}.
   */
  public void maybeClearOngoingAnomalyDetectionTimeMs() {
    _anomalyDetectorState.maybeClearOngoingAnomalyDetectionTimeMs();
  }

  /**
   * Delegates to the anomaly detector state.
   * See {@link AnomalyDetectorState#resetHasUnfixableGoals}.
   */
  public void resetHasUnfixableGoals() {
    _anomalyDetectorState.resetHasUnfixableGoals();
  }

  /**
   * Switch self healing on or off for the given anomaly type. The anomaly notifier is updated first, then the cached
   * anomaly detector state is brought in line with it.
   *
   * @param anomalyType Type of anomaly for which to enable or disable self healing.
   * @param isSelfHealingEnabled True if self healing is enabled, false otherwise.
   * @return The old value of self healing for the given anomaly type, as reported by the anomaly notifier.
   */
  public boolean setSelfHealingFor(AnomalyType anomalyType, boolean isSelfHealingEnabled) {
    final boolean wasEnabledBefore = _anomalyNotifier.setSelfHealingFor(anomalyType, isSelfHealingEnabled);
    _anomalyDetectorState.setSelfHealingFor(anomalyType, isSelfHealingEnabled);
    return wasEnabledBefore;
  }

  /**
   * The counter is incremented each time the anomaly handler schedules a delayed broker failure detection.
   *
   * @return Number of anomalies checked with delay.
   */
  public long numCheckedWithDelay() {
    return _numCheckedWithDelay.get();
  }

  /**
   * Update anomaly status once associated self-healing operation has finished. The update itself is delegated to the
   * anomaly detector state.
   *
   * @param anomalyId Unique id of anomaly which triggered self-healing operation.
   */
  public void markSelfHealingFinished(String anomalyId) {
    LOG.debug("Self healing with id {} has finished.", anomalyId);
    _anomalyDetectorState.markSelfHealingFinished(anomalyId);
  }

  /**
   * A class that handles all the anomalies.
   */
  class AnomalyHandlerTask implements Runnable {
    /**
     * Take anomalies from the queue one at a time and handle them until the shutdown anomaly is retrieved.
     * If handling is interrupted or fails with an unexpected error, the anomaly in progress (if any) is post
     * processed; optimization failures and illegal states are only logged.
     */
    @Override
    public void run() {
      LOG.info("Starting anomaly handler");
      while (true) {
        // In case handling the anomaly in progress fails, do some post processing.
        boolean postProcessAnomalyInProgress = false;
        _anomalyInProgress = null;
        try {
          _anomalyInProgress = _anomalies.take();
          LOG.trace("Processing anomaly {}.", _anomalyInProgress);
          if (_anomalyInProgress == SHUTDOWN_ANOMALY) {
            // Service has shutdown.
            _anomalyInProgress = null;
            break;
          }
          handleAnomalyInProgress();
        } catch (InterruptedException e) {
          LOG.debug("Received interrupted exception.", e);
          postProcessAnomalyInProgress = true;
        } catch (OptimizationFailureException ofe) {
          LOG.warn("Encountered optimization failure when trying to fix the anomaly {}.", _anomalyInProgress, ofe);
          // If self-healing failed due to an optimization failure, that indicates a hard goal violation; hence there is
          // no further processing anomaly detector can do without human intervention for the anomaly (i.e. other than
          // what has already been done in the {@link #handlePostFixAnomaly(boolean, boolean, String, boolean)}).
          postProcessAnomalyInProgress = false;
        } catch (IllegalStateException ise) {
          LOG.warn("Unexpected state prevents anomaly detector from handling the anomaly {}.", _anomalyInProgress, ise);
          // An illegal state may indicate a transient process blocking self-healing (e.g. an ongoing execution not
          // started by Cruise Control).
          postProcessAnomalyInProgress = false;
        } catch (Throwable t) {
          LOG.error("Uncaught exception in anomaly handler.", t);
          postProcessAnomalyInProgress = true;
        }
        // If the failure happened before an anomaly was retrieved (e.g. take() was interrupted), there is nothing to
        // post process. Post processing runs outside the try block and dereferences the anomaly in progress, so
        // without this check a NullPointerException would terminate the anomaly handler.
        if (postProcessAnomalyInProgress && _anomalyInProgress != null) {
          LOG.info("Post processing anomaly {}.", _anomalyInProgress);
          postProcessAnomalyInProgress(_brokerFailureDetectionBackoffMs);
        }
      }
      LOG.info("Anomaly handler exited.");
    }

    /**
     * Record the detection of the anomaly in progress and then either process it right away, or -- if the executor has
     * a task in progress -- post process it with the broker failure detection backoff.
     */
    private void handleAnomalyInProgress() throws Exception {
      // Add anomaly detection to anomaly detector state.
      AnomalyType typeInProgress = _anomalyInProgress.anomalyType();
      _anomalyDetectorState.addAnomalyDetection(typeInProgress, _anomalyInProgress);

      ExecutorState.State executorState = _kafkaCruiseControl.executionState();
      if (executorState == ExecutorState.State.NO_TASK_IN_PROGRESS) {
        processAnomalyInProgress(typeInProgress);
        return;
      }
      // We schedule a delayed check if the executor is doing some work.
      LOG.info("Post processing anomaly {} because executor is in {} state.", _anomalyInProgress, executorState);
      postProcessAnomalyInProgress(_brokerFailureDetectionBackoffMs);
    }

    /**
     * Mark the anomaly rate, ask the anomaly notifier what to do with the anomaly in progress, and act on the answer:
     * fix the anomaly, post process it with the delay given in the notification result, or mark it as ignored.
     * Nothing further is done if the notifier returns no result.
     *
     * @param anomalyType The type of the ongoing anomaly
     */
    private void processAnomalyInProgress(AnomalyType anomalyType) throws Exception {
      _anomalyDetectorState.markAnomalyRate(anomalyType);
      // Call the anomaly notifier to see if an action is requested.
      AnomalyNotificationResult requestedAction = notifyAnomalyInProgress(anomalyType);
      if (requestedAction == null) {
        return;
      }

      _anomalyDetectorState.maybeSetOngoingAnomalyDetectionTimeMs();
      switch (requestedAction.action()) {
        case FIX:
          fixAnomalyInProgress(anomalyType);
          break;
        case CHECK:
          LOG.info("Post processing anomaly {} for {}.", _anomalyInProgress, AnomalyState.Status.CHECK_WITH_DELAY);
          postProcessAnomalyInProgress(requestedAction.delay());
          break;
        case IGNORE:
          _anomalyDetectorState.onAnomalyHandle(_anomalyInProgress, AnomalyState.Status.IGNORED);
          break;
        default:
          throw new IllegalStateException("Unrecognized anomaly notification result.");
      }
    }

    /**
     * Dispatch the {@link #_anomalyInProgress} to the {@link AnomalyNotifier} callback that matches its type and
     * return the answer of the notifier. For goal violations, the anomaly detector state is additionally refreshed
     * with regard to unfixable goals.
     *
     * @param anomalyType The type of the {@link #_anomalyInProgress}.
     * @return The notification result corresponding to the {@link #_anomalyInProgress}.
     */
    private AnomalyNotificationResult notifyAnomalyInProgress(AnomalyType anomalyType) {
      // Call the anomaly notifier to see if a fix is desired.
      final AnomalyNotificationResult result;
      switch ((KafkaAnomalyType) anomalyType) {
        case GOAL_VIOLATION:
          GoalViolations violations = (GoalViolations) _anomalyInProgress;
          result = _anomalyNotifier.onGoalViolation(violations);
          _anomalyDetectorState.refreshHasUnfixableGoal(violations);
          break;
        case BROKER_FAILURE:
          result = _anomalyNotifier.onBrokerFailure((BrokerFailures) _anomalyInProgress);
          break;
        case METRIC_ANOMALY:
          result = _anomalyNotifier.onMetricAnomaly((KafkaMetricAnomaly) _anomalyInProgress);
          break;
        case DISK_FAILURE:
          result = _anomalyNotifier.onDiskFailure((DiskFailures) _anomalyInProgress);
          break;
        case TOPIC_ANOMALY:
          result = _anomalyNotifier.onTopicAnomaly((TopicAnomaly) _anomalyInProgress);
          break;
        default:
          throw new IllegalStateException("Unrecognized anomaly type.");
      }
      LOG.debug("Received notification result {}", result);

      return result;
    }

    /**
     * Post process the anomaly in progress. A {@link KafkaAnomalyType#BROKER_FAILURE} gets a broker failure detection
     * scheduled after the given delay and is marked as checked with delay -- unless the anomaly detector is shutting
     * down, in which case nothing is scheduled. An anomaly of any other type is marked as ignored.
     *
     * @param delayMs The delay for broker failure detection.
     */
    private void postProcessAnomalyInProgress(long delayMs) {
      // Anomaly detector does delayed check for broker failures, otherwise it ignores the anomaly.
      if (_anomalyInProgress.anomalyType() != KafkaAnomalyType.BROKER_FAILURE) {
        _anomalyDetectorState.onAnomalyHandle(_anomalyInProgress, AnomalyState.Status.IGNORED);
        return;
      }

      synchronized (_shutdownLock) {
        if (_shutdown) {
          LOG.debug("Skip delayed checking anomaly {}, because anomaly detector is shutting down.", _anomalyInProgress);
          return;
        }
        LOG.debug("Scheduling broker failure detection with delay of {} ms", delayMs);
        _numCheckedWithDelay.incrementAndGet();
        _detectorScheduler.schedule(() -> _brokerFailureDetector.detectBrokerFailures(false), delayMs, TimeUnit.MILLISECONDS);
        _anomalyDetectorState.onAnomalyHandle(_anomalyInProgress, AnomalyState.Status.CHECK_WITH_DELAY);
      }
    }

    /**
     * Check whether the anomaly in progress is ready for fix, i.e. (1) the load monitor is ready and (2) the
     * completeness requirements of the self-healing goals are met. If either check fails, the corresponding status is
     * recorded for the anomaly in the anomaly detector state.
     *
     * @param anomalyType The type of the anomaly in progress; only used for logging.
     * @return True if ready for a fix, false otherwise.
     */
    private boolean isAnomalyInProgressReadyToFix(AnomalyType anomalyType) {
      LoadMonitorTaskRunner.LoadMonitorTaskRunnerState monitorState = _kafkaCruiseControl.getLoadMonitorTaskRunnerState();

      // Fixing anomalies is possible only when (1) the load monitor is not in an unavailable state (e.g. loading or
      // bootstrapping) and (2) the completeness requirements are met for all goals.
      if (!AnomalyUtils.isLoadMonitorReady(monitorState)) {
        LOG.info("Skipping {} fix because load monitor is in {} state.", anomalyType, monitorState);
        _anomalyDetectorState.onAnomalyHandle(_anomalyInProgress, AnomalyState.Status.LOAD_MONITOR_NOT_READY);
        return false;
      }
      if (!_kafkaCruiseControl.meetCompletenessRequirements(_selfHealingGoals)) {
        LOG.warn("Skipping {} fix because load completeness requirement is not met for goals.", anomalyType);
        _anomalyDetectorState.onAnomalyHandle(_anomalyInProgress, AnomalyState.Status.COMPLETENESS_NOT_READY);
        return false;
      }
      return true;
    }

    /**
     * Write the outcome of a self-healing attempt to the operation log.
     *
     * @param anomalyId The id of the anomaly for which self-healing was attempted.
     * @param ofe The optimization failure that prevented the fix from starting, or null if there was none.
     * @param optimizationResult The optimization result of the started fix, or null if the fix did not start.
     */
    private void logSelfHealingOperation(String anomalyId, OptimizationFailureException ofe, String optimizationResult) {
      if (optimizationResult != null) {
        OPERATION_LOG.info("[{}] Self-healing started successfully:\n{}", anomalyId, optimizationResult);
        return;
      }
      if (ofe != null) {
        OPERATION_LOG.warn("[{}] Self-healing failed to start:\n{}", anomalyId, ofe);
        return;
      }
      // Neither a result nor a failure is available.
      OPERATION_LOG.warn("[{}] Self-healing failed to start due to inability to optimize combined self-healing goals ({}).",
                         anomalyId, _selfHealingGoals);
    }

    /**
     * Attempt to fix the anomaly in progress unless the anomaly detector is shutting down. The attempt is made only if
     * the anomaly is ready to fix; its outcome is handed to the anomaly logger executor, and the post-fix handling runs
     * regardless of whether the attempt succeeded, was skipped, or failed with an exception.
     *
     * @param anomalyType The type of the anomaly in progress.
     * @throws OptimizationFailureException If the fix failed due to an optimization failure; rethrown after logging.
     */
    private void fixAnomalyInProgress(AnomalyType anomalyType) throws Exception {
      synchronized (_shutdownLock) {
        if (_shutdown) {
          LOG.info("Skip fixing anomaly {}, because anomaly detector is shutting down.", _anomalyInProgress);
          return;
        }

        boolean readyToFix = isAnomalyInProgressReadyToFix(anomalyType);
        String anomalyId = _anomalyInProgress.anomalyId();
        boolean started = false;
        // Upon post-handling the anomaly, skip reporting broker failure if the failed brokers have not changed.
        boolean skipUnchangedBrokerFailures = false;
        try {
          if (readyToFix) {
            LOG.info("Generating a fix for the anomaly {}.", _anomalyInProgress);
            started = _anomalyInProgress.fix();
            LOG.info("{} the anomaly {}.", started ? "Fixing" : "Cannot fix", _anomalyInProgress);
            String optimizationResult = null;
            if (started) {
              optimizationResult = _anomalyInProgress.optimizationResult(false);
            }
            // Copy into a final local, because the lambda below can only capture effectively final variables.
            final String resultToLog = optimizationResult;
            _anomalyLoggerExecutor.submit(() -> logSelfHealingOperation(anomalyId, null, resultToLog));
          }
        } catch (OptimizationFailureException ofe) {
          _anomalyLoggerExecutor.submit(() -> logSelfHealingOperation(anomalyId, ofe, null));
          skipUnchangedBrokerFailures = anomalyType == KafkaAnomalyType.BROKER_FAILURE;
          throw ofe;
        } finally {
          handlePostFixAnomaly(readyToFix, started, anomalyId, skipUnchangedBrokerFailures);
        }
      }
    }

    /**
     * Post-fix handling of the anomaly in progress: record the outcome of the fix attempt (if the anomaly was ready to
     * fix), clear the anomaly queue, and schedule a broker failure detection -- immediately if the anomaly was ready to
     * fix, after the broker failure detection backoff otherwise.
     *
     * @param isReadyToFix True if the anomaly in progress was ready to fix, false otherwise.
     * @param fixStarted True if the fix has started, false otherwise.
     * @param anomalyId The id of the anomaly in progress.
     * @param skipReportingIfNotUpdated The flag passed on to the scheduled broker failure detection.
     */
    private void handlePostFixAnomaly(boolean isReadyToFix, boolean fixStarted, String anomalyId, boolean skipReportingIfNotUpdated) {
      if (isReadyToFix) {
        if (fixStarted) {
          _anomalyDetectorState.onAnomalyHandle(_anomalyInProgress, AnomalyState.Status.FIX_STARTED);
          _anomalyDetectorState.incrementNumSelfHealingStarted();
          LOG.info("[{}] Self-healing started successfully.", anomalyId);
        } else {
          _anomalyDetectorState.onAnomalyHandle(_anomalyInProgress, AnomalyState.Status.FIX_FAILED_TO_START);
          _anomalyDetectorState.incrementNumSelfHealingFailedToStart();
          LOG.warn("[{}] Self-healing failed to start.", anomalyId);
        }
      }

      final long detectionDelayMs = isReadyToFix ? 0L : _brokerFailureDetectionBackoffMs;
      if (LOG.isDebugEnabled()) {
        LOG.debug("Clearing {} anomalies and scheduling a broker failure detection in {}ms.", _anomalies.size(),
                  detectionDelayMs);
      }
      _anomalies.clear();
      // Explicitly detect broker failures after clearing the queue. This ensures that anomaly detector does not miss
      // broker failures upon (1) fixing another anomaly, or (2) having broker failures that are not yet ready for fix.
      // We don't need to worry about other anomaly types because they run periodically.
      // If there has not been any failed brokers at the time of detecting broker failures, this is a no-op. Otherwise,
      // the call will create a broker failure anomaly. Depending on the time of the first broker failure in that anomaly,
      // it will trigger either a delayed check or a fix.
      _detectorScheduler.schedule(() -> _brokerFailureDetector.detectBrokerFailures(skipReportingIfNotUpdated),
                                  detectionDelayMs, TimeUnit.MILLISECONDS);
    }
  }
}