/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.dispatcher.runner;

import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.client.DuplicateJobSubmissionException;
import org.apache.flink.runtime.concurrent.FutureUtils;
import org.apache.flink.runtime.dispatcher.Dispatcher;
import org.apache.flink.runtime.dispatcher.DispatcherGateway;
import org.apache.flink.runtime.dispatcher.DispatcherId;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobmanager.JobGraphStore;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.RpcUtils;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.function.FunctionUtils;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executor;

/**
 * Process which encapsulates the job recovery logic and life cycle management of a
 * {@link Dispatcher}.
 *
 * <p>On start, the process starts the {@link JobGraphStore}, recovers all persisted job graphs on
 * the I/O executor and then creates the dispatcher service with the recovered jobs. While running,
 * it reacts to {@link JobGraphStore.JobGraphListener} notifications by recovering and submitting
 * added jobs and by forwarding removals to the dispatcher service.
 *
 * <p>All recovery related operations are chained onto a single future so that they are executed
 * one after another.
 */
public class SessionDispatcherLeaderProcess extends AbstractDispatcherLeaderProcess implements JobGraphStore.JobGraphListener {

	/** Factory used to create the dispatcher service once the persisted jobs have been recovered. */
	private final DispatcherGatewayServiceFactory dispatcherGatewayServiceFactory;

	/** Store from which persisted job graphs are recovered. */
	private final JobGraphStore jobGraphStore;

	/** Executor on which the job graph store operations (recovery, stop) are run. */
	private final Executor ioExecutor;

	/** Tail of the chain of recovery operations; new operations are appended to this future. */
	private CompletableFuture<Void> onGoingRecoveryOperation = FutureUtils.completedVoidFuture();

	private SessionDispatcherLeaderProcess(
			UUID leaderSessionId,
			DispatcherGatewayServiceFactory dispatcherGatewayServiceFactory,
			JobGraphStore jobGraphStore,
			Executor ioExecutor,
			FatalErrorHandler fatalErrorHandler) {
		super(leaderSessionId, fatalErrorHandler);

		this.dispatcherGatewayServiceFactory = dispatcherGatewayServiceFactory;
		this.jobGraphStore = jobGraphStore;
		this.ioExecutor = ioExecutor;
	}

	/**
	 * Starts the job graph store and kicks off the asynchronous recovery of all persisted jobs,
	 * followed by the creation of the dispatcher service.
	 */
	@Override
	protected void onStart() {
		startServices();

		onGoingRecoveryOperation = recoverJobsAsync()
			.thenAccept(this::createDispatcherIfRunning)
			.handle(this::onErrorIfRunning);
	}

	/**
	 * Starts the job graph store with this process registered as its listener.
	 *
	 * @throws FlinkRuntimeException if the job graph store could not be started
	 */
	private void startServices() {
		try {
			jobGraphStore.start(this);
		} catch (Exception e) {
			throw new FlinkRuntimeException(
				String.format(
					"Could not start %s when trying to start the %s.",
					jobGraphStore.getClass().getSimpleName(),
					getClass().getSimpleName()),
				e);
		}
	}

	/** Creates the dispatcher service for the given jobs, guarded by the {@code RUNNING} state check. */
	private void createDispatcherIfRunning(Collection<JobGraph> jobGraphs) {
		runIfStateIs(State.RUNNING, () -> createDispatcher(jobGraphs));
	}

	/**
	 * Creates the dispatcher service under this process' leader session id and completes the
	 * dispatcher setup with it.
	 *
	 * @param jobGraphs recovered job graphs which are handed to the dispatcher service factory
	 */
	private void createDispatcher(Collection<JobGraph> jobGraphs) {
		final DispatcherGatewayService dispatcherService = dispatcherGatewayServiceFactory.create(
			DispatcherId.fromUuid(getLeaderSessionId()),
			jobGraphs,
			jobGraphStore);

		completeDispatcherSetup(dispatcherService);
	}

	/** Recovers all persisted job graphs on the I/O executor. */
	private CompletableFuture<Collection<JobGraph>> recoverJobsAsync() {
		return CompletableFuture.supplyAsync(
			this::recoverJobsIfRunning,
			ioExecutor);
	}

	/**
	 * Recovers all persisted job graphs, falling back to an empty collection if the guarded call
	 * does not produce a result.
	 */
	private Collection<JobGraph> recoverJobsIfRunning() {
		return supplyUnsynchronizedIfRunning(this::recoverJobs).orElse(Collections.emptyList());
	}

	/**
	 * Recovers the job graphs of all job ids known to the job graph store.
	 *
	 * <p>Job ids for which the store no longer returns a job graph are skipped, so that the
	 * returned collection never contains {@code null} elements.
	 *
	 * @return the recovered job graphs
	 * @throws FlinkRuntimeException if the job ids or one of the job graphs could not be retrieved
	 */
	private Collection<JobGraph> recoverJobs() {
		log.info("Recover all persisted job graphs.");
		final Collection<JobID> jobIds = getJobIds();
		final Collection<JobGraph> recoveredJobGraphs = new ArrayList<>(jobIds.size());

		for (JobID jobId : jobIds) {
			tryRecoverJob(jobId).ifPresent(recoveredJobGraphs::add);
		}

		log.info("Successfully recovered {} persisted job graphs.", recoveredJobGraphs.size());

		return recoveredJobGraphs;
	}

	/**
	 * Retrieves the ids of all persisted jobs from the job graph store.
	 *
	 * @throws FlinkRuntimeException if the job ids could not be retrieved
	 */
	private Collection<JobID> getJobIds() {
		try {
			return jobGraphStore.getJobIds();
		} catch (Exception e) {
			throw new FlinkRuntimeException(
				"Could not retrieve job ids of persisted jobs.",
				e);
		}
	}

	/**
	 * Tries to recover the job graph for the given job id from the job graph store.
	 *
	 * @param jobId id of the job to recover
	 * @return the recovered job graph, or an empty optional if the store returned {@code null}
	 *     for the given job id
	 * @throws FlinkRuntimeException if the job graph store failed to recover the job graph
	 */
	private Optional<JobGraph> tryRecoverJob(JobID jobId) {
		log.info("Trying to recover job with job id {}.", jobId);
		try {
			final JobGraph jobGraph = jobGraphStore.recoverJobGraph(jobId);

			if (jobGraph == null) {
				// the job graph may have been removed from the store after its id was observed
				log.info(
					"Skipping recovery of job with job id {} because the {} did not return a job graph for it.",
					jobId,
					jobGraphStore.getClass().getSimpleName());
			}

			return Optional.ofNullable(jobGraph);
		} catch (Exception e) {
			throw new FlinkRuntimeException(
				String.format("Could not recover job with job id %s.", jobId),
				e);
		}
	}

	/** Stops the job graph store asynchronously on the I/O executor. */
	@Override
	protected CompletableFuture<Void> onClose() {
		return CompletableFuture.runAsync(
			this::stopServices,
			ioExecutor);
	}

	/** Stops the job graph store, rethrowing any failure. */
	private void stopServices() {
		try {
			jobGraphStore.stop();
		} catch (Exception e) {
			ExceptionUtils.rethrow(e);
		}
	}

	// ------------------------------------------------------------
	// JobGraphListener
	// ------------------------------------------------------------

	/**
	 * Callback for a job graph added to the job graph store; handled only under the
	 * {@code RUNNING} state check.
	 *
	 * @param jobId id of the added job
	 */
	@Override
	public void onAddedJobGraph(JobID jobId) {
		runIfStateIs(
			State.RUNNING,
			() -> handleAddedJobGraph(jobId));
	}

	/**
	 * Appends the recovery and submission of the added job to the chain of recovery operations.
	 */
	private void handleAddedJobGraph(JobID jobId) {
		log.debug(
			"Job {} has been added to the {} by another process.",
			jobId,
			jobGraphStore.getClass().getSimpleName());

		// serialize all ongoing recovery operations
		onGoingRecoveryOperation = onGoingRecoveryOperation
			.thenApplyAsync(
				ignored -> recoverJobIfRunning(jobId),
				ioExecutor)
			.thenCompose(optionalJobGraph -> optionalJobGraph
				.flatMap(this::submitAddedJobIfRunning)
				.orElse(FutureUtils.completedVoidFuture()))
			.handle(this::onErrorIfRunning);
	}

	/** Submits the given job to the dispatcher, guarded by the running-state check. */
	private Optional<CompletableFuture<Void>> submitAddedJobIfRunning(JobGraph jobGraph) {
		return supplyIfRunning(() -> submitAddedJob(jobGraph));
	}

	/**
	 * Submits the given job to the dispatcher gateway.
	 *
	 * @return future which completes once the submission has been acknowledged; a
	 *     {@link DuplicateJobSubmissionException} is treated as a successful completion
	 */
	private CompletableFuture<Void> submitAddedJob(JobGraph jobGraph) {
		final DispatcherGateway dispatcherGateway = getDispatcherGatewayInternal();

		return dispatcherGateway
			.submitJob(jobGraph, RpcUtils.INF_TIMEOUT)
			.thenApply(FunctionUtils.nullFn())
			.exceptionally(this::filterOutDuplicateJobSubmissionException);
	}

	/**
	 * Swallows {@link DuplicateJobSubmissionException}s and propagates every other failure.
	 *
	 * @param throwable failure of the job submission
	 * @return always {@code null} if the failure was a duplicate job submission
	 * @throws CompletionException wrapping the given throwable if it is any other failure
	 */
	private Void filterOutDuplicateJobSubmissionException(Throwable throwable) {
		final Throwable strippedException = ExceptionUtils.stripCompletionException(throwable);
		if (strippedException instanceof DuplicateJobSubmissionException) {
			final DuplicateJobSubmissionException duplicateJobSubmissionException = (DuplicateJobSubmissionException) strippedException;

			log.debug("Ignore recovered job {} because the job is currently being executed.", duplicateJobSubmissionException.getJobID(), duplicateJobSubmissionException);

			return null;
		} else {
			throw new CompletionException(throwable);
		}
	}

	/**
	 * Returns the dispatcher gateway, failing with a {@link NullPointerException} if the gateway
	 * future has not been completed yet.
	 */
	private DispatcherGateway getDispatcherGatewayInternal() {
		return Preconditions.checkNotNull(getDispatcherGateway().getNow(null));
	}

	/**
	 * Recovers the job graph of the given job, guarded by the running-state check.
	 *
	 * @return the recovered job graph, or an empty optional if the guarded call produced no result
	 *     or the job graph store did not return a job graph for the given id
	 */
	private Optional<JobGraph> recoverJobIfRunning(JobID jobId) {
		// The supplier itself yields an Optional so that a missing job graph never reaches the
		// guard as a null value.
		final Optional<Optional<JobGraph>> recoveryResult =
			supplyUnsynchronizedIfRunning(() -> tryRecoverJob(jobId));

		return recoveryResult.flatMap(recoveredJobGraph -> recoveredJobGraph);
	}

	/**
	 * Callback for a job graph removed from the job graph store; handled only under the
	 * {@code RUNNING} state check.
	 *
	 * @param jobId id of the removed job
	 */
	@Override
	public void onRemovedJobGraph(JobID jobId) {
		runIfStateIs(
			State.RUNNING,
			() -> handleRemovedJobGraph(jobId));
	}

	/** Appends the removal of the given job to the chain of recovery operations. */
	private void handleRemovedJobGraph(JobID jobId) {
		log.debug(
			"Job {} has been removed from the {} by another process.",
			jobId,
			jobGraphStore.getClass().getSimpleName());

		onGoingRecoveryOperation = onGoingRecoveryOperation
			.thenCompose(ignored -> removeJobGraphIfRunning(jobId).orElse(FutureUtils.completedVoidFuture()))
			.handle(this::onErrorIfRunning);
	}

	/** Forwards the job removal to the dispatcher service, guarded by the running-state check. */
	private Optional<CompletableFuture<Void>> removeJobGraphIfRunning(JobID jobId) {
		return supplyIfRunning(() -> removeJobGraph(jobId));
	}

	/**
	 * Notifies the dispatcher service, if one is available, about the removed job graph.
	 *
	 * @return future of the dispatcher service's removal handling, or a completed future if there
	 *     is no dispatcher service
	 */
	private CompletableFuture<Void> removeJobGraph(JobID jobId) {
		return getDispatcherService().map(dispatcherService -> dispatcherService.onRemovedJobGraph(jobId))
			.orElseGet(FutureUtils::completedVoidFuture);
	}

	// ---------------------------------------------------------------
	// Factory methods
	// ---------------------------------------------------------------

	/**
	 * Creates a new {@link SessionDispatcherLeaderProcess}.
	 *
	 * @param leaderSessionId leader session id under which the dispatcher is created
	 * @param dispatcherFactory factory for the dispatcher service
	 * @param jobGraphStore store from which persisted job graphs are recovered
	 * @param ioExecutor executor used for job graph store operations
	 * @param fatalErrorHandler handler passed on to the base class
	 * @return the newly created process
	 */
	public static SessionDispatcherLeaderProcess create(
			UUID leaderSessionId,
			DispatcherGatewayServiceFactory dispatcherFactory,
			JobGraphStore jobGraphStore,
			Executor ioExecutor,
			FatalErrorHandler fatalErrorHandler) {
		return new SessionDispatcherLeaderProcess(
			leaderSessionId,
			dispatcherFactory,
			jobGraphStore,
			ioExecutor,
			fatalErrorHandler);
	}
}