/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.jobmanager;

import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.JobSubmissionResult;
import org.apache.flink.api.common.time.Deadline;
import org.apache.flink.configuration.BlobServerOptions;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.UnmodifiableConfiguration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.blob.BlobClient;
import org.apache.flink.runtime.blob.PermanentBlobKey;
import org.apache.flink.runtime.client.JobSubmissionException;
import org.apache.flink.runtime.clusterframework.ApplicationStatus;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobmaster.JobResult;
import org.apache.flink.runtime.minicluster.MiniCluster;
import org.apache.flink.runtime.testtasks.FailingBlockingInvokable;
import org.apache.flink.runtime.testtasks.NoOpInvokable;
import org.apache.flink.runtime.testutils.MiniClusterResource;
import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.TestLogger;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import javax.annotation.Nonnull;

import java.io.File;
import java.io.FilenameFilter;
import java.net.InetSocketAddress;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;

import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;

/**
 * Small test to check that the {@link org.apache.flink.runtime.blob.BlobServer} cleanup is executed
 * after job termination.
 *
 * <p>A single mini cluster is shared by all test methods. Each test uploads one BLOB for a fresh
 * job, drives the job into a terminal state (or lets its submission fail) and then waits until no
 * {@code job_*} directories are left below the configured BLOB storage directory.
 */
public class BlobsCleanupITCase extends TestLogger {

	/** Pause (in milliseconds) between two checks of a BLOB directory. */
	private static final long RETRY_INTERVAL = 100L;

	@ClassRule
	public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();

	private static MiniClusterResource miniClusterResource;

	private static UnmodifiableConfiguration configuration;

	/** Base directory configured as {@link BlobServerOptions#STORAGE_DIRECTORY}. */
	private static File blobBaseDir;

	@BeforeClass
	public static void setup() throws Exception {
		blobBaseDir = TEMPORARY_FOLDER.newFolder();

		Configuration cfg = new Configuration();
		cfg.setString(BlobServerOptions.STORAGE_DIRECTORY, blobBaseDir.getAbsolutePath());
		// allow exactly one restart so that a failing job is recovered once before it fails for good
		cfg.setString(ConfigConstants.RESTART_STRATEGY, "fixeddelay");
		cfg.setInteger(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_ATTEMPTS, 1);
		// BLOBs are deleted from BlobCache between 1s and 2s after last reference
		// -> the BlobCache may still have the BLOB or not (let's test both cases randomly)
		cfg.setLong(BlobServerOptions.CLEANUP_INTERVAL, 1L);

		configuration = new UnmodifiableConfiguration(cfg);

		miniClusterResource = new MiniClusterResource(new MiniClusterResourceConfiguration.Builder()
			.setNumberSlotsPerTaskManager(2)
			.setNumberTaskManagers(1)
			.setConfiguration(configuration)
			.build());

		miniClusterResource.before();
	}

	@AfterClass
	public static void teardown() {
		// setup() may have failed before the resource was created
		if (miniClusterResource != null) {
			miniClusterResource.after();
		}
	}

	/**
	 * Specifies which test case to run in {@link #testBlobServerCleanup(TestCase)}.
	 */
	private enum TestCase {
		JOB_FINISHES_SUCCESSFULLY,
		JOB_IS_CANCELLED,
		JOB_FAILS,
		JOB_SUBMISSION_FAILS
	}

	/**
	 * Test cleanup for a job that finishes ordinarily.
	 */
	@Test
	public void testBlobServerCleanupFinishedJob() throws Exception {
		testBlobServerCleanup(TestCase.JOB_FINISHES_SUCCESSFULLY);
	}

	/**
	 * Test cleanup for a job which is cancelled after submission.
	 */
	@Test
	public void testBlobServerCleanupCancelledJob() throws Exception {
		testBlobServerCleanup(TestCase.JOB_IS_CANCELLED);
	}

	/**
	 * Test cleanup for a job that fails (first a task fails, then the job recovers, then the whole
	 * job fails due to a limited restart policy).
	 */
	@Test
	public void testBlobServerCleanupFailedJob() throws Exception {
		testBlobServerCleanup(TestCase.JOB_FAILS);
	}

	/**
	 * Test cleanup for a job that fails job submission (emulated by an additional BLOB not being
	 * present).
	 */
	@Test
	public void testBlobServerCleanupFailedSubmission() throws Exception {
		testBlobServerCleanup(TestCase.JOB_SUBMISSION_FAILS);
	}

	/**
	 * Uploads a BLOB for a new job, runs the scenario selected by {@code testCase} and finally
	 * waits for every {@code blobStore-*} directory below the BLOB base directory to be free of
	 * job directories.
	 *
	 * @param testCase
	 * 		scenario to run
	 *
	 * @throws Exception
	 * 		if the upload, the job submission or waiting for the job result fails unexpectedly
	 */
	private void testBlobServerCleanup(final TestCase testCase) throws Exception {
		final MiniCluster miniCluster = miniClusterResource.getMiniCluster();
		final int numTasks = 2;
		// overall budget; what is left of it after the job terminated is spent waiting for cleanup
		final Deadline timeout = Deadline.fromNow(Duration.ofSeconds(30L));

		final JobGraph jobGraph = createJobGraph(testCase, numTasks);
		final JobID jid = jobGraph.getJobID();

		// upload a blob; the file lives inside the class-level temporary folder so that it is
		// removed together with it instead of piling up in the system temp directory
		final File tempBlob = File.createTempFile("Required", ".jar", TEMPORARY_FOLDER.getRoot());
		final int blobPort = miniCluster.getClusterInformation().getBlobServerPort();
		List<PermanentBlobKey> keys =
			BlobClient.uploadFiles(new InetSocketAddress("localhost", blobPort),
				configuration, jid,
				Collections.singletonList(new Path(tempBlob.getAbsolutePath())));
		assertThat(keys, hasSize(1));
		jobGraph.addUserJarBlobKey(keys.get(0));

		if (testCase == TestCase.JOB_SUBMISSION_FAILS) {
			// add an invalid key so that the submission fails
			jobGraph.addUserJarBlobKey(new PermanentBlobKey());
		}

		final CompletableFuture<JobSubmissionResult> submissionFuture = miniCluster.submitJob(jobGraph);

		if (testCase == TestCase.JOB_SUBMISSION_FAILS) {
			try {
				submissionFuture.get();
				fail("Expected job submission failure.");
			} catch (ExecutionException e) {
				assertThat(ExceptionUtils.findThrowable(e, JobSubmissionException.class).isPresent(), is(true));
			}
		} else {
			final JobSubmissionResult jobSubmissionResult = submissionFuture.get();

			assertThat(jobSubmissionResult.getJobID(), is(jid));

			final CompletableFuture<JobResult> resultFuture = miniCluster.requestJobResult(jid);

			if (testCase == TestCase.JOB_FAILS) {
				// fail a task so that the job is going to be recovered (we actually do not
				// need the blocking part of the invokable and can start throwing right away)
				// NOTE(review): unblock() is a static call; if the invokable keeps that state
				// and it is never reset, a cancel test running afterwards may no longer block
				// - confirm against FailingBlockingInvokable
				FailingBlockingInvokable.unblock();

				// job will get restarted, BlobCache may re-download the BLOB if already deleted
				// then the tasks will fail again and the restart strategy will finalise the job
				final JobResult jobResult = resultFuture.get();
				assertThat(jobResult.isSuccess(), is(false));
				assertThat(jobResult.getApplicationStatus(), is(ApplicationStatus.FAILED));
			} else if (testCase == TestCase.JOB_IS_CANCELLED) {

				miniCluster.cancelJob(jid);

				final JobResult jobResult = resultFuture.get();
				assertThat(jobResult.isSuccess(), is(false));
				assertThat(jobResult.getApplicationStatus(), is(ApplicationStatus.CANCELED));
			} else {
				final JobResult jobResult = resultFuture.get();
				assertThat(jobResult.isSuccess(), is(true));
			}

		}

		// both BlobServer and BlobCache should eventually delete all files

		File[] blobDirs = blobBaseDir.listFiles((dir, name) -> name.startsWith("blobStore-"));
		assertNotNull(blobDirs);
		for (File blobDir : blobDirs) {
			waitForEmptyBlobDir(blobDir, timeout.timeLeft());
		}
	}

	/**
	 * Creates a job graph with a single vertex named "Source".
	 *
	 * <p>The failing and the cancelled scenario use {@link FailingBlockingInvokable}; all other
	 * scenarios use {@link NoOpInvokable}.
	 *
	 * @param testCase
	 * 		scenario the job graph is built for
	 * @param numTasks
	 * 		parallelism of the vertex
	 *
	 * @return job graph named "BlobCleanupTest" containing the single vertex
	 */
	@Nonnull
	private JobGraph createJobGraph(TestCase testCase, int numTasks) {
		JobVertex source = new JobVertex("Source");
		if (testCase == TestCase.JOB_FAILS || testCase == TestCase.JOB_IS_CANCELLED) {
			source.setInvokableClass(FailingBlockingInvokable.class);
		} else {
			source.setInvokableClass(NoOpInvokable.class);
		}
		source.setParallelism(numTasks);

		return new JobGraph("BlobCleanupTest", source);
	}

	/**
	 * Waits until the given {@link org.apache.flink.runtime.blob.BlobService} storage directory
	 * does not contain any job-related folders any more.
	 *
	 * <p>The directory is checked at least once, even if no time is left. If entries starting with
	 * {@code job_} are still present once the time is up, the test is failed.
	 *
	 * @param blobDir
	 * 		directory of a {@link org.apache.flink.runtime.blob.BlobServer} or {@link
	 * 		org.apache.flink.runtime.blob.BlobCacheService}
	 * @param remaining
	 * 		remaining time for this test
	 *
	 * @throws InterruptedException
	 * 		if the thread is interrupted while sleeping between two checks
	 *
	 * @see org.apache.flink.runtime.blob.BlobUtils
	 */
	private static void waitForEmptyBlobDir(File blobDir, Duration remaining)
		throws InterruptedException {
		// System.nanoTime() is monotonic, so wall-clock adjustments cannot shorten or extend the wait
		final long deadlineNanos = System.nanoTime() + remaining.toNanos();
		final FilenameFilter jobDirFilter = (dir, name) -> name.startsWith("job_");
		String[] blobDirContents;

		do {
			blobDirContents = blobDir.list(jobDirFilter);
			// File#list returns null if the path is not a (readable) directory, e.g. because it
			// has been removed altogether; this counts as "empty", too
			if (blobDirContents == null || blobDirContents.length == 0) {
				return;
			}
			Thread.sleep(RETRY_INTERVAL);
			// compare via subtraction to stay correct if the nano time counter wraps around
		} while (System.nanoTime() - deadlineNanos < 0L);

		fail("Timeout while waiting for " + blobDir.getAbsolutePath() + " to become empty. Current contents: " + Arrays.toString(blobDirContents));
	}
}