/*-
 * -\-\-
 * Spotify Styx Scheduler Service
 * --
 * Copyright (C) 2016 Spotify AB
 * --
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * -/-/-
 */

package com.spotify.styx.docker;

import static com.spotify.styx.docker.DockerRunner.LOG;
import static com.spotify.styx.docker.KubernetesDockerRunner.DOCKER_TERMINATION_LOGGING_ANNOTATION;
import static com.spotify.styx.docker.KubernetesDockerRunner.getMainContainerStatus;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.spotify.styx.model.Event;
import com.spotify.styx.model.WorkflowInstance;
import com.spotify.styx.monitoring.Stats;
import com.spotify.styx.serialization.Json;
import com.spotify.styx.state.RunState;
import io.fabric8.kubernetes.api.model.ContainerState;
import io.fabric8.kubernetes.api.model.ContainerStateTerminated;
import io.fabric8.kubernetes.api.model.ContainerStatus;
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.api.model.PodStatus;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import okio.ByteString;

/**
 * Static helpers that turn the observed status of a {@link Pod} into the {@link Event}s that
 * should be emitted for a workflow instance, given the instance's current {@link RunState}.
 *
 * <p>Not instantiable.
 */
final class KubernetesPodEventTranslator {

  private KubernetesPodEventTranslator() {
    throw new UnsupportedOperationException();
  }

  /**
   * JSON payload read from the main container's termination message. The {@code exit_code}
   * property is required.
   */
  private static class TerminationLogMessage {

    int exitCode;

    @JsonCreator
    public TerminationLogMessage(
        @JsonProperty(value = "exit_code", required = true) int exitCode
    ) {
      this.exitCode = exitCode;
    }
  }

  /**
   * Determines which exit code, if any, to report for a terminated main container.
   *
   * <p>When the pod is annotated as using termination logging, the exit code found in the
   * termination log is preferred; otherwise the exit code reported by k8s is used.
   *
   * @param workflowInstance key of the workflow instance, used in log messages only
   * @param pod              the pod the container belongs to
   * @param status           status of the main container; its terminated state is dereferenced
   * @param stats            sink for termination log related counters
   * @return the exit code to report, or empty if no trustworthy exit code is available
   */
  private static Optional<Integer> getExitCodeIfValid(String workflowInstance,
                                                      Pod pod,
                                                      ContainerStatus status,
                                                      Stats stats) {
    final ContainerStateTerminated terminated = status.getState().getTerminated();

    if (!isTerminationLoggingEnabled(pod)) {
      // No termination log expected, use k8s exit code
      return containerExitCode(workflowInstance, pod, status, terminated);
    }

    // Check termination log exit code, if available
    final Optional<TerminationLogMessage> logged =
        readTerminationLog(workflowInstance, status, terminated, stats);
    if (logged.isPresent()) {
      final int loggedExitCode = logged.get().exitCode;

      if (!Objects.equals(loggedExitCode, terminated.getExitCode())) {
        LOG.warn("Exit code mismatch for workflow instance {} container {}. Container exit code: {}. "
                 + "Termination log exit code: {}",
            workflowInstance, status.getContainerID(), terminated.getExitCode(), loggedExitCode);
        stats.recordExitCodeMismatch();
      }

      // A zero exit code in the termination log is not trusted on its own: if the container
      // exit code is known it is reported instead, so that a non-zero container exit code still
      // signals failure. This guards against jobs that incorrectly write a successful exit code
      // to the termination log _before_ running the actual job, which then fails. A zero exit
      // code could still incorrectly come from docker, but there is not a lot to do about that.
      if (terminated.getExitCode() != null && loggedExitCode == 0) {
        return Optional.of(terminated.getExitCode());
      }
      return Optional.of(loggedExitCode);
    }

    // There is no usable termination log exit code: fall back to the k8s exit code, but only if
    // it is not zero.
    // Rationale: It is important for users to be able to get the exit code of the container to
    // be able to debug failures, but at the same time we must be careful about using the k8s
    // exit code when checking whether the execution was successful, as dockerd sometimes
    // returns incorrect exit codes.
    // TODO: consider separating execution status and debugging info in the "terminate" event.
    return Optional.ofNullable(terminated.getExitCode()).filter(code -> code != 0);
  }

  /**
   * Tells whether the pod carries the termination logging annotation with the value
   * {@code "true"}.
   */
  private static boolean isTerminationLoggingEnabled(Pod pod) {
    final var annotations = pod.getMetadata().getAnnotations();
    return annotations != null
           && "true".equals(annotations.get(DOCKER_TERMINATION_LOGGING_ANNOTATION));
  }

  /**
   * Parses the termination message of the container. A missing or unparseable message is logged
   * and counted in {@code stats}, and yields an empty result.
   */
  private static Optional<TerminationLogMessage> readTerminationLog(
      String workflowInstance,
      ContainerStatus status,
      ContainerStateTerminated terminated,
      Stats stats) {
    final String rawMessage = terminated.getMessage();
    if (rawMessage == null) {
      LOG.warn("Missing termination log message for workflow instance {} container {}",
          workflowInstance, status.getContainerID());
      stats.recordTerminationLogMissing();
      return Optional.empty();
    }

    try {
      return Optional.of(
          Json.deserialize(ByteString.encodeUtf8(rawMessage), TerminationLogMessage.class));
    } catch (IOException e) {
      stats.recordTerminationLogInvalid();
      LOG.warn("Unexpected termination log message for workflow instance {} container {}",
          workflowInstance, status.getContainerID(), e);
      return Optional.empty();
    }
  }

  /**
   * Exit code as reported by k8s, used when no termination log is expected. Empty when the exit
   * code is missing, or when it is zero although the pod phase is {@code Failed}.
   */
  private static Optional<Integer> containerExitCode(String workflowInstance,
                                                     Pod pod,
                                                     ContainerStatus status,
                                                     ContainerStateTerminated terminated) {
    final Integer exitCode = terminated.getExitCode();
    if (exitCode == null) {
      LOG.warn("Missing exit code for workflow instance {} container {}",
          workflowInstance, status.getContainerID());
      return Optional.empty();
    }

    // there are cases k8s marks the pod failed but with exitCode 0
    if ("Failed".equals(pod.getStatus().getPhase()) && exitCode == 0) {
      return Optional.empty();
    }
    return Optional.of(exitCode);
  }

  /**
   * Translates the current status of {@code pod} into events for {@code workflowInstance}.
   *
   * <p>Error conditions are checked first, then whether the pod has exited, then whether it has
   * started. The first check that matches decides the result.
   *
   * @param workflowInstance the workflow instance the pod runs
   * @param state            current run state of the workflow instance
   * @param pod              the pod to inspect
   * @param stats            sink for termination log related counters
   * @return the events to emit, possibly empty
   */
  static List<Event> translate(
      WorkflowInstance workflowInstance,
      RunState state,
      Pod pod,
      Stats stats) {
    final Optional<ContainerStatus> mainContainerStatusOpt = getMainContainerStatus(pod);

    final Optional<Event> error = isInErrorState(workflowInstance, pod, mainContainerStatusOpt);
    if (error.isPresent()) {
      return handleError(state, error.get());
    } else if (isExited(pod, mainContainerStatusOpt)) {
      return handleExited(workflowInstance, state, pod, mainContainerStatusOpt, stats);
    } else if (isStarted(pod, mainContainerStatusOpt)) {
      return handleStarted(workflowInstance, state);
    } else {
      return List.of();
    }
  }

  /**
   * Events for an exited pod: a "started" event first if the run state has not reached
   * {@code RUNNING} yet, followed by a "terminate" event. No events in any other run state.
   */
  private static List<Event> handleExited(WorkflowInstance workflowInstance,
                                          RunState state,
                                          Pod pod,
                                          Optional<ContainerStatus> mainContainerStatusOpt,
                                          Stats stats) {
    final List<Event> events = Lists.newArrayList();

    switch (state.state()) {
      case PREPARE:
      case SUBMITTED:
        // The start was never observed, so report it before the termination.
        events.add(Event.started(workflowInstance));
        events.add(terminateEvent(workflowInstance, pod, mainContainerStatusOpt, stats));
        break;

      case RUNNING:
        events.add(terminateEvent(workflowInstance, pod, mainContainerStatusOpt, stats));
        break;

      default:
        // no event
        break;
    }

    return ImmutableList.copyOf(events);
  }

  /**
   * Builds the "terminate" event, carrying the validated exit code of the main container if there
   * is one.
   */
  private static Event terminateEvent(WorkflowInstance workflowInstance,
                                      Pod pod,
                                      Optional<ContainerStatus> mainContainerStatusOpt,
                                      Stats stats) {
    final Optional<Integer> exitCode = mainContainerStatusOpt.flatMap(
        containerStatus ->
            getExitCodeIfValid(workflowInstance.toKey(), pod, containerStatus, stats));
    return Event.terminate(workflowInstance, exitCode);
  }

  /**
   * Events for a started pod: a single "started" event while the run state is {@code PREPARE} or
   * {@code SUBMITTED}, nothing otherwise.
   */
  private static List<Event> handleStarted(WorkflowInstance workflowInstance, RunState state) {
    final List<Event> events;
    switch (state.state()) {
      case PREPARE:
      case SUBMITTED:
        events = List.of(Event.started(workflowInstance));
        break;

      default:
        events = List.of();
        break;
    }
    return events;
  }

  /**
   * Events for a pod in an error state: the given error event while the run state is
   * {@code PREPARE}, {@code SUBMITTED} or {@code RUNNING}, nothing otherwise.
   */
  private static List<Event> handleError(RunState state, Event event) {
    final List<Event> events;
    switch (state.state()) {
      case PREPARE:
      case SUBMITTED:
      case RUNNING:
        events = List.of(event);
        break;

      default:
        events = List.of();
        break;
    }
    return events;
  }

  /**
   * A pod counts as exited when its phase is {@code Succeeded} or {@code Failed}, or when it is
   * still {@code Running} but the main container already has a terminated state.
   */
  private static boolean isExited(Pod pod, Optional<ContainerStatus> mainContainerStatusOpt) {
    switch (pod.getStatus().getPhase()) {
      case "Succeeded":
      case "Failed":
        return true;

      case "Running":
        // Check if the main container has exited
        return mainContainerStatusOpt
            .map(ContainerStatus::getState)
            .map(ContainerState::getTerminated)
            .isPresent();

      default:
        return false;
    }
  }

  /**
   * A pod counts as started when its phase is {@code Running} and the main container is present
   * and reports ready.
   */
  private static boolean isStarted(Pod pod, Optional<ContainerStatus> mainContainerStatusOpt) {
    if (!"Running".equals(pod.getStatus().getPhase())) {
      return false;
    }
    return mainContainerStatusOpt.map(ContainerStatus::getReady).orElse(false);
  }

  /**
   * Detects pod conditions that should be reported as a run error.
   *
   * @return a "runError" event describing the problem, or empty if the pod is not in an error
   *         state
   */
  private static Optional<Event> isInErrorState(WorkflowInstance workflowInstance,
                                                Pod pod,
                                                Optional<ContainerStatus> mainContainerStatusOpt) {
    final PodStatus podStatus = pod.getStatus();
    final String phase = podStatus.getPhase();

    if ("NodeLost".equals(podStatus.getReason())) {
      return Optional.of(Event.runError(workflowInstance, "Lost node running pod"));
    }

    switch (phase) {
      case "Pending":
        // check if the main container failed to pull its image, a possible silent error
        return mainContainerStatusOpt
            .flatMap(KubernetesPodEventTranslator::imageError)
            .map(errorMessage -> Event.runError(workflowInstance, errorMessage));

      case "Succeeded":
      case "Failed":
        return terminalPhaseError(workflowInstance, mainContainerStatusOpt);

      case "Unknown":
        return Optional.of(Event.runError(workflowInstance, "Pod entered Unknown phase"));

      default:
        return Optional.empty();
    }
  }

  /**
   * Error check for a pod in phase {@code Succeeded} or {@code Failed}: the main container must
   * be present and must have a terminated state.
   */
  private static Optional<Event> terminalPhaseError(
      WorkflowInstance workflowInstance,
      Optional<ContainerStatus> mainContainerStatusOpt) {
    if (mainContainerStatusOpt.isEmpty()) {
      return Optional.of(
          Event.runError(workflowInstance, "Could not find our container in pod"));
    }

    if (mainContainerStatusOpt.get().getState().getTerminated() == null) {
      return Optional.of(Event.runError(workflowInstance, "Unexpected null terminated status"));
    }

    return Optional.empty();
  }

  /**
   * Describes an image related problem of a waiting container.
   *
   * @param cs the container status to inspect
   * @return an error message if the container is waiting with an image related reason, otherwise
   *         empty
   */
  static Optional<String> imageError(ContainerStatus cs) {
    final var waiting = cs.getState().getWaiting();
    if (waiting == null) {
      return Optional.empty();
    }

    final String reason = waiting.getReason();
    if (reason == null) {
      return Optional.empty();
    }

    final String rawMessage = waiting.getMessage();
    final String message = rawMessage != null ? rawMessage : "";

    switch (reason) {
      // https://github.com/kubernetes/kubernetes/blob/8327e433590f9e867b1e31a4dc32316685695729/pkg/kubelet/images/types.go#L26
      case "ImageInspectError":
      case "PullImageError":
      case "ErrImagePull":
      case "ErrImageNeverPull":
      case "ImagePullBackOff":
      case "RegistryUnavailable":
        return Optional.of(String.format(
            "Failed to pull image %s of container %s, reason: %s, message: %s",
            cs.getImage(), cs.getName(), reason, message));

      case "InvalidImageName":
        return Optional.of(String.format(
            "Container %s has invalid image name %s, message: %s",
            cs.getName(), cs.getImage(), message));

      default:
        return Optional.empty();
    }
  }

  /**
   * Tells whether the container has a terminated state.
   */
  static boolean isTerminated(ContainerStatus cs) {
    return cs.getState().getTerminated() != null;
  }

  /**
   * Tells whether the main container of the pod is present and has a terminated state.
   */
  static boolean isTerminated(Pod pod) {
    return getMainContainerStatus(pod)
        .filter(KubernetesPodEventTranslator::isTerminated)
        .isPresent();
  }
}