Java Code Examples for org.apache.helix.task.TaskDriver#getWorkflowContext()

The following examples show how to use org.apache.helix.task.TaskDriver#getWorkflowContext(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HelixUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Blocks until the workflow context exists and holds a state for the given job, or fails
 * once {@code timeoutMillis} has elapsed.
 *
 * @param helixManager manager used to look up the workflow context
 * @param workFlowName name of the workflow that owns the job
 * @param jobName job name, without the workflow namespace prefix
 * @param timeoutMillis maximum time to wait, in milliseconds
 * @throws Exception a {@code JobException} when the job state is still missing after
 *     {@code timeoutMillis}; an {@link InterruptedException} when interrupted while sleeping
 */
static void waitJobInitialization(
    HelixManager helixManager,
    String workFlowName,
    String jobName,
    long timeoutMillis) throws Exception {
  WorkflowContext workflowContext = TaskDriver.getWorkflowContext(helixManager, workFlowName);

  // If the helix job is deleted from some other thread or a completely external process,
  // method waitJobCompletion() needs to differentiate between the cases where
  // 1) workflowContext did not get initialized ever, in which case we need to keep waiting, or
  // 2) it did get initialized but deleted soon after, in which case we should stop waiting
  // To overcome this issue, we wait here till workflowContext gets initialized
  long start = System.currentTimeMillis();
  while (workflowContext == null || workflowContext.getJobState(TaskUtil.getNamespacedJobName(workFlowName, jobName)) == null) {
    if (System.currentTimeMillis() - start > timeoutMillis) {
      log.error("Job cannot be initialized within {} milliseconds, considered as an error", timeoutMillis);
      // "{}" is only substituted by the logger, so the exception text is formatted explicitly.
      throw new JobException(String.format(
          "Job cannot be initialized within %d milliseconds, considered as an error", timeoutMillis));
    }
    log.info("Waiting for work flow initialization.");
    // Sleep first, then refresh, so the loop condition always sees a freshly read context.
    Thread.sleep(1000);
    workflowContext = TaskDriver.getWorkflowContext(helixManager, workFlowName);
  }

  log.info("Work flow {} initialized", workFlowName);
}
 
Example 2
Source File: HelixUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether the given job is in one of the states this method treats as finished
 * (STOPPED, FAILED, COMPLETED, ABORTED, TIMED_OUT).
 *
 * @param workflowName name of the workflow that owns the job
 * @param jobName job name, without the workflow namespace prefix
 * @param helixManager manager used to look up the workflow context
 * @return {@code true} when the workflow context is missing or the job is in a finished state;
 *     {@code false} when the job has no recorded state or is in any other state
 */
static boolean isJobFinished(String workflowName, String jobName, HelixManager helixManager) {
  WorkflowContext workflowContext = TaskDriver.getWorkflowContext(helixManager, workflowName);
  if (workflowContext == null) {
    // this workflow context doesn't exist, considered as finished.
    return true;
  }

  TaskState jobState = workflowContext.getJobState(TaskUtil.getNamespacedJobName(workflowName, jobName));
  if (jobState == null) {
    // No state is recorded for the job; switching on null would throw NullPointerException.
    // A job without a state has not reached any of the finished states below.
    return false;
  }

  switch (jobState) {
    case STOPPED:
    case FAILED:
    case COMPLETED:
    case ABORTED:
    case TIMED_OUT:
      return true;
    default:
      return false;
  }
}
 
Example 3
Source File: HelixUtils.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Polls until the given job reports the STOPPED state, then deletes its workflow.
 * The workflow is expected to have been stopped by the caller; this method only waits and deletes.
 *
 * @param helixManager helix manager used to read the workflow context and build the task driver
 * @param workFlowName name of the workflow to delete
 * @param jobName helix job name, without the workflow namespace prefix
 * @throws InterruptedException if the thread is interrupted while waiting
 */
private static void deleteStoppedHelixJob(HelixManager helixManager, String workFlowName, String jobName)
    throws InterruptedException {
  WorkflowContext context = TaskDriver.getWorkflowContext(helixManager, workFlowName);
  while (true) {
    if (context.getJobState(TaskUtil.getNamespacedJobName(workFlowName, jobName)) == STOPPED) {
      break;
    }
    log.info("Waiting for job {} to stop...", jobName);
    context = TaskDriver.getWorkflowContext(helixManager, workFlowName);
    Thread.sleep(1000);
  }

  // One workflow contains only one job, so the whole workflow is removed.
  TaskDriver driver = new TaskDriver(helixManager);
  driver.deleteAndWaitForCompletion(workFlowName, 10000L);
  log.info("Workflow deleted.");
}
 
Example 4
Source File: TaskTestUtil.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Repeatedly reads the workflow context until it is non-null or the default timeout has passed,
 * then asserts that a context was obtained.
 *
 * @param driver task driver used to read the context
 * @param workflowResource name of the workflow to look up
 * @return the non-null workflow context
 * @throws InterruptedException if the thread is interrupted while sleeping between reads
 */
public static WorkflowContext pollForWorkflowContext(TaskDriver driver, String workflowResource)
    throws InterruptedException {
  final long startedAt = System.currentTimeMillis();
  WorkflowContext context;
  for (;;) {
    context = driver.getWorkflowContext(workflowResource);
    Thread.sleep(100);
    if (context != null || System.currentTimeMillis() >= startedAt + _default_timeout) {
      break;
    }
  }
  Assert.assertNotNull(context);
  return context;
}
 
Example 5
Source File: WorkflowAccessor.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a JSON document describing one workflow: its id, config, context, job DAG
 * (jobs and child-to-parent edges) and last scheduled task execution info.
 *
 * <p>The config and context nodes are left empty when the corresponding object is missing.
 * The job DAG entries are emitted only when the workflow config exists.
 *
 * @param clusterId cluster that owns the workflow
 * @param workflowId workflow to describe
 * @return the JSON representation of the workflow
 */
@GET
@Path("{workflowId}")
public Response getWorkflow(@PathParam("clusterId") String clusterId,
    @PathParam("workflowId") String workflowId) {
  TaskDriver taskDriver = getTaskDriver(clusterId);
  WorkflowConfig workflowConfig = taskDriver.getWorkflowConfig(workflowId);
  WorkflowContext workflowContext = taskDriver.getWorkflowContext(workflowId);

  ObjectNode root = JsonNodeFactory.instance.objectNode();
  TextNode id = JsonNodeFactory.instance.textNode(workflowId);
  root.put(Properties.id.name(), id);

  ObjectNode workflowConfigNode = JsonNodeFactory.instance.objectNode();
  ObjectNode workflowContextNode = JsonNodeFactory.instance.objectNode();

  if (workflowConfig != null) {
    getWorkflowConfigNode(workflowConfigNode, workflowConfig.getRecord());
  }

  if (workflowContext != null) {
    getWorkflowContextNode(workflowContextNode, workflowContext.getRecord());
  }

  root.put(WorkflowProperties.WorkflowConfig.name(), workflowConfigNode);
  root.put(WorkflowProperties.WorkflowContext.name(), workflowContextNode);

  // The config can be null (see the check above); reading the DAG from a null config
  // would throw NullPointerException, so the DAG entries are skipped in that case.
  if (workflowConfig != null) {
    JobDag jobDag = workflowConfig.getJobDag();
    ArrayNode jobs = OBJECT_MAPPER.valueToTree(jobDag.getAllNodes());
    ObjectNode parentJobs = OBJECT_MAPPER.valueToTree(jobDag.getChildrenToParents());
    root.put(WorkflowProperties.Jobs.name(), jobs);
    root.put(WorkflowProperties.ParentJobs.name(), parentJobs);
  }
  root.put(WorkflowProperties.LastScheduledTask.name(), OBJECT_MAPPER.valueToTree(taskDriver.getLastScheduledTaskExecutionInfo(workflowId)));
  return JSONRepresentation(root);
}
 
Example 6
Source File: WorkflowAccessor.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the workflow context as JSON, or an empty JSON object when the workflow has no context.
 *
 * @param clusterId cluster that owns the workflow
 * @param workflowId workflow whose context is requested
 * @return the JSON representation of the context
 */
@GET
@Path("{workflowId}/context")
public Response getWorkflowContext(@PathParam("clusterId") String clusterId,
    @PathParam("workflowId") String workflowId) {
  WorkflowContext context = getTaskDriver(clusterId).getWorkflowContext(workflowId);
  ObjectNode contextNode = JsonNodeFactory.instance.objectNode();
  if (context == null) {
    // Nothing to copy; respond with the empty node.
    return JSONRepresentation(contextNode);
  }

  getWorkflowContextNode(contextNode, context.getRecord());
  return JSONRepresentation(contextNode);
}
 
Example 7
Source File: JobQueueResource.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a JSON representation of a job queue by merging its resource config and its
 * workflow context (whichever of the two exist) into a single record named after the queue.
 *
 * @param clusterName cluster that hosts the job queue
 * @param jobQueueName name of the job queue
 * @return the merged record serialized as JSON
 * @throws Exception if any of the lookups or the serialization fails
 */
StringRepresentation getHostedEntitiesRepresentation(String clusterName, String jobQueueName)
    throws Exception {
  ZkClient client =
      ResourceUtil.getAttributeFromCtx(getContext(), ResourceUtil.ContextKey.ZKCLIENT);
  HelixDataAccessor dataAccessor =
      ClusterRepresentationUtil.getClusterDataAccessor(client, clusterName);
  PropertyKey.Builder keys = dataAccessor.keyBuilder();
  TaskDriver driver = new TaskDriver(client, clusterName);

  // Queue config is read as a plain resource config.
  // TODO: fix this to use workflowConfig.
  ResourceConfig queueConfig = dataAccessor.getProperty(keys.resourceConfig(jobQueueName));
  WorkflowContext queueContext = driver.getWorkflowContext(jobQueueName);

  // Fold whatever is available into one record.
  ZNRecord merged = new ZNRecord(jobQueueName);
  if (queueConfig != null) {
    merged.merge(queueConfig.getRecord());
  }
  if (queueContext != null) {
    merged.merge(queueContext.getRecord());
  }

  return new StringRepresentation(ClusterRepresentationUtil.ZNRecordToJson(merged),
      MediaType.APPLICATION_JSON);
}
 
Example 8
Source File: HelixUtils.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Polls the workflow context until the given job is observed in a state that ends the wait,
 * or until the optional overall timeout expires.
 *
 * <p>Behaviour per observation:
 * <ul>
 *   <li>workflow context missing: returns;</li>
 *   <li>STOPPED: calls {@code deleteStoppedHelixJob} and returns;</li>
 *   <li>FAILED or COMPLETED: returns;</li>
 *   <li>STOPPING: sleeps one second, deletes the workflow if the stopping-state deadline has
 *       passed, then returns;</li>
 *   <li>any other state: logs, sleeps one second and polls again.</li>
 * </ul>
 *
 * @param helixManager manager used to read the workflow context and build the task driver
 * @param workFlowName name of the workflow that owns the job
 * @param jobName job name, without the workflow namespace prefix
 * @param timeoutInSeconds overall wait limit in seconds; when absent the loop has no time bound
 * @param stoppingStateTimeoutInSeconds seconds, counted from method entry, after which a job
 *     observed in STOPPING has its workflow deleted
 * @throws InterruptedException if the thread is interrupted while sleeping
 * @throws TimeoutException if {@code timeoutInSeconds} is present and elapses before any of the
 *     returns above is reached
 */
static void waitJobCompletion(HelixManager helixManager, String workFlowName, String jobName,
    Optional<Long> timeoutInSeconds, Long stoppingStateTimeoutInSeconds) throws InterruptedException, TimeoutException {
  log.info("Waiting for job {} to complete...", jobName);
  // endTime stays 0 when no timeout is given; it is only consulted when the timeout is present.
  long endTime = 0;
  long currentTimeMillis = System.currentTimeMillis();

  if (timeoutInSeconds.isPresent()) {
    endTime = currentTimeMillis + timeoutInSeconds.get() * 1000;
  }

  // Both deadlines are measured from the same starting instant.
  long stoppingStateEndTime = currentTimeMillis + stoppingStateTimeoutInSeconds * 1000;

  while (!timeoutInSeconds.isPresent() || System.currentTimeMillis() <= endTime) {
    WorkflowContext workflowContext = TaskDriver.getWorkflowContext(helixManager, workFlowName);
    if (workflowContext != null) {
      TaskState jobState = workflowContext.getJobState(TaskUtil.getNamespacedJobName(workFlowName, jobName));
      // NOTE(review): a null jobState makes this switch throw NullPointerException —
      // confirm callers always wait for job initialization before calling this method.
      switch (jobState) {
        case STOPPED:
          // user requested cancellation, which is executed by executeCancellation()
          log.info("Job {} is cancelled, it will be deleted now.", jobName);
          HelixUtils.deleteStoppedHelixJob(helixManager, workFlowName, jobName);
          return;
        case FAILED:
        case COMPLETED:
        return;
        case STOPPING:
          log.info("Waiting for job {} to complete... State - {}", jobName, jobState);
          Thread.sleep(1000);
          // Workaround for a Helix bug where a job may be stuck in the STOPPING state due to an unresponsive task.
          if (System.currentTimeMillis() > stoppingStateEndTime) {
            log.info("Deleting workflow {}", workFlowName);
            new TaskDriver(helixManager).delete(workFlowName);
            log.info("Deleted workflow {}", workFlowName);
          }
          // NOTE(review): this return runs whether or not the workflow was deleted above, so the
          // method leaves after a single STOPPING observation — confirm this is intended.
          return;
        default:
          // Any other state: keep polling after a one-second pause.
          log.info("Waiting for job {} to complete... State - {}", jobName, jobState);
          Thread.sleep(1000);
      }
    } else {
      // We have waited for WorkflowContext to get initialized,
      // so it is found null here, it must have been deleted in job cancellation process.
      log.info("WorkflowContext not found. Job is probably cancelled.");
      return;
    }
  }

  // Reached only when a timeout was supplied and has elapsed.
  // NOTE(review): the Optional itself is concatenated, so the message shows its toString()
  // form rather than the bare number of seconds.
  throw new TimeoutException("task driver wait time [" + timeoutInSeconds + " sec] is expired.");
}
 
Example 9
Source File: GobblinHelixJobLauncherTest.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Launches two jobs, closes their launchers one after the other, and verifies that closing a
 * launcher removes that job's Helix workflow, job context and on-disk state while leaving the
 * second workflow (which shares a prefix with the first) untouched until its own close.
 */
public void testJobCleanup() throws Exception {
  final ConcurrentHashMap<String, Boolean> runningMap = new ConcurrentHashMap<>();

  final Properties firstProps = generateJobProperties(this.baseConfig, "3", "_1504201348473");
  final GobblinHelixJobLauncher firstLauncher =
      new GobblinHelixJobLauncher(firstProps, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap,
          java.util.Optional.empty());

  final Properties secondProps = generateJobProperties(this.baseConfig, "33", "_1504201348474");
  final GobblinHelixJobLauncher secondLauncher =
      new GobblinHelixJobLauncher(secondProps, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap,
          java.util.Optional.empty());

  firstLauncher.launchJob(null);
  secondLauncher.launchJob(null);

  final TaskDriver driver = new TaskDriver(this.helixManager);

  final String firstJobId = firstProps.getProperty(ConfigurationKeys.JOB_ID_KEY);
  final String secondJobId = secondProps.getProperty(ConfigurationKeys.JOB_ID_KEY);

  final org.apache.helix.task.JobContext firstContextBeforeClose = driver.getJobContext(firstJobId);
  final org.apache.helix.task.JobContext secondContextBeforeClose = driver.getJobContext(secondJobId);

  waitForWorkFlowStartup(driver, firstJobId);
  waitForWorkFlowStartup(driver, secondJobId);

  // Until a launcher is closed, its job context is expected to exist.
  Assert.assertNotNull(firstContextBeforeClose);
  Assert.assertNotNull(secondContextBeforeClose);

  firstLauncher.close();

  // Deletion happens asynchronously after close, so wait for it.
  waitForWorkFlowCleanup(driver, firstJobId);

  // First job: context, workflow config and workflow context must all be gone.
  Assert.assertNull(driver.getJobContext(firstJobId));
  Assert.assertNull(driver.getWorkflowConfig(firstJobId));
  Assert.assertNull(driver.getWorkflowContext(firstJobId));

  // Second workflow shares a prefix with the first but must survive the first cleanup.
  Assert.assertNotNull(driver.getWorkflowConfig(secondJobId));

  secondLauncher.close();

  // Deletion happens asynchronously after close, so wait for it.
  waitForWorkFlowCleanup(driver, secondJobId);

  Assert.assertNull(driver.getWorkflowConfig(secondJobId));

  // Work unit and task state directories of the first job must have been removed.
  final File workUnitsDirectory =
      new File(this.appWorkDir + File.separator + GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME
          + File.separator + firstJobId);
  final File taskStatesDirectory =
      new File(this.appWorkDir + File.separator + GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME
          + File.separator + firstJobId);

  Assert.assertFalse(workUnitsDirectory.exists());
  Assert.assertFalse(taskStatesDirectory.exists());

  // The job.state file of the first job must have been removed as well.
  final String jobStatePath = GobblinClusterUtils.getJobStateFilePath(true, this.appWorkDir, firstJobId).toString();
  Assert.assertFalse(new File(jobStatePath).exists());
}
 
Example 10
Source File: ClusterIntegrationTest.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a predicate that holds once a workflow context exists for the given job id.
 * The predicate's input is ignored; the context is looked up on every evaluation.
 *
 * @param helixManager manager used to look up the workflow context
 * @param jobId id passed as the workflow name for the lookup
 * @return predicate that is true when the lookup returns a non-null context
 */
public static Predicate<Void> isTaskStarted(HelixManager helixManager, String jobId) {
  return ignored -> {
    final boolean contextExists = TaskDriver.getWorkflowContext(helixManager, jobId) != null;
    return contextExists;
  };
}
 
Example 11
Source File: TaskAdmin.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Logs a report for one workflow: its jobs, its overall state, the state of every job and,
 * for each job with both a config and a context, the details of every task partition.
 * Logs an error and returns when the workflow has no config.
 *
 * @param taskDriver driver used to read workflow and job configs and contexts
 * @param workflow name of the workflow to report on
 */
private static void list(TaskDriver taskDriver, String workflow) {
  WorkflowConfig workflowConfig = taskDriver.getWorkflowConfig(workflow);
  if (workflowConfig == null) {
    LOG.error("Workflow " + workflow + " does not exist!");
    return;
  }
  WorkflowContext workflowContext = taskDriver.getWorkflowContext(workflow);
  // Without a context, the workflow and all of its jobs are reported as NOT_STARTED.
  final boolean hasContext = workflowContext != null;

  LOG.info("Workflow " + workflow + " consists of the following tasks: " + workflowConfig.getJobDag()
      .getAllNodes());
  String workflowState;
  if (hasContext) {
    workflowState = workflowContext.getWorkflowState().name();
  } else {
    workflowState = TaskState.NOT_STARTED.name();
  }
  LOG.info("Current state of workflow is " + workflowState);
  LOG.info("Job states are: ");
  LOG.info("-------");
  for (String jobName : workflowConfig.getJobDag().getAllNodes()) {
    TaskState jobState = TaskState.NOT_STARTED;
    if (hasContext) {
      jobState = workflowContext.getJobState(jobName);
    }
    LOG.info("Job " + jobName + " is " + jobState);

    // Per-task details need both the job config and the job context.
    JobConfig jobConfig = taskDriver.getJobConfig(jobName);
    JobContext jobContext = taskDriver.getJobContext(jobName);
    if (jobConfig == null || jobContext == null) {
      LOG.info("-------");
      continue;
    }

    // Report partitions in ascending order.
    List<Integer> sortedPartitions = Lists.newArrayList(jobContext.getPartitionSet());
    Collections.sort(sortedPartitions);

    for (Integer partition : sortedPartitions) {
      // Fall back to the partition's target when no task id is recorded.
      String taskId = jobContext.getTaskIdForPartition(partition);
      if (taskId == null) {
        taskId = jobContext.getTargetForPartition(partition);
      }
      LOG.info("Task: " + taskId);

      TaskConfig taskConfig = jobConfig.getTaskConfig(taskId);
      if (taskConfig != null) {
        LOG.info("Configuration: " + taskConfig.getConfigMap());
      }

      // A partition without a recorded state is reported as INIT.
      TaskPartitionState partitionState = jobContext.getPartitionState(partition);
      if (partitionState == null) {
        partitionState = TaskPartitionState.INIT;
      }
      LOG.info("State: " + partitionState);

      String participant = jobContext.getAssignedParticipant(partition);
      if (participant != null) {
        LOG.info("Assigned participant: " + participant);
      }
      LOG.info("-------");
    }
    LOG.info("-------");
  }
}