Java Code Examples for org.apache.helix.task.JobConfig#Builder

The following examples show how to use org.apache.helix.task.JobConfig#Builder. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestScheduleDelayTask.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a workflow in which "Job2" is a child of "Job1" and is configured with both an execution
 * delay (2000 ms) and an explicit execution start (5000 ms after a reference timestamp), then
 * asserts that "Job2" started at least 5000 ms after that reference timestamp.
 */
@Test
public void testDeplayTimeAndStartTime() throws InterruptedException {
  final String workflow = TestHelper.getTestMethodName();
  final Workflow.Builder workflowBuilder = new Workflow.Builder(workflow);

  // Base job configuration; the same builder instance is handed to both jobs.
  final JobConfig.Builder jobConfig = new JobConfig.Builder()
      .setTargetResource(WorkflowGenerator.DEFAULT_TGT_DB)
      .setCommand(MockTask.TASK_COMMAND)
      .setMaxAttemptsPerTask(2)
      .setJobCommandConfigMap(WorkflowGenerator.DEFAULT_COMMAND_CONFIG);

  workflowBuilder.addParentChildDependency("Job1", "Job2");

  final long referenceTime = System.currentTimeMillis();
  workflowBuilder.addJob("Job1", jobConfig);
  // Job2 additionally receives the delay and the explicit start time.
  workflowBuilder.addJob("Job2",
      jobConfig.setExecutionDelay(2000L).setExecutionStart(referenceTime + 5000L));

  _driver.start(workflowBuilder.build());

  final String namespacedJob2 = TaskUtil.getNamespacedJobName(workflow, "Job2");
  _driver.pollForJobState(workflow, namespacedJob2, TaskState.COMPLETED);

  final long job2StartTime =
      _driver.getWorkflowContext(workflow).getJobStartTime(namespacedJob2);
  final long elapsedSinceReference = job2StartTime - referenceTime;

  Assert.assertTrue(elapsedSinceReference >= 5000L);
}
 
Example 2
Source File: TestTaskRetryDelay.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a single-job workflow whose mock task is configured with
 * {@code FAILURE_COUNT_BEFORE_SUCCESS = "1"} and at most two attempts per task, and asserts
 * that the workflow's recorded start-to-finish span is no more than 2000 ms.
 */
@Test
public void testTaskRetryWithoutDelay() throws Exception {
  final String workflow = TestHelper.getTestMethodName();

  final JobConfig.Builder jobConfig =
      JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  // The command config map is set twice in this chain: first to the default map, and last to
  // the failure-count map.
  jobConfig.setJobCommandConfigMap(WorkflowGenerator.DEFAULT_COMMAND_CONFIG)
      .setMaxAttemptsPerTask(2)
      .setCommand(MockTask.TASK_COMMAND)
      .setFailureThreshold(Integer.MAX_VALUE)
      .setJobCommandConfigMap(ImmutableMap.of(MockTask.FAILURE_COUNT_BEFORE_SUCCESS, "1"));

  final Workflow singleJobFlow =
      WorkflowGenerator.generateSingleJobWorkflowBuilder(workflow, jobConfig).build();
  _driver.start(singleJobFlow);

  // Block until the workflow is reported as COMPLETED.
  _driver.pollForWorkflowState(workflow, TaskState.COMPLETED);

  final long startedAt = _driver.getWorkflowContext(workflow).getStartTime();
  final long finishedAt = _driver.getWorkflowContext(workflow).getFinishTime();

  // The whole run is expected to take no more than 2 seconds.
  final long duration = finishedAt - startedAt;
  Assert.assertTrue(duration <= 2000L);
}
 
Example 3
Source File: TestEnqueueJobs.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a job queue configured for one parallel job, then enqueues five jobs
 * ("JOB0" through "JOB4") one at a time: once the previous job is COMPLETED, the test calls
 * {@code waitToStop} on the queue, enqueues the next job, and resumes the queue. Finally it
 * waits for "JOB4" to complete.
 */
@Test
public void testJobQueueAddingJobsOneByOne() throws InterruptedException {
  final String queue = TestHelper.getTestMethodName();
  final JobQueue.Builder queueBuilder = TaskTestUtil.buildJobQueue(queue);
  final WorkflowConfig.Builder queueConfig =
      new WorkflowConfig.Builder().setWorkflowId(queue).setParallelJobs(1);
  _driver.start(queueBuilder.setWorkflowConfig(queueConfig.build()).build());

  final JobConfig.Builder jobConfig = new JobConfig.Builder()
      .setTargetResource(WorkflowGenerator.DEFAULT_TGT_DB)
      .setCommand(MockTask.TASK_COMMAND)
      .setMaxAttemptsPerTask(2);
  _driver.enqueueJob(queue, "JOB0", jobConfig);

  // Each pass waits for the previously enqueued job before adding the next one.
  for (int previous = 0; previous < 4; previous++) {
    final String previousJob = TaskUtil.getNamespacedJobName(queue, "JOB" + previous);
    _driver.pollForJobState(queue, previousJob, 10000L, TaskState.COMPLETED);
    _driver.waitToStop(queue, 5000L);
    _driver.enqueueJob(queue, "JOB" + (previous + 1), jobConfig);
    _driver.resume(queue);
  }

  final String lastJob = TaskUtil.getNamespacedJobName(queue, "JOB" + 4);
  _driver.pollForJobState(queue, lastJob, TaskState.COMPLETED);
}
 
Example 4
Source File: TestWorkflowTermination.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a job queue whose workflow config carries a 1000 ms timeout, waits for both enqueued
 * jobs to reach COMPLETED, sleeps for the timeout duration, and then polls for the queue to be
 * IN_PROGRESS.
 */
@Test
public void testJobQueueNotApplyTimeout() throws InterruptedException {
  final String queue = TestHelper.getTestMethodName();
  final long timeoutMs = 1000;
  final String secondJobName = JOB_NAME + 1;

  // Job configuration intended to let the jobs run successfully.
  final JobConfig.Builder jobConfig = createJobConfigBuilder(queue, false, 10);
  final JobQueue.Builder queueBuilder = TaskTestUtil.buildJobQueue(queue);
  final WorkflowConfig queueConfig = new WorkflowConfig.Builder(queue)
      .setTimeout(timeoutMs)
      .setWorkFlowType(WORKFLOW_TYPE)
      .build();
  queueBuilder.setWorkflowConfig(queueConfig)
      .enqueueJob(JOB_NAME, jobConfig)
      .enqueueJob(secondJobName, jobConfig);

  _driver.start(queueBuilder.build());

  final String firstNamespaced = TaskUtil.getNamespacedJobName(queue, JOB_NAME);
  _driver.pollForJobState(queue, firstNamespaced, TaskState.COMPLETED);
  final String secondNamespaced = TaskUtil.getNamespacedJobName(queue, secondJobName);
  _driver.pollForJobState(queue, secondNamespaced, TaskState.COMPLETED);

  // Let the configured timeout elapse before checking the queue state.
  Thread.sleep(timeoutMs);

  // The queue must still be reported as IN_PROGRESS.
  _driver.pollForWorkflowState(queue, 10000L, TaskState.IN_PROGRESS);
}
 
Example 5
Source File: TestRebalanceRunningTask.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Scenario under test:
 * <ul>
 *   <li>Task type: generic</li>
 *   <li>Rebalance running task: disabled</li>
 *   <li>Story: 1 node is down</li>
 * </ul>
 */
@Test
public void testGenericTaskAndDisabledRebalanceAndNodeDown() throws InterruptedException {
  WORKFLOW = TestHelper.getTestMethodName();
  startParticipant(_initialNumNodes);

  final JobConfig.Builder stuckJobConfig = new JobConfig.Builder()
      .setWorkflow(WORKFLOW)
      // 10 tasks should be enough for consistent hashing to place them on different instances
      .setNumberOfTasks(10)
      .setNumConcurrentTasksPerInstance(100)
      .setCommand(MockTask.TASK_COMMAND)
      // the huge delay keeps every task stuck
      .setJobCommandConfigMap(ImmutableMap.of(MockTask.JOB_DELAY, "99999999"));

  final Workflow.Builder workflow = new Workflow.Builder(WORKFLOW);
  workflow.addJob(JOB, stuckJobConfig);

  _driver.start(workflow.build());

  final boolean spreadAcrossInstances = checkTasksOnDifferentInstances();
  Assert.assertTrue(spreadAcrossInstances);

  // After a participant is stopped, the tasks are expected on the same instance.
  stopParticipant(_initialNumNodes);
  final boolean collapsedOntoOneInstance = checkTasksOnSameInstances();
  Assert.assertTrue(collapsedOntoOneInstance);
}
 
Example 6
Source File: TestIndependentTaskRebalancer.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a job made of two tasks ("TaskOne" configured with {@code fail=true}, "TaskTwo" with no
 * config map) and a failure threshold of 1, waits for the workflow to go through IN_PROGRESS
 * and COMPLETED, and asserts that both task classes were recorded as invoked.
 */
@Test
public void testThresholdFailure() throws Exception {
  final String workflow = TestHelper.getTestMethodName();

  // Two different tasks for the one job.
  final Map<String, String> failingTaskConfig =
      Maps.newHashMap(ImmutableMap.of("fail", String.valueOf(true)));
  final List<TaskConfig> tasks = Lists.newArrayListWithCapacity(2);
  tasks.add(new TaskConfig("TaskOne", failingTaskConfig));
  tasks.add(new TaskConfig("TaskTwo", null));

  final Map<String, String> commandConfig = Maps.newHashMap();
  commandConfig.put("Timeout", "1000");

  final JobConfig.Builder jobConfig = new JobConfig.Builder()
      .setCommand("DummyCommand")
      .setFailureThreshold(1)
      .addTaskConfigs(tasks)
      .setJobCommandConfigMap(commandConfig);

  final Workflow.Builder workflowBuilder = new Workflow.Builder(workflow);
  workflowBuilder.addJob(workflow, jobConfig);
  _driver.start(workflowBuilder.build());

  // The workflow is expected to be seen IN_PROGRESS and then COMPLETED.
  _driver.pollForWorkflowState(workflow, TaskState.IN_PROGRESS);
  _driver.pollForWorkflowState(workflow, TaskState.COMPLETED);

  // Both task classes must have been recorded as invoked.
  final String taskOneClass = TaskOne.class.getName();
  Assert.assertTrue(_invokedClasses.contains(taskOneClass));
  final String taskTwoClass = TaskTwo.class.getName();
  Assert.assertTrue(_invokedClasses.contains(taskTwoClass));
}
 
Example 7
Source File: TestTaskRebalancerStopResume.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a single-job workflow, stops it once it is IN_PROGRESS, waits for STOPPED, resumes it,
 * and waits for COMPLETED.
 */
@Test
public void stopAndResume() throws Exception {
  final JobConfig.Builder jobConfig =
      JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  final Map<String, String> delayConfig =
      ImmutableMap.of(MockTask.JOB_DELAY, String.valueOf(100));
  jobConfig.setJobCommandConfigMap(delayConfig);

  final Workflow workflow =
      WorkflowGenerator.generateSingleJobWorkflowBuilder(JOB_RESOURCE, jobConfig).build();

  // Phase 1: start and wait until the workflow is running.
  LOG.info("Starting flow " + workflow.getName());
  _driver.start(workflow);
  _driver.pollForWorkflowState(JOB_RESOURCE, TaskState.IN_PROGRESS);

  // Phase 2: stop and wait for the STOPPED state.
  LOG.info("Pausing job");
  _driver.stop(JOB_RESOURCE);
  _driver.pollForWorkflowState(JOB_RESOURCE, TaskState.STOPPED);

  // Phase 3: resume and wait for completion.
  LOG.info("Resuming job");
  _driver.resume(JOB_RESOURCE);
  _driver.pollForWorkflowState(JOB_RESOURCE, TaskState.COMPLETED);
}
 
Example 8
Source File: TestTaskWithInstanceDisabled.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Disables the participant at {@code _startPort + 0}, runs a single-job workflow to completion,
 * and asserts that partition 0 was assigned to the participant at {@code _startPort + 1}.
 */
@Test
public void testTaskWithInstanceDisabled() throws InterruptedException {
  // Disable the first participant before any job is submitted.
  final String disabledInstance = PARTICIPANT_PREFIX + "_" + (_startPort + 0);
  _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, disabledInstance, false);

  final String workflow = TestHelper.getTestMethodName();
  final JobConfig.Builder jobConfig = new JobConfig.Builder()
      .setCommand(MockTask.TASK_COMMAND)
      .setTargetResource(WorkflowGenerator.DEFAULT_TGT_DB);
  final Workflow singleJobFlow =
      WorkflowGenerator.generateSingleJobWorkflowBuilder(workflow, jobConfig).build();
  _driver.start(singleJobFlow);

  _driver.pollForWorkflowState(workflow, TaskState.COMPLETED);

  final JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(workflow));
  final String expectedInstance = PARTICIPANT_PREFIX + "_" + (_startPort + 1);
  Assert.assertEquals(jobContext.getAssignedParticipant(0), expectedInstance);
}
 
Example 9
Source File: TestTaskAssignmentCalculator.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the {@code failTask} flag, starts a workflow of five single-task jobs
 * ("JOB0" through "JOB4"), waits for the workflow to be FAILED, and asserts that exactly four
 * of the recorded job states are ABORTED.
 */
@Test
public void testAbortTaskForWorkflowFail() throws InterruptedException {
  failTask = true;
  final String workflow = TestHelper.getTestMethodName();
  final Workflow.Builder workflowBuilder = new Workflow.Builder(workflow);

  for (int jobIndex = 0; jobIndex < 5; jobIndex++) {
    // Every job gets its own one-element task list with an empty task config map.
    final List<TaskConfig> singleTask = Lists.newArrayListWithCapacity(1);
    final Map<String, String> emptyTaskConfig = Maps.newHashMap();
    singleTask.add(new TaskConfig("TaskOne", emptyTaskConfig));

    final JobConfig.Builder jobConfig = new JobConfig.Builder()
        .setCommand("DummyCommand")
        .addTaskConfigs(singleTask)
        .setJobCommandConfigMap(_jobCommandMap);
    workflowBuilder.addJob("JOB" + jobIndex, jobConfig);
  }

  _driver.start(workflowBuilder.build());
  _driver.pollForWorkflowState(workflow, TaskState.FAILED);

  // Count how many jobs ended up ABORTED.
  int abortedJobs = 0;
  for (TaskState state : _driver.getWorkflowContext(workflow).getJobStates().values()) {
    if (TaskState.ABORTED == state) {
      abortedJobs += 1;
    }
  }

  Assert.assertEquals(abortedJobs, 4);
}
 
Example 10
Source File: TestTaskThreadLeak.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Creates five resources, queues one job per resource, runs the queue with up to ten parallel
 * jobs, and, once the last queued job is COMPLETED, asserts that the "TaskStateModelFactory"
 * thread count grew by at most {@code TASK_THREADPOOL_SIZE + 1} relative to
 * {@code _threadCountBefore}.
 */
@Test
public void testTaskThreadCount() throws InterruptedException {
  final String queue = "myTestJobQueue";
  final JobQueue.Builder queueBuilder = new JobQueue.Builder(queue);

  String lastJobName = null;
  for (int index = 0; index < 5; index++) {
    final String resource = TestHelper.getTestMethodName() + "_" + index;
    _gSetupTool.addResourceToCluster(CLUSTER_NAME, resource, 20, MASTER_SLAVE_STATE_MODEL,
        IdealState.RebalanceMode.FULL_AUTO.name());
    _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, resource, 1);

    final JobConfig.Builder jobConfig = new JobConfig.Builder()
        .setCommand(MockTask.TASK_COMMAND)
        .setTargetResource(resource)
        .setNumConcurrentTasksPerInstance(100);
    final String jobName = resource + "_job";
    queueBuilder.addJob(jobName, jobConfig);
    lastJobName = jobName;
  }

  final WorkflowConfig queueConfig =
      new WorkflowConfig.Builder(queue).setParallelJobs(10).build();
  queueBuilder.setWorkflowConfig(queueConfig);

  _driver.start(queueBuilder.build());

  // Wait for the job that was queued last.
  final String lastNamespacedJob = TaskUtil.getNamespacedJobName(queue, lastJobName);
  _driver.pollForJobState(queue, lastNamespacedJob, TaskState.COMPLETED);

  final int threadCountAfter = getThreadCount("TaskStateModelFactory");
  final int threadGrowth = threadCountAfter - _threadCountBefore;

  Assert.assertTrue(threadGrowth <= TaskStateModelFactory.TASK_THREADPOOL_SIZE + 1);
}
 
Example 11
Source File: WorkflowGenerator.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a workflow builder containing one parent job ({@code JOB_NAME_1}) and
 * {@code jobCount - 1} child jobs named {@code JOB_NAME_2 + "-" + i}, each declared as a child
 * of the parent. All jobs are added with the same default job config builder.
 *
 * @param workflowName name passed to the {@link Workflow.Builder}
 * @param jobCount total number of jobs, parent included; values of 1 or less add only the
 *     parent job
 * @return the populated (not yet built) workflow builder
 */
public static Workflow.Builder generateDefaultRepeatedJobWorkflowBuilder(String workflowName, int jobCount) {
  final JobConfig.Builder sharedJobConfig = JobConfig.Builder.fromMap(DEFAULT_JOB_CONFIG);
  sharedJobConfig.setJobCommandConfigMap(DEFAULT_COMMAND_CONFIG);

  final Workflow.Builder workflow = new Workflow.Builder(workflowName);
  workflow.addJob(JOB_NAME_1, sharedJobConfig);

  int childIndex = 0;
  while (childIndex < jobCount - 1) {
    final String childName = JOB_NAME_2 + "-" + childIndex;
    // The dependency is declared before the child job itself is added.
    workflow.addParentChildDependency(JOB_NAME_1, childName);
    workflow.addJob(childName, sharedJobConfig);
    childIndex++;
  }

  return workflow;
}
 
Example 12
Source File: TestTaskRebalancerRetryLimit.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a single-job workflow whose mock task is configured with {@code THROW_EXCEPTION = "true"}
 * and at most two attempts per task, waits for the workflow to be COMPLETED, and asserts that
 * every partition with a recorded state is in TASK_ERROR after exactly two attempts.
 */
@Test
public void test() throws Exception {
  final String workflow = TestHelper.getTestMethodName();

  final JobConfig.Builder jobConfig =
      JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  // The command config map is set twice in this chain: first to the default map, and last to
  // the throw-exception map.
  jobConfig.setJobCommandConfigMap(WorkflowGenerator.DEFAULT_COMMAND_CONFIG)
      .setMaxAttemptsPerTask(2)
      .setCommand(MockTask.TASK_COMMAND)
      .setFailureThreshold(Integer.MAX_VALUE)
      .setJobCommandConfigMap(ImmutableMap.of(MockTask.THROW_EXCEPTION, "true"));

  final Workflow singleJobFlow =
      WorkflowGenerator.generateSingleJobWorkflowBuilder(workflow, jobConfig).build();
  _driver.start(singleJobFlow);

  // Block until the workflow is reported as COMPLETED.
  _driver.pollForWorkflowState(workflow, TaskState.COMPLETED);

  final JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(workflow));
  for (int partition = 0; partition < _numPartitions; partition++) {
    final TaskPartitionState partitionState = jobContext.getPartitionState(partition);
    if (partitionState == null) {
      // Partitions without a recorded state are not checked.
      continue;
    }
    Assert.assertEquals(partitionState, TaskPartitionState.TASK_ERROR);
    Assert.assertEquals(jobContext.getPartitionNumAttempts(partition), 2);
  }
}
 
Example 13
Source File: TestUpdateWorkflow.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a recurrent job queue whose first job targets "MASTER" partitions and whose remaining
 * jobs target "SLAVE" partitions. Job names are the lower-cased target state followed by
 * "Job" and the job index.
 *
 * <p>NOTE(review): the loop bound is inclusive, so {@code numJobs + 1} jobs are enqueued;
 * confirm this is what callers expect.
 *
 * @param queueName name of the queue to build
 * @param numJobs upper (inclusive) job index
 * @return the built job queue
 */
private JobQueue createDefaultRecurrentJobQueue(String queueName, int numJobs) {
  final JobQueue.Builder queue = TaskTestUtil.buildRecurrentJobQueue(queueName, 0, 600000);

  for (int jobIndex = 0; jobIndex <= numJobs; jobIndex++) {
    final String targetState;
    if (jobIndex == 0) {
      targetState = "MASTER";
    } else {
      targetState = "SLAVE";
    }

    final JobConfig.Builder jobConfig = new JobConfig.Builder()
        .setCommand(MockTask.TASK_COMMAND)
        .setTargetResource(WorkflowGenerator.DEFAULT_TGT_DB)
        .setTargetPartitionStates(Sets.newHashSet(targetState));

    queue.enqueueJob(targetState.toLowerCase() + "Job" + jobIndex, jobConfig);
  }

  return queue.build();
}
 
Example 14
Source File: TestTaskRebalancer.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a single-job workflow restricted to an explicit list of target partitions with one
 * attempt per task, and asserts that the job is COMPLETED and that, for every listed target,
 * the first mapped partition is COMPLETED after exactly one attempt.
 */
@Test
public void partitionSet() throws Exception {
  final String workflow = "partitionSet";
  final ImmutableList<String> targets =
      ImmutableList.of("TestDB_1", "TestDB_2", "TestDB_3", "TestDB_5", "TestDB_8", "TestDB_13");

  // Build and submit the basic workflow.
  final Map<String, String> delayConfig =
      ImmutableMap.of(MockTask.JOB_DELAY, String.valueOf(100));
  final JobConfig.Builder jobConfig =
      JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  jobConfig.setJobCommandConfigMap(delayConfig)
      .setMaxAttemptsPerTask(1)
      .setTargetPartitions(targets);

  final Workflow singleJobFlow =
      WorkflowGenerator.generateSingleJobWorkflowBuilder(workflow, jobConfig).build();
  _driver.start(singleJobFlow);

  // Wait for the workflow to be reported as COMPLETED.
  _driver.pollForWorkflowState(workflow, TaskState.COMPLETED);

  // Inspect the resulting contexts for the requested partition set.
  final String namespacedJob = TaskUtil.getNamespacedJobName(workflow);
  final JobContext jobContext = _driver.getJobContext(namespacedJob);
  final WorkflowContext workflowContext = _driver.getWorkflowContext(workflow);

  Assert.assertNotNull(jobContext);
  Assert.assertNotNull(workflowContext);
  Assert.assertEquals(workflowContext.getJobState(namespacedJob), TaskState.COMPLETED);

  for (String target : targets) {
    final int partition = jobContext.getPartitionsByTarget().get(target).get(0);
    Assert.assertEquals(jobContext.getPartitionState(partition), TaskPartitionState.COMPLETED);
    Assert.assertEquals(jobContext.getPartitionNumAttempts(partition), 1);
  }
}
 
Example 15
Source File: TestWorkflowTermination.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a workflow with a 50 ms timeout and a 2000 ms expiry in which a second job depends on
 * the first. Asserts that the workflow and its first job reach TIMED_OUT, that the dependent
 * job has no state in the workflow context, that the workflow ran for at least the timeout,
 * and that cleanup verification finished no earlier than the expiry after the finish time.
 */
@Test
public void testWorkflowRunningTimeout() throws Exception {
  final String workflow = TestHelper.getTestMethodName();
  final String neverStartedJob = JOB_NAME + "-NotStarted";
  final long expiryMs = 2000; // 2sec expiry time
  final long timeoutMs = 50;

  final JobConfig.Builder jobConfig = createJobConfigBuilder(workflow, false, 5000);
  jobConfig.setWorkflow(workflow);

  // The second job depends on the first; the workflow is expected to time out before the
  // first job finishes.
  final WorkflowConfig workflowConfig = new WorkflowConfig.Builder(workflow)
      .setTimeout(timeoutMs)
      .setWorkFlowType(WORKFLOW_TYPE)
      .build();
  final Workflow.Builder workflowBuilder = new Workflow.Builder(workflow)
      .setWorkflowConfig(workflowConfig)
      .addJob(JOB_NAME, jobConfig)
      .addJob(neverStartedJob, jobConfig)
      .addParentChildDependency(JOB_NAME, neverStartedJob)
      .setExpiry(expiryMs);

  _driver.start(workflowBuilder.build());

  _driver.pollForWorkflowState(workflow, 10000L, TaskState.TIMED_OUT);
  final long finishedAt = _driver.getWorkflowContext(workflow).getFinishTime();

  // The running job should be marked as timed out,
  // and the job that never started should not appear in the workflow context.
  _driver.pollForJobState(workflow, getJobNameToPoll(workflow, JOB_NAME), 10000L,
      TaskState.TIMED_OUT);

  final WorkflowContext workflowContext = _driver.getWorkflowContext(workflow);
  Assert.assertNull(workflowContext.getJobState(neverStartedJob));
  final long runDuration = workflowContext.getFinishTime() - workflowContext.getStartTime();
  Assert.assertTrue(runDuration >= timeoutMs);

  verifyWorkflowCleanup(workflow, getJobNameToPoll(workflow, JOB_NAME),
      getJobNameToPoll(workflow, neverStartedJob));
  final long cleanedUpAt = System.currentTimeMillis();
  Assert.assertTrue(cleanedUpAt - finishedAt >= expiryMs);
}
 
Example 16
Source File: TestTaskRebalancer.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a single-job workflow with a per-task timeout of 1 and at most two attempts per task,
 * waits for the workflow to be FAILED, and asserts that every partition with a recorded state
 * is TIMED_OUT or TASK_ABORTED, that at least one is TIMED_OUT, and that the highest attempt
 * count seen is 2 or 3.
 */
@Test
public void timeouts() throws Exception {
  final String workflow = "timeouts";

  final JobConfig.Builder jobConfig =
      JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  jobConfig.setJobCommandConfigMap(WorkflowGenerator.DEFAULT_COMMAND_CONFIG)
      .setMaxAttemptsPerTask(2)
      .setTimeoutPerTask(1); // This timeout needs to be very short

  final Workflow singleJobFlow =
      WorkflowGenerator.generateSingleJobWorkflowBuilder(workflow, jobConfig).build();
  _driver.start(singleJobFlow);

  // Block until the workflow reports failure.
  _driver.pollForWorkflowState(workflow, TaskState.FAILED);

  // Verify that partitions timed out, tracking the highest attempt count seen.
  final JobContext jobContext = _driver.getJobContext(TaskUtil.getNamespacedJobName(workflow));
  int highestAttempts = 0;
  boolean anyTimedOut = false;
  for (int partition = 0; partition < _numPartitions; partition++) {
    final TaskPartitionState partitionState = jobContext.getPartitionState(partition);
    if (partitionState == null) {
      // Partitions without a recorded state are not checked.
      continue;
    }
    final boolean timedOut = partitionState == TaskPartitionState.TIMED_OUT;
    if (timedOut) {
      anyTimedOut = true;
    }
    // At least one task timed out; others might be aborted due to the job failure.
    Assert.assertTrue(timedOut || partitionState == TaskPartitionState.TASK_ABORTED);
    highestAttempts = Math.max(highestAttempts, jobContext.getPartitionNumAttempts(partition));
  }

  Assert.assertTrue(anyTimedOut);
  // 2 or 3 both okay only for tests - TODO: Fix this later
  Assert.assertTrue(highestAttempts == 2 || highestAttempts == 3);
}
 
Example 17
Source File: TestUserContentStore.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Enqueues two single-task jobs ("TaskOne" and "TaskTwo") into a job queue, starts the queue,
 * waits for the second job to be COMPLETED, and asserts that the workflow context reports that
 * job as COMPLETED.
 */
@Test
public void testJobContentPutAndGetWithDependency() throws InterruptedException {
  final String queue = TestHelper.getTestMethodName();
  final JobQueue.Builder queueBuilder = TaskTestUtil.buildJobQueue(queue, 0, 100);

  // One task list per job, each holding a single task with an empty config map.
  final List<TaskConfig> firstJobTasks = Lists.newArrayListWithCapacity(1);
  final List<TaskConfig> secondJobTasks = Lists.newArrayListWithCapacity(1);
  final Map<String, String> firstTaskConfig = Maps.newHashMap();
  final Map<String, String> secondTaskConfig = Maps.newHashMap();
  firstJobTasks.add(new TaskConfig("TaskOne", firstTaskConfig));
  secondJobTasks.add(new TaskConfig("TaskTwo", secondTaskConfig));

  final Map<String, String> commandConfig = Maps.newHashMap();
  commandConfig.put("Timeout", "1000");

  final JobConfig.Builder firstJobConfig = new JobConfig.Builder()
      .setCommand("DummyCommand")
      .addTaskConfigs(firstJobTasks)
      .setJobCommandConfigMap(commandConfig)
      .setWorkflow(queue);
  final JobConfig.Builder secondJobConfig = new JobConfig.Builder()
      .setCommand("DummyCommand")
      .addTaskConfigs(secondJobTasks)
      .setJobCommandConfigMap(commandConfig)
      .setWorkflow(queue);

  final String secondJobName = queue + 1;
  queueBuilder.enqueueJob(queue + 0, firstJobConfig);
  queueBuilder.enqueueJob(secondJobName, secondJobConfig);

  _driver.start(queueBuilder.build());

  final String secondNamespaced = TaskUtil.getNamespacedJobName(queue, secondJobName);
  _driver.pollForJobState(queue, secondNamespaced, TaskState.COMPLETED);

  final TaskState secondJobState =
      _driver.getWorkflowContext(queue).getJobState(secondNamespaced);
  Assert.assertEquals(secondJobState, TaskState.COMPLETED);
}
 
Example 18
Source File: TestJobTimeout.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a two-job workflow in which the first job is stuck behind a huge mock delay with a job
 * timeout of 10, and the dependent second job ignores its parent's failure. Asserts that the
 * first job is TIMED_OUT, that the second job and the workflow are COMPLETED, and that every
 * partition of the first job is TASK_ABORTED.
 */
@Test
public void testTaskRunningIndefinitely() throws InterruptedException {
  final String firstJob = "first_job";
  final String secondJob = "second_job";
  final String workflow = TestHelper.getTestMethodName();
  final String targetDb = WorkflowGenerator.DEFAULT_TGT_DB;
  final String masterState = MasterSlaveSMD.States.MASTER.name();

  // First job: the task is stuck behind the huge delay.
  final JobConfig.Builder firstJobConfig = new JobConfig.Builder()
      .setWorkflow(workflow)
      .setTargetResource(targetDb)
      .setTargetPartitionStates(Sets.newHashSet(masterState))
      .setCommand(MockTask.TASK_COMMAND)
      .setJobCommandConfigMap(ImmutableMap.of(MockTask.JOB_DELAY, "99999999"))
      .setTimeout(10);

  // Second job: configured to ignore the first job's timeout.
  final JobConfig.Builder secondJobConfig = new JobConfig.Builder()
      .setWorkflow(workflow)
      .setTargetResource(targetDb)
      .setTargetPartitionStates(Sets.newHashSet(masterState))
      .setCommand(MockTask.TASK_COMMAND)
      .setIgnoreDependentJobFailure(true);

  // With a failure threshold of 1 the workflow is expected to ignore the first job's timeout,
  // schedule the second job, and succeed.
  final WorkflowConfig.Builder workflowConfig =
      new WorkflowConfig.Builder(workflow).setFailureThreshold(1);

  final Workflow.Builder workflowBuilder = new Workflow.Builder(workflow)
      .setWorkflowConfig(workflowConfig.build())
      .addJob(firstJob, firstJobConfig)
      .addJob(secondJob, secondJobConfig)
      .addParentChildDependency(firstJob, secondJob);

  _driver.start(workflowBuilder.build());

  final String firstNamespaced = TaskUtil.getNamespacedJobName(workflow, firstJob);
  _driver.pollForJobState(workflow, firstNamespaced, TaskState.TIMED_OUT);
  final String secondNamespaced = TaskUtil.getNamespacedJobName(workflow, secondJob);
  _driver.pollForJobState(workflow, secondNamespaced, TaskState.COMPLETED);
  _driver.pollForWorkflowState(workflow, TaskState.COMPLETED);

  final JobContext firstJobContext = _driver.getJobContext(firstNamespaced);
  for (int partition : firstJobContext.getPartitionSet()) {
    // All tasks are expected to be aborted because of the job timeout.
    Assert.assertEquals(firstJobContext.getPartitionState(partition),
        TaskPartitionState.TASK_ABORTED);
  }
}
 
Example 19
Source File: GobblinHelixDistributeJobExecutionLauncher.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Create a job config builder which has a single task that wraps the original jobProps.
 *
 * The planning job (which runs the original {@link GobblinHelixJobLauncher}) will be
 * executed on one of the Helix participants.
 *
 * We rely on the underlying {@link GobblinHelixJobLauncher} to correctly handle the task
 * execution timeout so that the planning job itself is relieved of the timeout constraint.
 *
 * In short, the planning job will run once and requires no timeout.
 *
 * @param jobProps the original job properties; every string-valued property, including those
 *     that are only present in the properties' defaults list, is copied into the task's raw
 *     config under the {@code JOB_PROPS_PREFIX} key prefix
 * @return a builder configured with a single planning task, one attempt per task, an extended
 *     per-task timeout, a failure threshold of 1, and the optional tag/type taken from
 *     {@code jobProps}
 */
private JobConfig.Builder createJobBuilder (Properties jobProps) {
  // Create a single task for job planning
  String planningId = getPlanningJobId(jobProps);
  Map<String, TaskConfig> taskConfigMap = Maps.newHashMap();
  Map<String, String> rawConfigMap = Maps.newHashMap();
  for (String key : jobProps.stringPropertyNames()) {
    // stringPropertyNames() also reports keys that exist only in the defaults list.
    // getProperty() resolves those, whereas Hashtable#get() ignores defaults and would
    // store a null value for them.
    rawConfigMap.put(JOB_PROPS_PREFIX + key, jobProps.getProperty(key));
  }
  rawConfigMap.put(GobblinClusterConfigurationKeys.TASK_SUCCESS_OPTIONAL_KEY, "true");

  // Create a single Job which only contains a single task
  taskConfigMap.put(planningId, TaskConfig.Builder.from(rawConfigMap));
  JobConfig.Builder jobConfigBuilder = new JobConfig.Builder();

  // We want GobblinHelixJobLauncher only run once.
  jobConfigBuilder.setMaxAttemptsPerTask(1);

  // Planning job never timeout (Helix defaults 1h timeout, set a large number '1 month')
  jobConfigBuilder.setTimeoutPerTask(JobConfig.DEFAULT_TIMEOUT_PER_TASK * 24 * 30);

  // Planning job should have its own tag support
  if (jobProps.containsKey(GobblinClusterConfigurationKeys.HELIX_PLANNING_JOB_TAG_KEY)) {
    String jobPlanningTag = jobProps.getProperty(GobblinClusterConfigurationKeys.HELIX_PLANNING_JOB_TAG_KEY);
    log.info("PlanningJob {} has tags associated : {}", planningId, jobPlanningTag);
    jobConfigBuilder.setInstanceGroupTag(jobPlanningTag);
  }

  // Planning job should have its own type support
  if (jobProps.containsKey(GobblinClusterConfigurationKeys.HELIX_PLANNING_JOB_TYPE_KEY)) {
    String jobType = jobProps.getProperty(GobblinClusterConfigurationKeys.HELIX_PLANNING_JOB_TYPE_KEY);
    log.info("PlanningJob {} has types associated : {}", planningId, jobType);
    jobConfigBuilder.setJobType(jobType);
  }

  jobConfigBuilder.setNumConcurrentTasksPerInstance(PropertiesUtils.getPropAsInt(jobProps,
      GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY,
      GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY_DEFAULT));

  jobConfigBuilder.setFailureThreshold(1);
  jobConfigBuilder.addTaskConfigMap(taskConfigMap).setCommand(GobblinTaskRunner.GOBBLIN_JOB_FACTORY_NAME);

  return jobConfigBuilder;
}
 
Example 20
Source File: TestWorkflowTermination.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Starts a four-job diamond workflow (job1 → job2, job1 → job3, job2 → job4, job3 → job4) in
 * which job3 is built with the failing configuration. Asserts that the workflow is FAILED
 * before its timeout, checks the expected final state of each job, verifies the
 * "FailedWorkflowCount" MBean attribute is 1, and confirms cleanup verification finished no
 * earlier than the expiry after the workflow's finish time.
 */
@Test
public void testWorkflowJobFail() throws Exception {
  final String workflow = TestHelper.getTestMethodName();
  final String firstJob = JOB_NAME + "1";
  final String secondJob = JOB_NAME + "2";
  final String thirdJob = JOB_NAME + "3";
  final String fourthJob = JOB_NAME + "4";
  final long expiryMs = 10000;
  final long timeoutMs = 10000;

  final JobConfig.Builder passingJobConfig = createJobConfigBuilder(workflow, false, 1);
  final JobConfig.Builder failingJobConfig = createJobConfigBuilder(workflow, true, 1);

  final WorkflowConfig workflowConfig = new WorkflowConfig.Builder(workflow)
      .setWorkFlowType(WORKFLOW_TYPE)
      .setTimeout(timeoutMs)
      .setParallelJobs(4)
      .setFailureThreshold(1)
      .build();
  final Workflow.Builder workflowBuilder = new Workflow.Builder(workflow)
      .setWorkflowConfig(workflowConfig)
      .addJob(firstJob, passingJobConfig)
      .addJob(secondJob, passingJobConfig)
      .addJob(thirdJob, failingJobConfig)
      .addJob(fourthJob, passingJobConfig)
      .addParentChildDependency(firstJob, secondJob)
      .addParentChildDependency(firstJob, thirdJob)
      .addParentChildDependency(secondJob, fourthJob)
      .addParentChildDependency(thirdJob, fourthJob)
      .setExpiry(expiryMs);

  _driver.start(workflowBuilder.build());

  _driver.pollForWorkflowState(workflow, 10000L, TaskState.FAILED);

  // The timeout is longer than the time to fail, so the failure should come first.
  final WorkflowContext workflowContext = _driver.getWorkflowContext(workflow);
  final long finishedAt = workflowContext.getFinishTime();
  final long runDuration = workflowContext.getFinishTime() - workflowContext.getStartTime();
  Assert.assertTrue(runDuration < timeoutMs);

  // The first job is expected to complete.
  _driver.pollForJobState(workflow, getJobNameToPoll(workflow, firstJob), 10000L,
      TaskState.COMPLETED);

  // The second and third jobs may race, so the second job may be COMPLETED or ABORTED.
  _driver.pollForJobState(workflow, getJobNameToPoll(workflow, secondJob), 10000L,
      TaskState.COMPLETED, TaskState.ABORTED);

  // The third job is meant to fail.
  _driver.pollForJobState(workflow, getJobNameToPoll(workflow, thirdJob), 10000L,
      TaskState.FAILED);

  // The fourth job depends on the third, so it is expected to fail as well.
  _driver.pollForJobState(workflow, getJobNameToPoll(workflow, fourthJob), 10000L,
      TaskState.FAILED);

  // The workflow MBean must report exactly one failed workflow.
  final ObjectName workflowMBean = getWorkflowMBeanObjectName(workflow);
  Assert.assertEquals((long) beanServer.getAttribute(workflowMBean, "FailedWorkflowCount"), 1);

  // A failed workflow is expected to be purged after it expires.
  verifyWorkflowCleanup(workflow, getJobNameToPoll(workflow, firstJob),
      getJobNameToPoll(workflow, secondJob), getJobNameToPoll(workflow, thirdJob),
      getJobNameToPoll(workflow, fourthJob));

  final long cleanedUpAt = System.currentTimeMillis();
  Assert.assertTrue(cleanedUpAt - finishedAt >= expiryMs);
}