Python luigi.build() Examples

The following are 11 code examples of luigi.build(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module luigi, or try the search function.
Example #1
Source File: utils.py    From GASpy with GNU Lesser General Public License v3.0 6 votes vote down vote up
def run_task_locally(task):
    '''
    Run a single task on a local Luigi scheduler.

    This is the one-task counterpart of the `gaspy.tasks.core.run_tasks`
    function. It is meant for debugging and/or testing purposes only.

    Arg:
        task    Instance of a `luigi.Task` object that you want to run
    '''
    # Luigi emits a noisy warning about `task_process_context`; silence it,
    # but only for the duration of this build (the filter is restored when
    # the `catch_warnings` context exits).
    ignored_message = ('Parameter "task_process_context" with value "None" '
                       'is not of type string.')

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message=ignored_message)
        luigi.build([task], local_scheduler=True)
Example #2
Source File: runner.py    From aws-service-catalog-puppet with Apache License 2.0 5 votes vote down vote up
def run_tasks_for_bootstrap_spokes_in_ou(tasks_to_run, num_workers):
    """
    Run the given Luigi tasks locally, report failures and exit the process.

    Creates one results sub-directory per outcome type, runs the tasks with a
    local scheduler, echoes the details of every failure result file and then
    terminates the interpreter with an exit code derived from the Luigi run
    status.

    Args:
        tasks_to_run: tasks handed to `luigi.build`.
        num_workers: number of Luigi workers to use.

    Raises:
        FileExistsError: if one of the results sub-directories already exists
            (`os.makedirs` is called without `exist_ok`).
        SystemExit: always, via `sys.exit`; this function does not return.
    """
    result_types = (
        "failure",
        "success",
        "timeout",
        "process_failure",
        "processing_time",
        "broken_task",
    )
    for result_type in result_types:
        os.makedirs(Path(constants.RESULTS_DIRECTORY) / result_type)

    run_result = luigi.build(
        tasks_to_run,
        local_scheduler=True,
        detailed_summary=True,
        workers=num_workers,
        log_level='INFO',
    )

    # NOTE(review): this globs a hard-coded 'results/failure' path rather than
    # building it from constants.RESULTS_DIRECTORY -- presumably the two are
    # the same location; confirm.
    for filename in glob('results/failure/*.json'):
        # Use a context manager so the file handle is closed promptly.
        with open(filename, 'r') as result_file:
            result = json.load(result_file)
        click.echo(colorclass.Color("{red}" + result.get('task_type') + " failed{/red}"))
        click.echo(f"{yaml.safe_dump({'parameters':result.get('task_params')})}")
        click.echo("\n".join(result.get('exception_stack_trace')))
        click.echo('')

    # Map the Luigi run status onto the process exit code.
    exit_status_codes = {
        LuigiStatusCode.SUCCESS: 0,
        LuigiStatusCode.SUCCESS_WITH_RETRY: 0,
        LuigiStatusCode.FAILED: 1,
        LuigiStatusCode.FAILED_AND_SCHEDULING_FAILED: 2,
        LuigiStatusCode.SCHEDULING_FAILED: 3,
        LuigiStatusCode.NOT_RUN: 4,
        LuigiStatusCode.MISSING_EXT: 5,
    }
    sys.exit(exit_status_codes.get(run_result.status))
Example #3
Source File: app.py    From Gather-Deployment with MIT License 5 votes vote down vote up
def luigi_task(self, filename, topic):
    """
    Build a `Save_to_Elastic` task through the scheduler at localhost:8082.

    `topic` is passed to the task both as its `summary` and as its `index`.
    Returns a fixed status dictionary once `luigi.build` has returned.
    """
    elastic_task = Save_to_Elastic(filename=filename, summary=topic, index=topic)
    luigi.build(
        [elastic_task],
        scheduler_host='localhost',
        scheduler_port=8082,
    )
    return {'status': 'Task scheluded!', 'result': 42}
Example #4
Source File: format_test.py    From gluish with GNU General Public License v3.0 5 votes vote down vote up
def test_decompress(self):
        """Building DummyGzipTask leaves a gzip file whose content is b'hello'."""
        # Start from a clean slate; it is fine if the file is not there yet.
        try:
            os.remove(DUMMY_GZIP_FILENAME)
        except FileNotFoundError:
            pass

        luigi.build([DummyGzipTask()])

        self.assertTrue(os.path.exists(DUMMY_GZIP_FILENAME))
        with gzip.open(DUMMY_GZIP_FILENAME) as handle:
            content = handle.read()
        self.assertEqual(content, b'hello')
Example #5
Source File: task_test.py    From gluish with GNU General Public License v3.0 5 votes vote down vote up
def test_mock_task(self):
        """ Test the mock class: its content before and its output after a build. """
        task = MockTask(fixture=os.path.join(FIXTURES, 'l-1.txt'))
        self.assertEqual(task.content(), '1\n')
        luigi.build([task], local_scheduler=True)
        # Read the output inside a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with task.output().open() as handle:
            self.assertEqual(handle.read(), '1\n')
Example #6
Source File: task_test.py    From gluish with GNU General Public License v3.0 5 votes vote down vote up
def test_task_dir(self):
        """The task directory is absent before the build and present afterwards."""
        task_n = TaskN()

        # Precondition: nothing exists at the task directory path yet.
        self.assertFalse(os.path.exists(task_n.taskdir()))

        luigi.build([task_n], local_scheduler=True)

        # After the build the path is a directory whose name ends in 'TaskN'.
        self.assertTrue(os.path.isdir(task_n.taskdir()))
        self.assertTrue(task_n.taskdir().endswith('TaskN'))
Example #7
Source File: test_info.py    From gokart with MIT License 5 votes vote down vote up
def test_make_tree_info_complete(self):
        """After a build, the tree info reports the task and its sub task as COMPLETE."""
        sub_task = _SubTask(param=2)
        root_task = _Task(param=1, sub=sub_task)

        # Run the task first so the tree is rendered after the build.
        luigi.build([root_task], local_scheduler=True)
        rendered = gokart.info.make_tree_info(root_task)
        pattern = r"""
└─-\(COMPLETE\) _Task\[[a-z0-9]*\]
   └─-\(COMPLETE\) _SubTask\[[a-z0-9]*\]"""
        self.assertRegex(rendered, pattern)
Example #8
Source File: test_restore_task_by_id.py    From gokart with MIT License 5 votes vote down vote up
def test(self):
        """A task restored from its unique id reports the same unique id."""
        task = _DummyTask(sub_task=_SubDummyTask(param=10))
        luigi.build([task], local_scheduler=True, log_level="CRITICAL")

        unique_id = task.make_unique_id()
        restored = _DummyTask.restore(unique_id)
        # assertEqual, not assertTrue: assertTrue(a, b) uses b as the failure
        # message and passes for any truthy a, so it never compared the ids.
        self.assertEqual(unique_id, restored.make_unique_id())
Example #9
Source File: core.py    From GASpy with GNU Lesser General Public License v3.0 5 votes vote down vote up
def schedule_tasks(tasks, workers=1, local_scheduler=False):
    '''
    This light wrapping function will execute any tasks you want through the
    Luigi host that is listed in the `.gaspyrc.json` file.

    Arg:
        tasks               An iterable of `luigi.Task` instances
        workers             An integer indicating how many processes/workers
                            you want executing the tasks and prerequisite
                            tasks.
        local_scheduler     A Boolean indicating whether or not you want to
                            use a local scheduler. You should use a local
                            scheduler only when you want something done
                            quickly but dirtily. If you do not use local
                            scheduling, then we will use our Luigi daemon
                            to manage things, which should be the status
                            quo. Any falsy value selects the Luigi daemon.
    '''
    luigi_host = utils.read_rc('luigi_host')

    # Pick the scheduler by truthiness. An identity test against `False`
    # would send other falsy values (e.g. `None` or `0`) to the local
    # scheduler, which is the opposite of what the caller asked for.
    if local_scheduler:
        scheduler_kwargs = {'local_scheduler': True}
    else:
        scheduler_kwargs = {'scheduler_host': luigi_host}

    # Ignore this silly Luigi warning that they're too lazy to fix
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='Parameter '
                                '"task_process_context" with value "None" is not '
                                'of type string.')

        luigi.build(tasks, workers=workers, **scheduler_kwargs)
Example #10
Source File: luigi_mr_tests.py    From pyschema with Apache License 2.0 5 votes vote down vote up
def test_typed_mr(self):
        """Every output line of VanillaTask contains "$schema" and loads to the expected record."""
        task = VanillaTask()
        luigi.build([task], local_scheduler=True)
        # Iterate inside a context manager so the output handle is closed.
        with task.output().open('r') as output_file:
            for line in output_file:
                # assertIn reports both operands on failure, unlike assertTrue(x in y).
                self.assertIn("$schema", line)
                rec = pyschema.loads(line)
                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(rec.foo, u"yay")
                self.assertEqual(rec.barsum, 5)
Example #11
Source File: __init__.py    From d6tflow with MIT License 4 votes vote down vote up
def run(tasks, forced=None, forced_all=False, forced_all_upstream=False, confirm=True, workers=1, abort=True, **kwargs):
    """
    Run tasks locally. See luigi.build for additional details

    Args:
        tasks (obj, list): task or list of tasks
        forced (obj, list): task or list of tasks to force; their completed
            downstream tasks are invalidated before the run
        forced_all (bool): force all tasks
        forced_all_upstream (bool): force all tasks including upstream
        confirm (bool): ask for confirmation before invalidating tasks
        workers (int): number of workers
        abort (bool): on errors raise exception
        kwargs: keywords to pass to luigi.build; they override the defaults
            set here (workers, local_scheduler, log_level)

    Returns:
        The value returned by luigi.build, or None if the user declined the
        invalidation prompt (in which case nothing is run).

    Raises:
        RuntimeError: if `abort` is true and luigi.build returns a falsy result.

    """
    if not isinstance(tasks, list):
        tasks = [tasks]

    if forced_all:
        forced = tasks
    if forced_all_upstream:
        for task in tasks:
            invalidate_upstream(task, confirm=confirm)
    if forced is not None:
        if not isinstance(forced, list):
            forced = [forced]

        # Gather everything downstream of each forced task, for every
        # requested task, then keep only the tasks that are already complete.
        downstream = set()
        for forced_task in forced:
            for requested_task in tasks:
                downstream.update(d6tflow.taskflow_downstream(forced_task, requested_task))
        invalidate = {t for t in downstream if t.complete()}

        if invalidate:
            if confirm:
                print('Forced tasks', invalidate)
                c = input('Confirm invalidating forced tasks (y/n)')
            else:
                c = 'y'
            if c != 'y':
                # The user declined: do not invalidate and do not run anything.
                return None
            for t in invalidate:
                t.invalidate(confirm=False)

    opts = {
        'workers': workers,
        'local_scheduler': True,
        'log_level': d6tflow.settings.log_level,
        **kwargs,
    }
    result = luigi.build(tasks, **opts)
    if abort and not result:
        raise RuntimeError('Exception found running flow, check trace')

    return result