# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2019 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#     Santiago Dueñas <sduenas@bitergia.com>
#     Alvaro del Castillo San Felix <acs@bitergia.com>
#

import logging
import pickle

import rq

import perceval
import perceval.backend
import perceval.backends
import perceval.archive

from ._version import __version__
from .errors import NotFoundError


logger = logging.getLogger(__name__)


class JobResult:
    """Class to store the result of a Perceval job.

    It stores the summary of a Perceval job and other useful data
    such as the task and job identifiers, the backend and the
    category of the items generated.

    :param job_id: job identifier
    :param job_number: human readable identifier for this job
    :param task_id: identifier of the task linked to this job
    :param backend: backend used to fetch the items
    :param category: category of the fetched items
    """
    def __init__(self, job_id, job_number, task_id, backend, category):
        self.job_id = job_id
        self.job_number = job_number
        self.task_id = task_id
        self.backend = backend
        self.category = category
        # Set later, once the job has produced a summary
        self.summary = None

    @staticmethod
    def _to_timestamp(dt):
        """Return the POSIX timestamp of `dt`, or `None` when it is unset."""

        return dt.timestamp() if dt is not None else None

    def to_dict(self):
        """Convert object to a dict.

        The identifiers of the job are always included. The fields
        of the summary are only added when a summary is available.
        Dates of the summary are converted to timestamps; a date
        that is not set is exported as `None` instead of failing.

        :returns: a dict with the data of this result
        """
        result = {
            'job_id': self.job_id,
            'job_number': self.job_number,
            'task_id': self.task_id
        }

        summary = self.summary

        if summary:
            result['fetched'] = summary.fetched
            result['skipped'] = summary.skipped
            # The dates may be unset (`None`); calling `timestamp()`
            # on them directly would raise an `AttributeError`.
            result['min_updated_on'] = self._to_timestamp(summary.min_updated_on)
            result['max_updated_on'] = self._to_timestamp(summary.max_updated_on)
            result['last_updated_on'] = self._to_timestamp(summary.last_updated_on)
            result['last_uuid'] = summary.last_uuid
            result['min_offset'] = summary.min_offset
            result['max_offset'] = summary.max_offset
            result['last_offset'] = summary.last_offset
            result['extras'] = summary.extras

        return result


class PercevalJob:
    """Class for wrapping Perceval jobs.

    Wrapper for running and executing Perceval backends. The items
    generated by the execution of a backend will be stored on the
    Redis queue named `qitems`. The result of the job can be obtained
    accessing to the property `result` of this object.

    :param job_id: job identifier
    :param job_number: human readable identifier for this job
    :param task_id: identifier of the task linked to this job
    :param backend: name of the backend to execute
    :param category: category of the items to fetch
    :param conn: connection with a Redis database
    :param qitems: name of the queue where items will be stored

    :raises NotFoundError: raised when the backend is not available
        in Perceval
    """
    def __init__(self, job_id, job_number, task_id, backend, category, conn, qitems):
        try:
            self._bklass = perceval.backend.find_backends(perceval.backends)[0][backend]
        except KeyError as exc:
            raise NotFoundError(element=backend) from exc

        self.job_id = job_id
        self.job_number = job_number
        self.task_id = task_id
        self.backend = backend
        self.conn = conn
        self.qitems = qitems
        self.archive_manager = None
        self.category = category

        self._big = None  # items generator
        self._result = JobResult(self.job_id, self.job_number,
                                 self.task_id, self.backend,
                                 self.category)

    @property
    def result(self):
        """Result of the job, updated with the summary of the generator.

        The summary of the items generator is copied to the result
        the first time it is available.

        :returns: a `JobResult` instance
        """
        if not self._result.summary and self._big and self._big.summary:
            self._result.summary = self._big.summary
        return self._result

    def initialize_archive_manager(self, archive_path):
        """Initialize the archive manager.

        When `archive_path` is `None` no manager is created.

        :param archive_path: path where the archive manager is located

        :raises ValueError: raised when the path is an empty string
        """
        if archive_path == "":
            raise ValueError("Archive manager path cannot be empty")

        if archive_path:
            self.archive_manager = perceval.archive.ArchiveManager(archive_path)

    def run(self, backend_args, archive_args=None):
        """Run the backend with the given parameters.

        The method will run the backend assigned to this job,
        storing the fetched items in a Redis queue. The ongoing
        status of the job, can be accessed through the property
        `result`. When the parameter `fetch_from_archive` is set
        to `True`, items will be fetched from the archive assigned
        to this job.

        Any exception during the execution of the process will
        be raised.

        :param backend_args: parameters used to run the backend
        :param archive_args: archive arguments
        """
        # Work on a copy; the dict given by the caller is left untouched
        args = backend_args.copy()

        if archive_args:
            self.initialize_archive_manager(archive_args['archive_path'])

        # Every run starts with a fresh result
        self._result = JobResult(self.job_id, self.job_number,
                                 self.task_id, self.backend,
                                 self.category)

        self._big = self._create_items_generator(args, archive_args)

        for item in self._big.items:
            self._metadata(item)
            self.conn.rpush(self.qitems, pickle.dumps(item))

    def has_archiving(self):
        """Returns if the job supports items archiving"""

        return self._bklass.has_archiving()

    def has_resuming(self):
        """Returns if the job can be resumed when it fails"""

        return self._bklass.has_resuming()

    def _create_items_generator(self, backend_args, archive_args):
        """Create a Perceval items generator.

        This method will create a items generator using the
        internal backend defined for this job and the given
        parameters.

        :param backend_args: arguments to execute the backend
        :param archive_args: archive arguments

        :returns: a `BackendItemsGenerator` instance
        """
        fetch_archive = archive_args and archive_args['fetch_from_archive']

        # `archived_after` is only taken into account when fetching
        # from the archive
        if fetch_archive:
            archived_after = archive_args.get('archived_after', None)
        else:
            archived_after = None

        return perceval.backend.BackendItemsGenerator(self._bklass,
                                                      backend_args,
                                                      self.category,
                                                      manager=self.archive_manager,
                                                      fetch_archive=fetch_archive,
                                                      archived_after=archived_after)

    def _metadata(self, item):
        """Add metadata to an item.

        Method that adds in place metadata to Perceval items such as
        the identifier of the job that generated it or the version of
        the system.

        :param item: an item generated by Perceval
        """
        item['arthur_version'] = __version__
        item['job_id'] = self.job_id


def execute_perceval_job(backend, backend_args, qitems, task_id, job_number,
                         category, archive_args=None):
    """Execute a Perceval job on RQ.

    The items fetched during the process will be stored in a
    Redis queue named `qitems`.

    Setting the parameter `archive_path`, raw data will be stored
    with the archive manager. The contents from the archive can
    be retrieved setting the parameter `fetch_from_archive` to `True`,
    too. Take into account this behaviour will be only available
    when the backend supports the use of the archive. If archiving
    is not supported, an `AttributeError` exception will be raised.

    When the job fails with any error other than `AttributeError`,
    the partial result is stored under the key `result` of the
    RQ job metadata before the error is raised again.

    :param backend: backend to execute
    :param backend_args: dict of arguments for running the backend
    :param qitems: name of the RQ queue used to store the items
    :param task_id: identifier of the task linked to this job
    :param job_number: human readable identifier for this job
    :param category: category of the items to retrieve
    :param archive_args: archive arguments

    :returns: a `JobResult` instance

    :raises NotFoundError: raised when the backend is not found
    :raises AttributeError: raised when archiving is not supported but
        any of the archive parameters were set
    """
    rq_job = rq.get_current_job()

    job = PercevalJob(rq_job.id, job_number, task_id, backend, category,
                      rq_job.connection, qitems)

    logger.debug("Running job #%s (task: %s) (%s) (cat:%s)",
                 job.job_id, task_id, backend, category)

    if not job.has_archiving() and archive_args:
        raise AttributeError("archive attributes set but archive is not supported")

    try:
        job.run(backend_args, archive_args=archive_args)
    except AttributeError:
        # Raised again as it is; the job metadata is not updated
        raise
    except Exception as e:
        rq_job = rq.get_current_job()
        rq_job.meta['result'] = job.result
        rq_job.save_meta()
        logger.debug("Error running job %s (%s) - %s",
                     job.job_id, backend, e)
        raise

    result = job.result

    logger.debug("Job #%s (task: %s) completed (%s) - %s/%s items (%s) fetched",
                 result.job_id, task_id, result.backend,
                 result.summary.fetched, result.summary.skipped,
                 result.category)

    return result