# Python source code of common

import collections
import logging
import os
import subprocess
import xml.etree.ElementTree

import dateutil.parser

import svn.constants
import svn.exception
import svn.common_base

# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)

# Prefix of the line that opens each per-file section of `svn diff` output
# (e.g. "Index: /tmp/testsvnwc/bb").
_FILE_HUNK_PREFIX = 'Index: '

# Prefixes of the header lines inside a per-file diff section: the "left"
# file line, the "right" file line, and the line-numbers line that opens each
# individual hunk (e.g. "@@ -33,6 +33,7 @@").
_HUNK_HEADER_LEFT_PREFIX = '--- '
_HUNK_HEADER_RIGHT_PREFIX = '+++ '
_HUNK_HEADER_LINE_NUMBERS_PREFIX = '@@ '


class CommonClient(svn.common_base.CommonBase):
    def __init__(self, url_or_path, type_, username=None, password=None,
                 svn_filepath='svn', trust_cert=None, env=None, *args, **kwargs):
        """Create a client bound to a single repository URL or local path.

        :param str url_or_path: URL or path that the subcommands operate on.
        :param type_: Either `svn.constants.LT_URL` or
                      `svn.constants.LT_PATH`.
        :param str|None username: User name for `--username`. Only used when
                                  a password is given as well.
        :param str|None password: Password for `--password`. Only used when a
                                  user name is given as well.
        :param str svn_filepath: The svn executable to invoke.
        :param trust_cert: When truthy, `--trust-server-cert` is added to
                           every command.
        :param dict|None env: Passed as `environment` to every command that
                              is run. Defaults to a new, empty dict.

        :raises svn.exception.SvnException: If `type_` is not one of the two
                                            supported values.
        """
        super(CommonClient, self).__init__(*args, **kwargs)

        self.__url_or_path = url_or_path
        self.__username = username
        self.__password = password
        self.__svn_filepath = svn_filepath
        self.__trust_cert = trust_cert

        # A `{}` default in the signature would be a single dict object shared
        # by every instance created without `env`, so build a fresh one here.
        self.__env = {} if env is None else env

        if type_ not in (svn.constants.LT_URL, svn.constants.LT_PATH):
            raise svn.exception.SvnException("Type is invalid: {}".format(type_))

        self.__type = type_

    def run_command(self, subcommand, args, **kwargs):
        """Assemble an svn command line and run it via `external_command`.

        The command line consists of the configured svn executable,
        `--non-interactive`, the optional certificate and credential
        switches, the subcommand and finally `args`.

        :param str subcommand: The svn subcommand (e.g. 'info').
        :param list args: Arguments that follow the subcommand.
        :param kwargs: Forwarded unchanged to `external_command`.

        :return: Whatever `external_command` returns.
        """
        trust_options = ['--trust-server-cert'] if self.__trust_cert else []

        # Credentials are only sent when both halves are available.
        have_credentials = \
            self.__username is not None and self.__password is not None

        if have_credentials:
            auth_options = [
                '--username', self.__username,
                '--password', self.__password,
                '--no-auth-cache',
            ]
        else:
            auth_options = []

        command_line = \
            [self.__svn_filepath, '--non-interactive'] + \
            trust_options + \
            auth_options

        command_line += [subcommand] + args

        return self.external_command(
            command_line,
            environment=self.__env,
            **kwargs)

    def __element_text(self, element):
        """Return the text of an ElementTree element, or None.

        :param xml.etree.ElementTree.Element|None element: Element to read,
            or None if the lookup that produced it found nothing.

        :return str|None: The element text. None if the element is missing or
            has no (or empty) text.
        """
        # `element.text` is None for an element without text (e.g.
        # `<author/>`), so test its truthiness instead of calling len() on it.
        if element is not None and element.text:
            return element.text

        return None

    def info(self, rel_path=None, revision=None):
        """Run `svn info --xml` and return the parsed result as a dictionary.

        :param str|None rel_path: Appended (after a '/') to the URL or path
                                  of this client when not None.
        :param revision: Passed as `-r <revision>` when not None.

        :return dict: The original symbol-laden keys (e.g. 'entry#kind',
                      'repository/root') plus an underscore-only alias for
                      each of them (e.g. 'entry_kind', 'repository_root').
        """
        revision_args = ['-r', str(revision)] if revision is not None else []

        target = self.__url_or_path
        if rel_path is not None:
            target += '/' + rel_path

        raw_xml = self.run_command(
            'info',
            revision_args + ['--xml', target],
            do_combine=True)

        root = xml.etree.ElementTree.fromstring(raw_xml)

        entry_attr = root.find('entry').attrib
        commit_attr = root.find('entry/commit').attrib

        def required_text(xpath):
            # The element is expected to exist; fail loudly if it does not.
            return root.find(xpath).text

        def optional_text(xpath):
            # The element may be absent, in which case we report None.
            return self.__element_text(root.find(xpath))

        info = {
            'url': required_text('entry/url'),

            'relative_url': optional_text('entry/relative-url'),

            # TODO(dustin): These are just for backwards-compatibility. Use
            #               the ones added below.

            'entry#kind': entry_attr['kind'],
            'entry#path': entry_attr['path'],
            'entry#revision': int(entry_attr['revision']),

            'repository/root': required_text('entry/repository/root'),
            'repository/uuid': required_text('entry/repository/uuid'),

            'wc-info/wcroot-abspath':
                optional_text('entry/wc-info/wcroot-abspath'),
            'wc-info/schedule': optional_text('entry/wc-info/schedule'),
            'wc-info/depth': optional_text('entry/wc-info/depth'),
            'commit/author': optional_text('entry/commit/author'),

            'commit/date': dateutil.parser.parse(
                required_text('entry/commit/date')),
            'commit#revision': int(commit_attr['revision']),
        }

        # Friendlier, symbol-free names for the keys above. The old keys stay
        # in place so that existing callers keep working.

        # TODO(dustin): Should we be casting the integers?
        # TODO(dustin): Convert to namedtuple in the next version.

        aliases = (
            ('entry_kind', 'entry#kind'),
            ('entry_path', 'entry#path'),
            ('entry_revision', 'entry#revision'),
            ('repository_root', 'repository/root'),
            ('repository_uuid', 'repository/uuid'),
            ('wcinfo_wcroot_abspath', 'wc-info/wcroot-abspath'),
            ('wcinfo_schedule', 'wc-info/schedule'),
            ('wcinfo_depth', 'wc-info/depth'),
            ('commit_author', 'commit/author'),
            ('commit_date', 'commit/date'),
            ('commit_revision', 'commit#revision'),
        )

        for alias, legacy_key in aliases:
            info[alias] = info[legacy_key]

        return info

    def properties(self, rel_path=None):
        """ Return a dictionary with all svn-properties associated with a
            relative path.
        :param rel_path: relative path in the svn repo to query the
                         properties from
        :returns: a dictionary with the property name as key and the content
                  as value; empty when no properties are reported
        """

        full_url_or_path = self.__url_or_path
        if rel_path is not None:
            full_url_or_path += '/' + rel_path

        result = self.run_command(
            'proplist',
            ['--xml', full_url_or_path],
            do_combine=True)

        # query the property list of this path
        root = xml.etree.ElementTree.fromstring(result)
        target_elem = root.find('target')

        # The output has no <target> element at all when there is nothing to
        # report (e.g. a path without properties). That is not an error.
        if target_elem is None:
            return {}

        property_names = [p.attrib["name"]
                          for p in target_elem.findall('property')]

        # now query the content of each property
        property_dict = {}

        for property_name in property_names:
            result = self.run_command(
                'propget',
                ['--xml', property_name, full_url_or_path, ],
                do_combine=True)
            root = xml.etree.ElementTree.fromstring(result)
            target_elem = root.find('target')
            property_elem = target_elem.find('property')
            property_dict[property_name] = property_elem.text

        return property_dict

    def cat(self, rel_filepath, revision=None):
        """Run `svn cat` on a file below the URL or path of this client.

        :param str rel_filepath: File path, appended after a '/'.
        :param revision: Passed as `-r <revision>` when not None.

        :return: The result of `run_command`, called with
                 `return_binary=True`.
        """
        revision_args = [] if revision is None else ['-r', str(revision)]
        target = self.__url_or_path + '/' + rel_filepath

        return self.run_command(
            'cat',
            revision_args + [target],
            return_binary=True)

    def log_default(self, timestamp_from_dt=None, timestamp_to_dt=None,
                    limit=None, rel_filepath=None, stop_on_copy=False,
                    revision_from=None, revision_to=None, changelist=False,
                    use_merge_history=False):
        """Allow for the most-likely kind of log listing: the complete list,
        a FROM and TO timestamp, a FROM timestamp only, or a quantity limit.

        This is a generator, so nothing (argument validation included)
        happens until it is iterated for the first time.

        :param timestamp_from_dt: Start of the range; an object with an
                                  `isoformat()` method, or None.
        :param timestamp_to_dt: End of the range (same kind of object). When
                                only the start is given, 'HEAD' is used.
        :param limit: Passed as `-l <limit>` when not None.
        :param str|None rel_filepath: Appended (after a '/') to the URL or
                                      path of this client when not None.
        :param bool stop_on_copy: Adds `--stop-on-copy` when True.
        :param revision_from: Start revision; '1' when only `revision_to` is
                              given.
        :param revision_to: End revision; 'HEAD' when only `revision_from`
                            is given.
        :param bool changelist: When True, `--verbose` is added and each
                                entry carries a list of `(action, path)`
                                tuples; otherwise `changelist` is None.
        :param bool use_merge_history: Adds `--use-merge-history` when True.

        :return: Generator of `LogEntry` namedtuples with the fields `date`,
                 `msg`, `revision`, `author` and `changelist`.
        :raises ValueError: If a TO timestamp is given without a FROM
                            timestamp, or if both a timestamp range and a
                            revision range are given.
        """

        full_url_or_path = self.__url_or_path
        if rel_filepath is not None:
            full_url_or_path += '/' + rel_filepath

        timestamp_from_phrase = ('{' + timestamp_from_dt.isoformat() + '}') \
            if timestamp_from_dt \
            else ''

        timestamp_to_phrase = ('{' + timestamp_to_dt.isoformat() + '}') \
            if timestamp_to_dt \
            else ''

        args = []

        if timestamp_from_phrase or timestamp_to_phrase:
            if not timestamp_from_phrase:
                raise ValueError("The default log retriever can not take a TO "
                                 "timestamp without a FROM timestamp.")

            if not timestamp_to_phrase:
                timestamp_to_phrase = 'HEAD'

            args += ['-r', timestamp_from_phrase + ':' + timestamp_to_phrase]

        if revision_from or revision_to:
            if timestamp_from_phrase or timestamp_to_phrase:
                raise ValueError("The default log retriever can not take both "
                                 "timestamp and revision number ranges.")

            if not revision_from:
                revision_from = '1'

            if not revision_to:
                revision_to = 'HEAD'

            args += ['-r', str(revision_from) + ':' + str(revision_to)]

        if limit is not None:
            args += ['-l', str(limit)]

        if stop_on_copy is True:
            args += ['--stop-on-copy']

        if use_merge_history is True:
            args += ['--use-merge-history']

        if changelist is True:
            args += ['--verbose']

        result = self.run_command(
            'log',
            args + ['--xml', full_url_or_path],
            do_combine=True)

        root = xml.etree.ElementTree.fromstring(result)
        named_fields = ['date', 'msg', 'revision', 'author', 'changelist']
        c = collections.namedtuple(
            'LogEntry',
            named_fields)

        # Merge history can create nested log entries, so use iter instead of
        # findall
        for e in root.iter('logentry'):
            # Iterating an element yields its direct children. This replaces
            # `Element.getchildren()`, which was removed in Python 3.9.
            entry_info = {x.tag: x.text for x in e}

            date = None
            date_text = entry_info.get('date')
            if date_text is not None:
                date = dateutil.parser.parse(date_text)

            log_entry = {
                'msg': entry_info.get('msg'),
                'author': entry_info.get('author'),
                'revision': int(e.get('revision')),
                'date': date
            }

            if changelist is True:
                log_entry['changelist'] = [
                    (ch.attrib['action'], ch.text)
                    for ch in e.findall('paths/path')
                ]
            else:
                log_entry['changelist'] = None

            yield c(**log_entry)

    def export(self, to_path, revision=None, force=False):
        """Run `svn export` from the URL or path of this client to `to_path`.

        :param str to_path: Destination given to `svn export`.
        :param revision: Passed as `-r <revision>` when not None.
        :param force: When truthy, `--force` is appended.
        """
        arguments = []

        if revision is not None:
            arguments.extend(['-r', str(revision)])

        arguments.extend([self.__url_or_path, to_path])

        if force:
            arguments.append('--force')

        self.run_command('export', arguments)

    def list(self, extended=False, rel_path=None):
        """List the entries below the URL or path of this client (`svn ls`).

        This is a generator.

        :param extended: When exactly False, each non-blank line of the plain
                         `svn ls` output is yielded (stripped). Otherwise
                         `svn ls --xml` is parsed and a dictionary is yielded
                         per entry.
        :param str|None rel_path: Appended (after a '/') to the URL or path
                                  of this client when not None.

        :return: Generator of strings, or of dictionaries with the keys
                 'kind', 'is_directory', 'name', 'size' (None when the entry
                 has no size element, as with directories), 'author' (None
                 when the commit has no author element), 'date', 'timestamp'
                 (same value as 'date') and 'commit_revision'.
        """
        full_url_or_path = self.__url_or_path
        if rel_path is not None:
            full_url_or_path += '/' + rel_path

        if extended is False:
            for line in self.run_command(
                    'ls',
                    [full_url_or_path]):
                line = line.strip()
                if line:
                    yield line

        else:
            raw = self.run_command(
                'ls',
                ['--xml', full_url_or_path],
                do_combine=True)

            root = xml.etree.ElementTree.fromstring(raw)

            for entry_node in root.findall('list/entry'):
                kind = entry_node.attrib['kind']
                name = entry_node.find('name').text

                size = entry_node.find('size')

                # This will be None for directories.
                if size is not None:
                    size = int(size.text)

                commit_node = entry_node.find('commit')

                # The author is treated as optional, like `info` and
                # `log_default` do, instead of failing with an
                # AttributeError when the element is absent.
                author_node = commit_node.find('author')
                author = author_node.text if author_node is not None else None

                date = dateutil.parser.parse(commit_node.find('date').text)
                revision = int(commit_node.attrib['revision'])

                # TODO(dustin): Convert this to a namedtuple in the next
                #               version.
                yield {
                    'kind': kind,

                    # To decouple people from the knowledge of the value.
                    'is_directory': kind == svn.constants.K_DIR,

                    'name': name,
                    'size': size,
                    'author': author,
                    'date': date,

                    # Our approach to normalizing a goofy field-name.
                    'timestamp': date,

                    'commit_revision': revision,
                }

    def list_recursive(self, rel_path=None, yield_dirs=False,
                       path_filter_cb=None):
        """Walk the tree breadth-first using `list(extended=True)`.

        This is a generator.

        :param str|None rel_path: Directory to start from; None starts at the
                                  URL or path of this client.
        :param yield_dirs: Directory entries are only yielded when this is
                           exactly True; non-directories are always yielded.
        :param path_filter_cb: Optional callable that receives the relative
                               path of each directory found. The directory is
                               not descended into when it returns exactly
                               False.

        :return: Generator of `(directory, entry)` tuples, where `directory`
                 is the relative path of the containing directory ('' for the
                 starting point when `rel_path` is None) and `entry` is the
                 dictionary produced by `list`.
        """
        pending = collections.deque([rel_path])

        while pending:
            directory = pending.popleft()

            # Entries of the starting point are reported under ''.
            directory_phrase = directory if directory is not None else ''

            for entry in self.list(extended=True, rel_path=directory):
                is_directory = entry['is_directory']

                if is_directory is True:
                    if directory is None:
                        child_rel_path = entry['name']
                    else:
                        child_rel_path = \
                            os.path.join(directory, entry['name'])

                    # Only an explicit False from the callback prunes the
                    # directory; any other result keeps it.
                    is_pruned = \
                        path_filter_cb is not None and \
                        path_filter_cb(child_rel_path) is False

                    if not is_pruned:
                        pending.append(child_rel_path)

                if is_directory is False or yield_dirs is True:
                    yield (directory_phrase, entry)

    def diff_summary(self, old, new, rel_path=None):
        """Return the summarized diff between two revisions.

        Runs `svn diff --summarize --xml` with `<target>@<old>` and
        `<target>@<new>` as the two sides.

        :param old: Revision used for the `--old` side.
        :param new: Revision used for the `--new` side.
        :param str|None rel_path: Appended (after a '/') to the URL or path
                                  of this client when not None.

        :return list: One dictionary per changed path, with the keys 'path',
                      'item' and 'kind'.
        """
        target = self.__url_or_path
        if rel_path is not None:
            target += '/' + rel_path

        summary_xml = self.run_command(
            'diff',
            [
                '--old', '{0}@{1}'.format(target, old),
                '--new', '{0}@{1}'.format(target, new),
                '--summarize',
                '--xml',
            ],
            do_combine=True)

        root = xml.etree.ElementTree.fromstring(summary_xml)

        return [
            {
                'path': path_node.text,
                'item': path_node.attrib['item'],
                'kind': path_node.attrib['kind'],
            }
            for path_node in root.findall('paths/path')
        ]

    def diff(self, old, new, rel_path=None):
        """Return the parsed diff between two revisions, keyed by file.

        Runs `svn diff` with `<target>@<old>` and `<target>@<new>` as the two
        sides, splits the output into one section per 'Index: ' line and
        parses each section with `_split_file_hunk`.

        :param old: Revision used for the `--old` side.
        :param new: Revision used for the `--new` side.
        :param str|None rel_path: Appended (after a '/') to the URL or path
                                  of this client when not None.

        :return dict: Maps each file path to the hunk information returned by
                      `_split_file_hunk` (None for a section that has a
                      header but no hunks).
        """

        full_url_or_path = self.__url_or_path
        if rel_path is not None:
            full_url_or_path += '/' + rel_path

        arguments = [
            '--old', '{0}@{1}'.format(full_url_or_path, old),
            '--new', '{0}@{1}'.format(full_url_or_path, new),
        ]

        diff_result = \
            self.run_command(
            'diff', arguments,
            do_combine=True)

        diff_result = diff_result.strip()

        # Split the per-file sections.

        # Index: /tmp/testsvnwc/bb
        # ===================================================================
        # --- /tmp/testsvnwc/bb   (nonexistent)
        # +++ /tmp/testsvnwc/bb   (revision 3)
        # @@ -0,0 +1 @@
        # +Sat Feb  1 03:14:10 EST 2020
        # Index: /tmp/testsvnwc/cc
        # ===================================================================
        # --- /tmp/testsvnwc/cc   (nonexistent)
        # +++ /tmp/testsvnwc/cc   (revision 3)
        # @@ -0,0 +1 @@
        # +Sat Feb  1 03:14:27 EST 2020

        hunks = {}

        # A new section only starts where the prefix begins a line. Searching
        # for the bare prefix would also match it in the middle of a changed
        # line and cut the section in the wrong place.
        section_separator = '\n' + _FILE_HUNK_PREFIX

        while diff_result:
            assert \
                diff_result.startswith(_FILE_HUNK_PREFIX), \
                "Diff output doesn't start with 'Index:':\n{}".format(
                diff_result)

            separator_index = diff_result.find(section_separator)

            if separator_index == -1:
                # This is the last section.
                file_hunk_raw, diff_result = diff_result, ''
            else:
                # The newline stays with the section that it terminates.
                split_at = separator_index + 1
                file_hunk_raw = diff_result[:split_at]
                diff_result = diff_result[split_at:]

            # hunks_info will be `None` for file-adds.
            filepath, hunks_info = self._split_file_hunk(file_hunk_raw)
            hunks[filepath] = hunks_info

        return hunks

    def _split_file_hunk(self, file_hunk):
        """Parse one per-file section of `svn diff` output.

        :param str file_hunk: Text of a single section, starting with its
                              'Index: ' line.

        :return tuple: `(filepath, hunks_info)`. `filepath` is the remainder
                       of the 'Index: ' line. `hunks_info` is None when the
                       section consists of the header only; otherwise it is a
                       dictionary with 'left_phrase' and 'right_phrase' (the
                       remainders of the '--- ' and '+++ ' lines, split on
                       tabs) and 'hunks' (a list of dictionaries with
                       'lines_phrase', the line that opens the hunk, and
                       'body', the lines that follow it joined by newlines).
        """

        lines = file_hunk.split('\n')

        # Index: /tmp/testsvnwc/bb
        # ===================================================================
        filepath = lines[0][len(_FILE_HUNK_PREFIX):]

        # File was added. We have the file-hunk header but no actual hunks.
        #
        # A header-only section that does not end in a newline has fewer
        # than three lines. That is the case for the final section, because
        # `diff` strips its output, and indexing `lines[2]` would fail there.
        if len(lines) < 3:
            return filepath, None

        if len(lines) == 3:
            assert \
                lines[2] == '', \
                "Empty diff expects third line to be empty:\n{}".format(lines)

            return filepath, None

        assert \
            lines[2].startswith(_HUNK_HEADER_LEFT_PREFIX), \
            "Could not find 'left' header prefix: [{}]".format(lines[2])

        assert \
            lines[3].startswith(_HUNK_HEADER_RIGHT_PREFIX), \
            "Could not find 'right' header prefix: [{}]".format(lines[3])

        # --- /tmp/testsvnwc/cc   (revision 5)
        # +++ /tmp/testsvnwc/cc   (revision 6)
        # @@ -33,6 +33,7 @@
        #  nova/sdk/certgen   developertools/certgen  master
        #  nova/apps/search   nova/apps/search    develop
        # +testline1
        #  robot_test_apps/unity_robot_test   ml/unity_robot_test master
        # @@ -236,6 +237,7 @@
        #  external/c-ares    nvidia/android/platform/external/c-ares ml-t186-n-dev
        # +testline2
        #  external/bzip2 nvidia/android/platform/external/bzip2  ml-t186-n-dev

        file_hunk_left_phrase = lines[2][len(_HUNK_HEADER_LEFT_PREFIX):].split('\t')
        file_hunk_right_phrase = lines[3][len(_HUNK_HEADER_RIGHT_PREFIX):].split('\t')

        lines = lines[4:]

        hunks = []
        while lines:
            # The first remaining line opens the hunk ("@@ -33,6 +33,7 @@").
            hunk_lines_phrase = lines[0]
            lines = lines[1:]

            # Everything up to the next line-numbers line belongs to this
            # hunk.
            hunk_lines = []
            for line in lines:
                # We've run into the next hunk.
                if line.startswith(_HUNK_HEADER_LINE_NUMBERS_PREFIX):
                    break

                hunk_lines.append(line)

            lines = lines[len(hunk_lines):]

            hunks.append({
                'lines_phrase': hunk_lines_phrase,
                'body': '\n'.join(hunk_lines),
            })

        hunks_info = {
            'left_phrase': file_hunk_left_phrase,
            'right_phrase': file_hunk_right_phrase,
            'hunks': hunks,
        }

        return filepath, hunks_info

    @property
    def url(self):
        """The URL this client was created with.

        :raises EnvironmentError: If the client type is not
                                  `svn.constants.LT_URL`.
        """
        if self.__type == svn.constants.LT_URL:
            return self.__url_or_path

        raise EnvironmentError(
            "Only the remote-client has access to the URL.")

    @property
    def path(self):
        """The local path this client was created with.

        :raises EnvironmentError: If the client type is not
                                  `svn.constants.LT_PATH`.
        """
        if self.__type == svn.constants.LT_PATH:
            return self.__url_or_path

        raise EnvironmentError(
            "Only the local-client has access to the path.")