# -*- coding: utf-8 -*-
""" asammdf utility functions and classes """

from collections import namedtuple
from io import BytesIO
import logging
from pathlib import Path
from random import randint
import re
import string
from struct import Struct
import subprocess
import sys
from tempfile import TemporaryDirectory
import xml.etree.ElementTree as ET

from cchardet import detect
import numpy as np
from numpy import arange, interp, where
from numpy.core.records import fromarrays
from pandas import Series

from . import v2_v3_constants as v3c
from . import v4_constants as v4c

try:
    from canmatrix.dbc import load as dbc_load
    from canmatrix.arxml import load as arxml_load
except ModuleNotFoundError:
    from canmatrix.formats.dbc import load as dbc_load
    from canmatrix.formats.arxml import load as arxml_load

UINT8_u = Struct("<B").unpack
UINT16_u = Struct("<H").unpack
UINT32_u = Struct("<I").unpack
UINT64_u = Struct("<Q").unpack
UINT8_uf = Struct("<B").unpack_from
UINT16_uf = Struct("<H").unpack_from
UINT32_uf = Struct("<I").unpack_from
UINT64_uf = Struct("<Q").unpack_from
FLOAT64_u = Struct("<d").unpack
FLOAT64_uf = Struct("<d").unpack_from
TWO_UINT64_u = Struct("<2Q").unpack
TWO_UINT64_uf = Struct("<2Q").unpack_from

_xmlns_pattern = re.compile(' xmlns="[^"]*"')

logger = logging.getLogger("asammdf")

__all__ = [
    "CHANNEL_COUNT",
    "CONVERT",
    "MERGE",
    "ChannelsDB",
    "UniqueDB",
    "MdfException",
    "SignalSource",
    "get_fmt_v3",
    "get_fmt_v4",
    "get_text_v4",
    "fmt_to_datatype_v3",
    "fmt_to_datatype_v4",
    "matlab_compatible",
    "extract_cncomment_xml",
    "validate_version_argument",
    "MDF2_VERSIONS",
    "MDF3_VERSIONS",
    "MDF4_VERSIONS",
    "SUPPORTED_VERSIONS",
]

CHANNEL_COUNT = (1000, 2000, 10000, 20000)
_channel_count = arange(0, 20000, 1000, dtype="<u4")

CONVERT = (10 * 2 ** 20, 20 * 2 ** 20, 30 * 2 ** 20, 40 * 2 ** 20)
CONVERT = interp(_channel_count, CHANNEL_COUNT, CONVERT).astype("<u4")

MERGE = (10 * 2 ** 20, 20 * 2 ** 20, 35 * 2 ** 20, 60 * 2 ** 20)
MERGE = interp(_channel_count, CHANNEL_COUNT, MERGE).astype("<u4")

CHANNEL_COUNT = _channel_count

MDF2_VERSIONS = ("2.00", "2.10", "2.14")
MDF3_VERSIONS = ("3.00", "3.10", "3.20", "3.30")
MDF4_VERSIONS = ("4.00", "4.10", "4.11", "4.20")
SUPPORTED_VERSIONS = MDF2_VERSIONS + MDF3_VERSIONS + MDF4_VERSIONS

ALLOWED_MATLAB_CHARS = set(string.ascii_letters + string.digits + "_")


class MdfException(Exception):
    """MDF Exception class"""

    pass


def extract_cncomment_xml(comment):
    """extract *TX* tag or otherwise the *common_properties* from an xml
    comment

    Parameters
    ----------
    comment : str
        xml string comment

    Returns
    -------
    comment : str
        extracted string

    """
    comment = comment.replace(' xmlns="http://www.asam.net/mdf/v4"', "")
    try:
        comment = ET.fromstring(comment)
        match = comment.find(".//TX")
        if match is None:
            common_properties = comment.find(".//common_properties")
            if common_properties is not None:
                comment = []
                for e in common_properties:
                    field = f'{e.get("name")}: {e.text}'
                    comment.append(field)
                # join the collected fields, not the characters of the last field
                comment = "\n".join(comment)
            else:
                comment = ""
        else:
            comment = match.text or ""
    except ET.ParseError:
        pass

    return comment


def matlab_compatible(name):
    """make a channel name compatible with Matlab variable naming

    Parameters
    ----------
    name : str
        channel name

    Returns
    -------
    compatible_name : str
        channel name compatible with Matlab

    """
    compatible_name = [ch if ch in ALLOWED_MATLAB_CHARS else "_" for ch in name]
    compatible_name = "".join(compatible_name)

    if compatible_name[0] not in string.ascii_letters:
        compatible_name = "M_" + compatible_name

    # max variable name is 63 and 3 chars are reserved
    # for get_unique_name in case of multiple channel name occurrence
    return compatible_name[:60]
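# Illustrative sketch (not part of the module): invalid characters are replaced
# with "_" and names that do not start with a letter get an "M_" prefix, e.g.
#
#     >>> matlab_compatible("engine.speed[rpm]")
#     'engine_speed_rpm_'
#     >>> matlab_compatible("1st_gear")
#     'M_1st_gear'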
def get_text_v3(address, stream, mapped=False, decode=True):
    """faster way to extract strings from mdf versions 2 and 3 TextBlock

    Parameters
    ----------
    address : int
        TextBlock address
    stream : handle
        file IO handle

    Returns
    -------
    text : str
        unicode string

    """
    if address == 0:
        return "" if decode else b""

    if mapped:
        block_id = stream[address : address + 2]
        if block_id != b"TX":
            return "" if decode else b""
        (size,) = UINT16_uf(stream, address + 2)
        text_bytes = stream[address + 4 : address + size].strip(b" \r\t\n\0")
    else:
        stream.seek(address)
        block_id = stream.read(2)
        if block_id != b"TX":
            return "" if decode else b""
        size = UINT16_u(stream.read(2))[0] - 4
        text_bytes = stream.read(size).strip(b" \r\t\n\0")
    if decode:
        try:
            text = text_bytes.decode("latin-1")
        except UnicodeDecodeError:
            encoding = detect(text_bytes)["encoding"]
            text = text_bytes.decode(encoding, "ignore")
    else:
        text = text_bytes

    return text


def get_text_v4(address, stream, mapped=False, decode=True):
    """faster way to extract strings from mdf version 4 TextBlock

    Parameters
    ----------
    address : int
        TextBlock address
    stream : handle
        file IO handle

    Returns
    -------
    text : str
        unicode string

    """
    if address == 0:
        return "" if decode else b""

    if mapped:
        block_id = stream[address : address + 4]
        if block_id not in (b"##TX", b"##MD"):
            return "" if decode else b""
        (size,) = UINT64_uf(stream, address + 8)
        text_bytes = stream[address + 24 : address + size].strip(b" \r\t\n\0")
    else:
        stream.seek(address)
        block_id = stream.read(8)[:4]
        if block_id not in (b"##TX", b"##MD"):
            return "" if decode else b""
        size, _ = TWO_UINT64_u(stream.read(16))
        text_bytes = stream.read(size - 24).strip(b" \r\t\n\0")
    if decode:
        try:
            text = text_bytes.decode("utf-8")
        except UnicodeDecodeError:
            encoding = detect(text_bytes)["encoding"]
            text = text_bytes.decode(encoding, "ignore")
    else:
        text = text_bytes

    return text


def sanitize_xml(text):
    return re.sub(_xmlns_pattern, "", text)


def get_fmt_v3(data_type, size, byte_order=v3c.BYTE_ORDER_INTEL):
    """convert mdf versions 2 and 3 channel data type to numpy dtype format
    string

    Parameters
    ----------
    data_type : int
        mdf channel data type
    size : int
        data bit size
    byte_order : int
        default byte order used when the data type does not encode it

    Returns
    -------
    fmt : str
        numpy compatible data type format string

    """
    if data_type in (v3c.DATA_TYPE_STRING, v3c.DATA_TYPE_BYTEARRAY):
        size = size // 8
        if data_type == v3c.DATA_TYPE_STRING:
            fmt = f"S{size}"
        else:
            fmt = f"({size},)u1"
    else:
        if size > 64 and data_type in (
            v3c.DATA_TYPE_UNSIGNED_INTEL,
            v3c.DATA_TYPE_UNSIGNED,
            v3c.DATA_TYPE_UNSIGNED_MOTOROLA,
        ):
            fmt = f"({size // 8},)u1"
        else:
            if size <= 8:
                size = 1
            elif size <= 16:
                size = 2
            elif size <= 32:
                size = 4
            elif size <= 64:
                size = 8
            else:
                size = size // 8

            if data_type == v3c.DATA_TYPE_UNSIGNED_INTEL:
                fmt = f"<u{size}"
            elif data_type == v3c.DATA_TYPE_UNSIGNED:
                if byte_order == v3c.BYTE_ORDER_INTEL:
                    fmt = f"<u{size}"
                else:
                    fmt = f">u{size}"
            elif data_type == v3c.DATA_TYPE_UNSIGNED_MOTOROLA:
                fmt = f">u{size}"
            elif data_type == v3c.DATA_TYPE_SIGNED_INTEL:
                fmt = f"<i{size}"
            elif data_type == v3c.DATA_TYPE_SIGNED:
                if byte_order == v3c.BYTE_ORDER_INTEL:
                    fmt = f"<i{size}"
                else:
                    fmt = f">i{size}"
            elif data_type == v3c.DATA_TYPE_SIGNED_MOTOROLA:
                fmt = f">i{size}"
            elif data_type in (v3c.DATA_TYPE_FLOAT_INTEL, v3c.DATA_TYPE_DOUBLE_INTEL):
                fmt = f"<f{size}"
            elif data_type in (
                v3c.DATA_TYPE_FLOAT_MOTOROLA,
                v3c.DATA_TYPE_DOUBLE_MOTOROLA,
            ):
                fmt = f">f{size}"
            elif data_type in (v3c.DATA_TYPE_FLOAT, v3c.DATA_TYPE_DOUBLE):
                if byte_order == v3c.BYTE_ORDER_INTEL:
                    fmt = f"<f{size}"
                else:
                    fmt = f">f{size}"

    return fmt
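# Illustrative sketch (not part of the module): a 10 bit unsigned Intel channel
# is widened to the next standard byte size, so it maps to a 2 byte little
# endian unsigned integer
#
#     >>> get_fmt_v3(v3c.DATA_TYPE_UNSIGNED_INTEL, 10)
#     '<u2'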
def get_fmt_v4(data_type, size, channel_type=v4c.CHANNEL_TYPE_VALUE):
    """convert mdf version 4 channel data type to numpy dtype format string

    Parameters
    ----------
    data_type : int
        mdf channel data type
    size : int
        data bit size
    channel_type : int
        mdf channel type

    Returns
    -------
    fmt : str
        numpy compatible data type format string

    """
    if data_type in v4c.NON_SCALAR_TYPES:
        size = size // 8

        if data_type == v4c.DATA_TYPE_BYTEARRAY:
            if channel_type == v4c.CHANNEL_TYPE_VALUE:
                fmt = f"({size},)u1"
            else:
                fmt = f"<u{size}"

        elif data_type in v4c.STRING_TYPES:
            if channel_type == v4c.CHANNEL_TYPE_VALUE:
                fmt = f"S{size}"
            else:
                fmt = f"<u{size}"

        elif data_type == v4c.DATA_TYPE_CANOPEN_DATE:
            fmt = "V7"

        elif data_type == v4c.DATA_TYPE_CANOPEN_TIME:
            fmt = "V6"

    else:
        if size > 64 and data_type in (
            v4c.DATA_TYPE_UNSIGNED_INTEL,
            v4c.DATA_TYPE_UNSIGNED,
        ):
            fmt = f"({size // 8},)u1"
        else:
            if size <= 8:
                size = 1
            elif size <= 16:
                size = 2
            elif size <= 32:
                size = 4
            elif size <= 64:
                size = 8
            else:
                size = size // 8

            if data_type == v4c.DATA_TYPE_UNSIGNED_INTEL:
                fmt = f"<u{size}"
            elif data_type == v4c.DATA_TYPE_UNSIGNED_MOTOROLA:
                fmt = f">u{size}"
            elif data_type == v4c.DATA_TYPE_SIGNED_INTEL:
                fmt = f"<i{size}"
            elif data_type == v4c.DATA_TYPE_SIGNED_MOTOROLA:
                fmt = f">i{size}"
            elif data_type == v4c.DATA_TYPE_REAL_INTEL:
                fmt = f"<f{size}"
            elif data_type == v4c.DATA_TYPE_REAL_MOTOROLA:
                fmt = f">f{size}"
            elif data_type == v4c.DATA_TYPE_COMPLEX_INTEL:
                fmt = f"<c{size}"
            elif data_type == v4c.DATA_TYPE_COMPLEX_MOTOROLA:
                fmt = f">c{size}"

    return fmt


def fmt_to_datatype_v3(fmt, shape, array=False):
    """convert numpy dtype format string to mdf versions 2 and 3
    channel data type and size

    Parameters
    ----------
    fmt : numpy.dtype
        numpy data type
    shape : tuple
        numpy array shape
    array : bool
        disambiguate between bytearray and channel array

    Returns
    -------
    data_type, size : int, int
        integer data type as defined by ASAM MDF and bit size

    """
    byteorder = fmt.byteorder
    if byteorder in "=|":
        byteorder = "<" if sys.byteorder == "little" else ">"
    size = fmt.itemsize * 8
    kind = fmt.kind

    if not array and shape[1:] and fmt.itemsize == 1 and kind == "u":
        data_type = v3c.DATA_TYPE_BYTEARRAY
        for dim in shape[1:]:
            size *= dim
    else:
        if kind == "u":
            if byteorder in "<":
                data_type = v3c.DATA_TYPE_UNSIGNED_INTEL
            else:
                data_type = v3c.DATA_TYPE_UNSIGNED_MOTOROLA
        elif kind == "i":
            if byteorder in "<":
                data_type = v3c.DATA_TYPE_SIGNED_INTEL
            else:
                data_type = v3c.DATA_TYPE_SIGNED_MOTOROLA
        elif kind == "f":
            if byteorder in "<":
                if size == 32:
                    data_type = v3c.DATA_TYPE_FLOAT
                else:
                    data_type = v3c.DATA_TYPE_DOUBLE
            else:
                if size == 32:
                    data_type = v3c.DATA_TYPE_FLOAT_MOTOROLA
                else:
                    data_type = v3c.DATA_TYPE_DOUBLE_MOTOROLA
        elif kind in "SV":
            data_type = v3c.DATA_TYPE_STRING
        elif kind == "b":
            data_type = v3c.DATA_TYPE_UNSIGNED_INTEL
            size = 1
        else:
            message = f"Unknown type: dtype={fmt}, shape={shape}"
            logger.exception(message)
            raise MdfException(message)

    return data_type, size
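# Illustrative sketch (not part of the module): a little endian float64 dtype
# maps to the DOUBLE data type code with a 64 bit size
#
#     >>> data_type, size = fmt_to_datatype_v3(np.dtype("<f8"), (10,))
#     >>> data_type == v3c.DATA_TYPE_DOUBLE, size
#     (True, 64)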
def info_to_datatype_v4(signed, little_endian):
    """map CAN signal to MDF integer types

    Parameters
    ----------
    signed : bool
        signal is flagged as signed in the CAN database
    little_endian : bool
        signal is flagged as little endian (Intel) in the CAN database

    Returns
    -------
    datatype : int
        integer code for MDF channel data type

    """
    if signed:
        if little_endian:
            datatype = v4c.DATA_TYPE_SIGNED_INTEL
        else:
            datatype = v4c.DATA_TYPE_SIGNED_MOTOROLA
    else:
        if little_endian:
            datatype = v4c.DATA_TYPE_UNSIGNED_INTEL
        else:
            datatype = v4c.DATA_TYPE_UNSIGNED_MOTOROLA

    return datatype


def fmt_to_datatype_v4(fmt, shape, array=False):
    """convert numpy dtype format string to mdf version 4 channel data
    type and size

    Parameters
    ----------
    fmt : numpy.dtype
        numpy data type
    shape : tuple
        numpy array shape
    array : bool
        disambiguate between bytearray and channel array

    Returns
    -------
    data_type, size : int, int
        integer data type as defined by ASAM MDF and bit size

    """
    byteorder = fmt.byteorder
    if byteorder in "=|":
        byteorder = "<" if sys.byteorder == "little" else ">"
    size = fmt.itemsize * 8
    kind = fmt.kind

    if not array and shape[1:] and fmt.itemsize == 1 and kind == "u":
        data_type = v4c.DATA_TYPE_BYTEARRAY
        for dim in shape[1:]:
            size *= dim
    else:
        if kind == "u":
            if byteorder in "<":
                data_type = v4c.DATA_TYPE_UNSIGNED_INTEL
            else:
                data_type = v4c.DATA_TYPE_UNSIGNED_MOTOROLA
        elif kind == "i":
            if byteorder in "<":
                data_type = v4c.DATA_TYPE_SIGNED_INTEL
            else:
                data_type = v4c.DATA_TYPE_SIGNED_MOTOROLA
        elif kind == "f":
            if byteorder in "<":
                data_type = v4c.DATA_TYPE_REAL_INTEL
            else:
                data_type = v4c.DATA_TYPE_REAL_MOTOROLA
        elif kind in "SV":
            data_type = v4c.DATA_TYPE_STRING_LATIN_1
        elif kind == "b":
            data_type = v4c.DATA_TYPE_UNSIGNED_INTEL
            size = 1
        elif kind == "c":
            if byteorder in "<":
                data_type = v4c.DATA_TYPE_COMPLEX_INTEL
            else:
                data_type = v4c.DATA_TYPE_COMPLEX_MOTOROLA
        else:
            message = f"Unknown type: dtype={fmt}, shape={shape}"
            logger.exception(message)
            raise MdfException(message)

    return data_type, size
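# Illustrative sketch (not part of the module): a (N, 8) uint8 record field is
# reported as a byte array whose bit size covers all 8 bytes of one sample
#
#     >>> data_type, size = fmt_to_datatype_v4(np.dtype("u1"), (100, 8))
#     >>> data_type == v4c.DATA_TYPE_BYTEARRAY, size
#     (True, 64)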
def as_non_byte_sized_signed_int(integer_array, bit_length):
    """
    The MDF spec allows values to be encoded as integers that aren't
    byte-sized. Numpy only knows how to do two's complement on byte-sized
    integers (i.e. int16, int32, int64, etc.), so we have to calculate two's
    complement ourselves in order to handle signed integers with
    unconventional lengths.

    Parameters
    ----------
    integer_array : np.array
        Array of integers to apply two's complement to
    bit_length : int
        Number of bits to sample from the array

    Returns
    -------
    integer_array : np.array
        signed integer array with non-byte-sized two's complement applied

    """
    if integer_array.flags.writeable:
        integer_array &= (1 << bit_length) - 1  # Zero out the unwanted bits
        truncated_integers = integer_array
    else:
        truncated_integers = integer_array & (
            (1 << bit_length) - 1
        )  # Zero out the unwanted bits
    return where(
        # sign bit as a truth series (True when negative)
        truncated_integers >> bit_length - 1,
        # when negative, do two's complement
        (2 ** bit_length - truncated_integers) * -1,
        # when positive, return the truncated int
        truncated_integers,
    )
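# Illustrative sketch (not part of the module): with a 3 bit field, the raw
# value 0b111 is the two's complement encoding of -1, while 0b011 stays 3
#
#     >>> as_non_byte_sized_signed_int(np.array([0b111, 0b011], dtype=np.int64), 3)
#     array([-1,  3])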
def debug_channel(mdf, group, channel, dependency, file=None):
    """use this to print debug information in case of errors

    Parameters
    ----------
    mdf : MDF
        source MDF object
    group : dict
        group
    channel : Channel
        channel object
    dependency : ChannelDependency
        channel dependency object
    file : handle
        output stream; defaults to stdout

    """
    print("MDF", "=" * 76, file=file)
    print("name:", mdf.name, file=file)
    print("version:", mdf.version, file=file)
    print("read fragment size:", mdf._read_fragment_size, file=file)
    print("write fragment size:", mdf._write_fragment_size, file=file)
    print(file=file)

    parents, dtypes = mdf._prepare_record(group)
    print("GROUP", "=" * 74, file=file)
    print("sorted:", group["sorted"], file=file)
    print("data location:", group["data_location"], file=file)
    print("data blocks:", group.data_blocks, file=file)
    print("dependencies", group["channel_dependencies"], file=file)
    print("parents:", parents, file=file)
    print("dtypes:", dtypes, file=file)
    print(file=file)

    cg = group["channel_group"]
    print("CHANNEL GROUP", "=" * 66, file=file)
    print(cg, cg.cycles_nr, cg.samples_byte_nr, cg.invalidation_bytes_nr, file=file)
    print(file=file)

    print("CHANNEL", "=" * 72, file=file)
    print(channel, file=file)
    print(file=file)

    print("CHANNEL ARRAY", "=" * 66, file=file)
    print(dependency, file=file)
    print(file=file)


def count_channel_groups(stream, include_channels=False):
    """count all channel groups as fast as possible. This is used to provide
    reliable progress information when loading a file using the GUI

    Parameters
    ----------
    stream : file handle
        opened file handle
    include_channels : bool
        also count channels

    Returns
    -------
    count, ch_count : int, int
        channel group count and channel count

    """
    count = 0
    ch_count = 0
    stream.seek(64)
    blk_id = stream.read(2)
    if blk_id == b"HD":
        version = 3
    else:
        blk_id += stream.read(2)
        if blk_id == b"##HD":
            version = 4
        else:
            raise MdfException(f'"{stream.name}" is not a valid MDF file')

    if version >= 4:
        stream.seek(88, 0)
        dg_addr = UINT64_u(stream.read(8))[0]
        while dg_addr:
            stream.seek(dg_addr + 32)
            cg_addr = UINT64_u(stream.read(8))[0]
            while cg_addr:
                count += 1
                if include_channels:
                    stream.seek(cg_addr + 32)
                    ch_addr = UINT64_u(stream.read(8))[0]
                    while ch_addr:
                        ch_count += 1
                        stream.seek(ch_addr + 24)
                        ch_addr = UINT64_u(stream.read(8))[0]
                stream.seek(cg_addr + 24)
                cg_addr = UINT64_u(stream.read(8))[0]

            stream.seek(dg_addr + 24)
            dg_addr = UINT64_u(stream.read(8))[0]

    else:
        stream.seek(68, 0)
        dg_addr = UINT32_u(stream.read(4))[0]
        while dg_addr:
            stream.seek(dg_addr + 8)
            cg_addr = UINT32_u(stream.read(4))[0]
            while cg_addr:
                count += 1
                if include_channels:
                    stream.seek(cg_addr + 8)
                    ch_addr = UINT32_u(stream.read(4))[0]
                    while ch_addr:
                        ch_count += 1
                        stream.seek(ch_addr + 4)
                        ch_addr = UINT32_u(stream.read(4))[0]
                stream.seek(cg_addr + 4)
                cg_addr = UINT32_u(stream.read(4))[0]

            stream.seek(dg_addr + 4)
            dg_addr = UINT32_u(stream.read(4))[0]

    return count, ch_count


def validate_version_argument(version, hint=4):
    """validate the version argument against the supported MDF versions. The
    default version used depends on the hint MDF major revision

    Parameters
    ----------
    version : str
        requested MDF version
    hint : int
        MDF revision hint

    Returns
    -------
    valid_version : str
        valid version

    """
    if version not in SUPPORTED_VERSIONS:
        if hint == 2:
            valid_version = "2.14"
        elif hint == 3:
            valid_version = "3.30"
        else:
            valid_version = "4.10"
        message = (
            'Unknown mdf version "{}".'
            " The available versions are {};"
            ' automatically using version "{}"'
        )
        message = message.format(version, SUPPORTED_VERSIONS, valid_version)
        logger.warning(message)
    else:
        valid_version = version
    return valid_version


class ChannelsDB(dict):
    def __init__(self, version=4):
        super().__init__()

    def add(self, channel_name, entry):
        """add name to channels database and check if it contains a source
        path

        Parameters
        ----------
        channel_name : str
            name that needs to be added to the database
        entry : tuple
            (group index, channel index) pair

        """
        if channel_name:
            if channel_name not in self:
                self[channel_name] = (entry,)
            else:
                self[channel_name] += (entry,)

            if "\\" in channel_name:
                channel_name, _ = channel_name.split("\\", 1)

                if channel_name not in self:
                    self[channel_name] = (entry,)
                else:
                    self[channel_name] += (entry,)
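# Illustrative sketch (not part of the module): names that contain a display
# name separator ("\") are also indexed under the part before the separator
#
#     >>> db = ChannelsDB()
#     >>> db.add("Speed\\EngineData", (0, 3))
#     >>> db["Speed\\EngineData"], db["Speed"]
#     (((0, 3),), ((0, 3),))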
def randomized_string(size):
    r"""get a \0 terminated string of size length

    Parameters
    ----------
    size : int
        target string length

    Returns
    -------
    string : bytes
        randomized string

    """
    return bytes(randint(65, 90) for _ in range(size - 1)) + b"\0"


def is_file_like(obj):
    """Check if the object is a file-like object.

    For objects to be considered file-like, they must be an iterator AND
    have a 'read' and 'seek' method as an attribute.

    Note: file-like objects must be iterable, but iterable objects need not
    be file-like.

    Parameters
    ----------
    obj : The object to check.

    Returns
    -------
    is_file_like : bool
        Whether `obj` has file-like properties.

    Examples
    --------
    >>> buffer = BytesIO(b"data")
    >>> is_file_like(buffer)
    True
    >>> is_file_like([1, 2, 3])
    False

    """
    if not (hasattr(obj, "read") and hasattr(obj, "seek")):
        return False

    if not hasattr(obj, "__iter__"):
        return False

    return True


class UniqueDB(object):
    def __init__(self):
        self._db = {}

    def get_unique_name(self, name):
        """returns an available unique name

        Parameters
        ----------
        name : str
            name to be made unique

        Returns
        -------
        unique_name : str
            new unique name

        """
        if name not in self._db:
            self._db[name] = 0
            return name
        else:
            index = self._db[name]
            self._db[name] = index + 1
            return f"{name}_{index}"


def cut_video_stream(stream, start, end, fmt):
    """cut video stream from `start` to `end` time

    Parameters
    ----------
    stream : bytes
        video file content
    start : float
        start time
    end : float
        end time

    Returns
    -------
    result : bytes
        content of cut video

    """
    with TemporaryDirectory() as tmp:
        in_file = Path(tmp) / f"in{fmt}"
        out_file = Path(tmp) / f"out{fmt}"

        in_file.write_bytes(stream)

        try:
            ret = subprocess.run(
                [
                    "ffmpeg",
                    "-ss",
                    f"{start}",
                    "-i",
                    f"{in_file}",
                    "-to",
                    f"{end}",
                    "-c",
                    "copy",
                    f"{out_file}",
                ],
                capture_output=True,
            )
        except FileNotFoundError:
            result = stream
        else:
            if ret.returncode:
                result = stream
            else:
                result = out_file.read_bytes()

    return result


def get_video_stream_duration(stream):
    with TemporaryDirectory() as tmp:
        in_file = Path(tmp) / "in"
        in_file.write_bytes(stream)

        try:
            result = subprocess.run(
                [
                    "ffprobe",
                    "-v",
                    "error",
                    "-show_entries",
                    "format=duration",
                    "-of",
                    "default=noprint_wrappers=1:nokey=1",
                    f"{in_file}",
                ],
                capture_output=True,
            )
            result = float(result.stdout)
        except FileNotFoundError:
            result = None
    return result


class Group:

    __slots__ = (
        "channels",
        "channel_dependencies",
        "signal_data_size",
        "signal_data",
        "channel_group",
        "record_size",
        "sorted",
        "data_group",
        "data_location",
        "data_blocks",
        "record",
        "parents",
        "types",
        "signal_types",
        "trigger",
        "string_dtypes",
        "single_channel_dtype",
        "uses_ld",
        "read_split_count",
    )

    def __init__(self, data_group):
        self.data_group = data_group
        self.channels = []
        self.channel_dependencies = []
        self.signal_data = []
        self.parents = None
        self.types = None
        self.record = None
        self.trigger = None
        self.string_dtypes = None
        self.data_blocks = []
        self.single_channel_dtype = None
        self.uses_ld = False
        self.read_split_count = 0

    def __getitem__(self, item):
        return self.__getattribute__(item)

    def __setitem__(self, item, value):
        self.__setattr__(item, value)

    def set_blocks_info(self, info):
        self.data_blocks = info

    def __contains__(self, item):
        return hasattr(self, item)
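# Illustrative sketch (not part of the module; `data_group` stands in for a
# DataGroup block): Group exposes both attribute and mapping style access, so
# older dict-based code keeps working
#
#     >>> group = Group(data_group)
#     >>> group.sorted = True
#     >>> group["sorted"]
#     True
#     >>> "sorted" in group
#     True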
class VirtualChannelGroup:
    """starting with MDF v4.20 it is possible to use remote masters and column
    oriented storage. This means we now have virtual channel groups that can
    span over multiple regular channel groups. This class facilitates the
    handling of these virtual groups

    """

    __slots__ = (
        "groups",
        "record_size",
        "cycles_nr",
    )

    def __init__(self):
        self.groups = []
        self.record_size = 0
        self.cycles_nr = 0

    def __repr__(self):
        return f"VirtualChannelGroup(groups={self.groups}, record_size={self.record_size}, cycles_nr={self.cycles_nr})"


def block_fields(obj):
    fields = []
    for attr in dir(obj):
        if attr[:2] + attr[-2:] == "____":
            continue
        try:
            if callable(getattr(obj, attr)):
                continue
            fields.append(f"{attr}:{getattr(obj, attr)}")
        except AttributeError:
            continue

    return fields


def components(
    channel,
    channel_name,
    unique_names,
    prefix="",
    master=None,
    only_basenames=False,
):
    """yield pandas Series and unique name based on the ndarray object

    Parameters
    ----------
    channel : numpy.ndarray
        channel to be used for Series
    channel_name : str
        channel name
    unique_names : UniqueDB
        unique names object
    prefix : str
        prefix used in case of nested recarrays
    master : np.array
        optional index for the Series
    only_basenames (False) : bool
        use just the field names, without prefix, for structures and channel
        arrays

        .. versionadded:: 5.13.0

    Returns
    -------
    name, series : (str, pandas.Series)
        tuple of unique name and Series object

    """
    names = channel.dtype.names

    # channel arrays
    if names[0] == channel_name:
        name = names[0]

        if not only_basenames:
            if prefix:
                name_ = unique_names.get_unique_name(f"{prefix}.{name}")
            else:
                name_ = unique_names.get_unique_name(name)
        else:
            name_ = unique_names.get_unique_name(name)

        values = channel[name]
        if len(values.shape) > 1:
            values = list(values)
        yield name_, Series(values, index=master)

        for name in names[1:]:
            values = channel[name]
            if not only_basenames:
                axis_name = unique_names.get_unique_name(f"{name_}.{name}")
            else:
                axis_name = unique_names.get_unique_name(name)
            if len(values.shape) > 1:
                arr = [values]
                types = [("", values.dtype, values.shape[1:])]
                values = fromarrays(arr, dtype=types)
                del arr
            yield axis_name, Series(values, index=master, dtype="O")

    # structure composition
    else:
        for name in channel.dtype.names:
            values = channel[name]

            if values.dtype.names:
                yield from components(
                    values,
                    name,
                    unique_names,
                    prefix=f"{prefix}.{channel_name}" if prefix else f"{channel_name}",
                    master=master,
                    only_basenames=only_basenames,
                )

            else:
                if not only_basenames:
                    name_ = unique_names.get_unique_name(
                        f"{prefix}.{channel_name}.{name}"
                        if prefix
                        else f"{channel_name}.{name}"
                    )
                else:
                    name_ = unique_names.get_unique_name(name)
                if len(values.shape) > 1:
                    values = list(values)
                yield name_, Series(values, index=master)


class DataBlockInfo:

    __slots__ = (
        "address",
        "block_type",
        "raw_size",
        "size",
        "param",
        "invalidation_block",
        "block_limit",
    )

    def __init__(
        self,
        address,
        block_type,
        raw_size,
        size,
        param,
        invalidation_block=None,
        block_limit=None,
    ):
        self.address = address
        self.block_type = block_type
        self.raw_size = raw_size
        self.size = size
        self.param = param
        self.invalidation_block = invalidation_block
        self.block_limit = block_limit

    def __repr__(self):
        return (
            f"DataBlockInfo(address=0x{self.address:X}, "
            f"block_type={self.block_type}, "
            f"raw_size={self.raw_size}, "
            f"size={self.size}, "
            f"param={self.param}, "
            f"invalidation_block={self.invalidation_block}, "
            f"block_limit={self.block_limit})"
        )
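# Illustrative sketch (not part of the module, with made-up values): the repr
# is meant for quick debugging of the data block map
#
#     >>> DataBlockInfo(0x1000, "DT", 1024, 512, 0)
#     DataBlockInfo(address=0x1000, block_type=DT, raw_size=1024, size=512, param=0, invalidation_block=None, block_limit=None)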
f"InvalidationBlockInfo(address=0x{self.address:X}, " f"block_type={self.block_type}, " f"raw_size={self.raw_size}, " f"size={self.size}, " f"param={self.param}, " f"all_valid={self.all_valid}, " f"block_limit={self.block_limit})" ) class SignalDataBlockInfo: __slots__ = ( "address", "size", "count", "offsets", ) def __init__(self, address, size, count, offsets=None): self.address = address self.count = count self.size = size self.offsets = offsets def __repr__(self): return ( f"SignalDataBlockInfo(address=0x{self.address:X}, " f"size={self.size}, " f"count={self.count}, " f"offsets={self.offsets})" ) def get_fields(obj): fields = [] for attr in dir(obj): if attr[:2] + attr[-2:] == "____": continue try: if callable(getattr(obj, attr)): continue fields.append(attr) except AttributeError: continue return fields # code snippet taken from https://www.kaggle.com/arjanso/reducing-dataframe-memory-size-by-65 def downcast(array): kind = array.dtype.kind if kind == "f": array = array.astype(np.float32) elif kind in "ui": min_ = array.min() max_ = array.max() if min_ >= 0: if max_ < 255: array = array.astype(np.uint8) elif max_ < 65535: array = array.astype(np.uint16) elif max_ < 4294967295: array = array.astype(np.uint32) else: array = array.astype(np.uint64) else: if min_ > np.iinfo(np.int8).min and max_ < np.iinfo(np.int8).max: array = array.astype(np.int8) elif min_ > np.iinfo(np.int16).min and max_ < np.iinfo(np.int16).max: array = array.astype(np.int16) elif min_ > np.iinfo(np.int32).min and max_ < np.iinfo(np.int32).max: array = array.astype(np.int32) elif min_ > np.iinfo(np.int64).min and max_ < np.iinfo(np.int64).max: array = array.astype(np.int64) return array def master_using_raster(mdf, raster, endpoint=False): """ get single master based on the raster Parameters ---------- mdf : asammdf.MDF measurement object raster : float new raster endpoint=False : bool include maximum time stamp in the new master Returns ------- master : np.array new master """ if not raster: master = np.array([], dtype="<f8") else: t_min = [] t_max = [] for group_index in mdf.virtual_groups: group = mdf.groups[group_index] cycles_nr = group.channel_group.cycles_nr if cycles_nr: master_min = mdf.get_master( group_index, record_offset=0, record_count=1, ) if len(master_min): t_min.append(master_min[0]) master_max = mdf.get_master( group_index, record_offset=cycles_nr - 1, record_count=1, ) if len(master_max): t_max.append(master_max[0]) if t_min: t_min = np.amin(t_min) t_max = np.amax(t_max) num = float(np.float32((t_max - t_min) / raster)) if int(num) == num: master = np.linspace(t_min, t_max, int(num) + 1) else: master = np.arange(t_min, t_max, raster) if endpoint: master = np.concatenate([master, [t_max]]) else: master = np.array([], dtype="<f8") return master def extract_signal(signal, payload): vals = payload big_endian = False if signal.is_little_endian else True signed = signal.is_signed start_bit = signal.get_startbit(bit_numbering=1) if big_endian: start_byte = start_bit // 8 bit_count = signal.size pos = start_bit % 8 + 1 over = bit_count % 8 if pos >= over: bit_offset = (pos - over) % 8 else: bit_offset = pos + 8 - over else: start_byte, bit_offset = divmod(start_bit, 8) bit_count = signal.size if big_endian: byte_pos = start_byte + 1 start_pos = start_bit bits = bit_count while True: pos = start_pos % 8 + 1 if pos < bits: byte_pos += 1 bits -= pos start_pos = 7 else: break if byte_pos > vals.shape[1]: raise MdfException( f'Could not extract signal "{signal.name}" with start ' f"bit {start_bit} and bit 
def extract_signal(signal, payload):
    vals = payload

    big_endian = False if signal.is_little_endian else True
    signed = signal.is_signed

    start_bit = signal.get_startbit(bit_numbering=1)

    if big_endian:
        start_byte = start_bit // 8
        bit_count = signal.size

        pos = start_bit % 8 + 1

        over = bit_count % 8

        if pos >= over:
            bit_offset = (pos - over) % 8
        else:
            bit_offset = pos + 8 - over
    else:
        start_byte, bit_offset = divmod(start_bit, 8)

    bit_count = signal.size

    if big_endian:
        byte_pos = start_byte + 1
        start_pos = start_bit
        bits = bit_count

        while True:
            pos = start_pos % 8 + 1
            if pos < bits:
                byte_pos += 1
                bits -= pos
                start_pos = 7
            else:
                break

        if byte_pos > vals.shape[1]:
            raise MdfException(
                f'Could not extract signal "{signal.name}" with start '
                f"bit {start_bit} and bit count {signal.size} "
                f"from the payload with shape {vals.shape}"
            )
    else:
        if start_bit + bit_count > vals.shape[1] * 8:
            raise MdfException(
                f'Could not extract signal "{signal.name}" with start '
                f"bit {start_bit} and bit count {signal.size} "
                f"from the payload with shape {vals.shape}"
            )

    byte_size, r = divmod(bit_offset + bit_count, 8)
    if r:
        byte_size += 1

    if byte_size in (1, 2, 4, 8):
        extra_bytes = 0
    else:
        extra_bytes = 4 - (byte_size % 4)

    std_size = byte_size + extra_bytes

    # prepend or append extra bytes columns
    # to get a standard size number of bytes

    # print(signal.name, start_bit, bit_offset, start_byte, byte_size)

    if extra_bytes:
        if big_endian:
            vals = np.column_stack(
                [
                    vals[:, start_byte : start_byte + byte_size],
                    np.zeros(len(vals), dtype=f"<({extra_bytes},)u1"),
                ]
            )
            try:
                vals = vals.view(f">u{std_size}").ravel()
            except:
                vals = np.frombuffer(vals.tobytes(), dtype=f">u{std_size}")

            vals = vals >> (extra_bytes * 8 + bit_offset)
            vals &= (2 ** bit_count) - 1
        else:
            vals = np.column_stack(
                [
                    vals[:, start_byte : start_byte + byte_size],
                    np.zeros(len(vals), dtype=f"<({extra_bytes},)u1"),
                ]
            )
            try:
                vals = vals.view(f"<u{std_size}").ravel()
            except:
                vals = np.frombuffer(vals.tobytes(), dtype=f"<u{std_size}")

            vals = vals >> bit_offset
            vals &= (2 ** bit_count) - 1
    else:
        if big_endian:
            try:
                vals = (
                    vals[:, start_byte : start_byte + byte_size]
                    .view(f">u{std_size}")
                    .ravel()
                )
            except:
                vals = np.frombuffer(
                    vals[:, start_byte : start_byte + byte_size].tobytes(),
                    dtype=f">u{std_size}",
                )

            vals = vals >> bit_offset
            vals &= (2 ** bit_count) - 1
        else:
            try:
                vals = (
                    vals[:, start_byte : start_byte + byte_size]
                    .view(f"<u{std_size}")
                    .ravel()
                )
            except:
                vals = np.frombuffer(
                    vals[:, start_byte : start_byte + byte_size].tobytes(),
                    dtype=f"<u{std_size}",
                )

            vals = vals >> bit_offset
            vals &= (2 ** bit_count) - 1

    if signed:
        vals = as_non_byte_sized_signed_int(vals, bit_count)

    if (signal.factor, signal.offset) != (1, 0):
        vals = vals * float(signal.factor)
        vals += float(signal.offset)

    return vals
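# Illustrative sketch (not part of the module): extracting a little endian,
# unsigned 8 bit signal that starts at bit 8 of a 2 byte payload. The Signal
# arguments shown are a minimal subset of the canmatrix API and the expected
# output assumes canmatrix defaults (factor=1, offset=0)
#
#     import canmatrix
#
#     sig = canmatrix.Signal(
#         "Temperature", start_bit=8, size=8, is_little_endian=True, is_signed=False
#     )
#     payload = np.array([[0x00, 0x7F], [0x00, 0x80]], dtype=np.uint8)
#     extract_signal(sig, payload)  # expected: array([127, 128], dtype=uint8)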
def extract_can_signal(signal, payload):
    return extract_signal(signal, payload)


def extract_mux(payload, message, message_id, bus, t, muxer=None, muxer_values=None):
    """extract multiplexed CAN signals from the raw payload

    Parameters
    ----------
    payload : np.ndarray
        raw CAN payload as numpy array
    message : canmatrix.Frame
        CAN message description parsed by canmatrix
    message_id : int
        message id
    bus : int
        bus channel number
    t : np.ndarray
        timestamps for the raw payload
    muxer (None) : str
        name of the parent multiplexor signal
    muxer_values (None) : np.ndarray
        multiplexor signal values

    Returns
    -------
    extracted_signal : dict
        each value in the dict is a list of signals that share the same
        multiplexors

    """

    if muxer is None:
        if message.is_multiplexed:
            for sig in message:
                if sig.multiplex == "Multiplexor" and sig.muxer_for_signal is None:
                    multiplexor_name = sig.name
                    break
            for sig in message:
                if (
                    sig.multiplex not in (None, "Multiplexor")
                    and sig.muxer_for_signal is None
                ):
                    sig.muxer_for_signal = multiplexor_name
                    sig.mux_val_min = sig.mux_val_max = int(sig.multiplex)
                    sig.mux_val_grp.insert(0, (int(sig.multiplex), int(sig.multiplex)))

    extracted_signals = {}

    if message.size > payload.shape[1]:
        return extracted_signals

    pairs = {}
    for signal in message:
        if signal.muxer_for_signal == muxer:
            try:
                entry = signal.mux_val_min, signal.mux_val_max
            except:
                entry = tuple(signal.mux_val_grp[0]) if signal.mux_val_grp else (0, 0)
            pair_signals = pairs.setdefault(entry, [])
            pair_signals.append(signal)

    for pair, pair_signals in pairs.items():
        entry = bus, message_id, muxer, *pair

        extracted_signals[entry] = signals = {}

        if muxer_values is not None:
            min_, max_ = pair
            idx = np.argwhere((min_ <= muxer_values) & (muxer_values <= max_)).ravel()
            payload_ = payload[idx]
            t_ = t[idx]
        else:
            t_ = t
            payload_ = payload

        for sig in pair_signals:
            samples = extract_signal(sig, payload_)
            if len(samples) == 0 and len(t_):
                continue

            max_val = np.full(len(samples), float(sig.calc_max()))

            signals[sig.name] = {
                "name": sig.name,
                "comment": sig.comment or "",
                "unit": sig.unit or "",
                "samples": samples,
                "t": t_,
                "invalidation_bits": np.isclose(samples, max_val),
            }

            if sig.multiplex == "Multiplexor":
                extracted_signals.update(
                    extract_mux(
                        payload_,
                        message,
                        message_id,
                        bus,
                        t_,
                        muxer=sig.name,
                        muxer_values=samples,
                    )
                )

    return extracted_signals


def csv_int2hex(val):
    """format CAN id as hex

    100 -> 64

    """
    return f"{val:X}"


csv_int2hex = np.vectorize(csv_int2hex, otypes=[str])


def csv_bytearray2hex(val):
    r"""format CAN payload as hex strings

    b'\xa2\xc3\x08' -> A2 C3 08

    """
    val = val.tobytes().hex().upper()
    vals = [val[i : i + 2] for i in range(0, len(val), 2)]

    return " ".join(vals)


csv_bytearray2hex = np.vectorize(csv_bytearray2hex, otypes=[str])


def pandas_query_compatible(name):
    """adjust column name for usage in dataframe query string"""

    for c in ".$[]: ":
        name = name.replace(c, "_")
    if name.startswith(tuple(string.digits)):
        name = "file_" + name
    try:
        exec(f"from pandas import {name}")
    except ImportError:
        pass
    else:
        name = f"{name}__"
    return name


def load_can_database(file, contents=None, **kwargs):
    file = Path(file)
    dbc = None

    if file.suffix.lower() in (".dbc", ".arxml") or contents:
        if contents is None and file.exists():
            contents = file.read_bytes()

        if contents:
            import_type = file.suffix.lower().strip(".")
            loads = dbc_load if import_type == "dbc" else arxml_load

            contents = BytesIO(contents)
            try:
                try:
                    dbc = loads(contents, import_type=import_type, key="db", **kwargs)
                except UnicodeDecodeError:
                    # retry with the detected encoding; use the raw bytes
                    # since BytesIO objects have no decode method
                    raw = contents.getvalue()
                    encoding = detect(raw)["encoding"]
                    contents = raw.decode(encoding)
                    dbc = loads(
                        contents,
                        importType=import_type,
                        import_type=import_type,
                        key="db",
                        encoding=encoding,
                    )
            except Exception:
                dbc = None

    if isinstance(dbc, dict):
        if dbc:
            first_bus = list(dbc)[0]
            dbc = dbc[first_bus]
        else:
            dbc = None

    return dbc


def all_blocks_addresses(obj):
    pattern = re.compile(
        rb"(?P<block>##(D[GVTZIL]|AT|C[AGHNC]|EV|FH|HL|LD|MD|R[DVI]|S[IRD]|TX))",
        re.DOTALL | re.MULTILINE,
    )

    try:
        obj.seek(0)
    except:
        pass

    try:
        match_starts = [match.start() for match in re.finditer(pattern, obj)]
    except TypeError:
        # TypeError: expected string or bytes-like object
        # when reading PyFilesystem concrete class from S3
        match_starts = [match.start() for match in re.finditer(pattern, obj.read())]

    return match_starts
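# Illustrative sketch (not part of the module, with a made-up file name):
# load_can_database returns a canmatrix CanMatrix, or None when the file
# cannot be parsed, so the result should be checked before use
#
#     dbc = load_can_database("vehicle.dbc")
#     if dbc is not None:
#         for frame in dbc.frames:
#             print(frame.name, frame.size)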