Source code for hexrec.utils

# Copyright (c) 2013-2024, Andrea Zoppi
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

r"""Generic utility functions."""

import binascii
import re
import sys
from typing import Any
from typing import Iterator
from typing import Mapping
from typing import Optional
from typing import Sequence
from typing import Union

from bytesparse import MemoryIO
from bytesparse.base import Address
from bytesparse.base import ImmutableMemory

from .base import AnyBytes
from .base import EllipsisType

SUFFIX_SCALE: Mapping[str, int] = {
    'k': 2**10,
    'm': 2**20,
    'g': 2**30,
    't': 2**40,
    'p': 2**50,
    'e': 2**60,
    'z': 2**70,
    'y': 2**80,

    'kib': 2**10,
    'mib': 2**20,
    'gib': 2**30,
    'tib': 2**40,
    'pib': 2**50,
    'eib': 2**60,
    'zib': 2**70,
    'yib': 2**80,

    'kb': 10**3,
    'mb': 10**6,
    'gb': 10**9,
    'tb': 10**12,
    'pb': 10**15,
    'eb': 10**18,
    'zb': 10**21,
    'yb': 10**24,
}
r"""Integer suffix to scale factor."""

INT_REGEX = re.compile(r'^\s*(?P<sign>[+-]?)\s*'
                       r'(?P<prefix>(0x|0b|0o|0)?)'
                       r'(?P<value>[a-f0-9]+)'
                       r'(?P<suffix>h?)'
                       r'\s*(?P<scale>('
                       r'k|m|g|t|p|e|z|y|'
                       r'kib|mib|gib|tib|pib|eib|zib|yib|'
                       r'kb|mb|gb|tb|pb|eb|zb|yb'
                       r')?)\s*$')

DEFAULT_DELETE: bytes = b' \t.-:\r\n'
r"""Delete from hex strings.

Default values to delete from hexadecimal strings via :meth:`unhexlify`.
These are commonly used as byte separators or whitespace in hex strings.
"""

__BINASCII_HEXLIFY_HAS_SEP = (sys.version_info >= (3, 8))



[docs]
def chop(
    vector: AnyBytes,
    window: int,
    align_base: int = 0,
) -> Iterator[AnyBytes]:
    r"""Chops a vector.

    Iterates through the vector grouping its items into windows.

    Args:
        vector (items):
            Vector to chop.

        window (int):
            Window length.

        align_base (int):
            Offset of the first window.

    Yields:
        list or items: `vector` slices of up to `window` elements.

    Examples:
        >>> list(chop(b'ABCDEFG', 2))
        ['AB', 'CD', 'EF', 'G']

        >>> b':'.join(chop(b'ABCDEFG', 2))
        b'AB:CD:EF:G'

        >>> list(chop(b'ABCDEFG', 4, 3))
        [b'A', b'BCDE', b'FG']
    """
    window = int(window)
    if window <= 0:
        raise ValueError('non-positive window')

    align_base = int(align_base)
    if align_base:
        offset = -align_base % window
        chunk = vector[:offset]
        yield chunk
    else:
        offset = 0

    for i in range(offset, len(vector), window):
        yield vector[i:(i + window)]




[docs]
def hexlify(
    bytestr: Union[bytes, bytearray],
    sep: Optional[Union[bytes, bytearray]] = None,
    upper: bool = True,
) -> bytes:
    r"""Converts raw bytes into a hexadecimal byte string.

    Args:
        bytestr (bytes):
            Source byte string.

        sep (bytes):
            Optional byte separator.

        upper (bool):
            Uppercase hexadecimal string.

    Returns:
        bytes: Hexadecimal byte string.

    Examples:
        >>> from hexrec.utils import hexlify
        >>> hexlify(b'\xAA\xBB\xCC')
        b'AABBCC'
        >>> hexlify(b'\xAA\xBB\xCC', sep=b' ')
        b'AA BB CC'
        >>> hexlify(b'\xAA\xBB\xCC', sep=b'-')
        b'AA-BB-CC'
        >>> hexlify(b'\xAA\xBB\xCC', upper=False)
        b'aabbcc'
    """

    if sep:
        pass  # coverage
        if __BINASCII_HEXLIFY_HAS_SEP:  # pragma: no cover
            hexstr = binascii.hexlify(bytestr, sep)
        else:  # pragma: no cover
            hexstr = sep.join(b'%02x' % b for b in bytestr)
    else:
        hexstr = binascii.hexlify(bytestr)

    if upper:
        hexstr = hexstr.upper()

    return hexstr




[docs]
def parse_int(
    value: Union[str, Any],
) -> Optional[int]:
    r"""Parses an integer.

    Args:
        value:
            A generic object to convert to integer.
            In case `value` is a :obj:`str` (case-insensitive), it can be
            either prefixed with ``0x`` or postfixed with ``h`` to convert
            from a hexadecimal representation, or prefixed with ``0b`` from
            binary; a prefix of only ``0`` converts from octal.
            A further suffix applies a scale factor as per
            :data:`SUFFIX_SCALE`.
            A ``None`` value evaluates as ``None``.
            Any other object class will call the standard :func:`int`.

    Returns:
        int: None if `value` is ``None``, its integer conversion otherwise.

    Examples:
        >>> parse_int('-0xABk')
        -175104

        >>> parse_int(None) is None
        True

        >>> parse_int(123)
        123

        >>> parse_int(135.7)
        135
    """
    if value is None:
        return None

    elif isinstance(value, str):
        value = value.lower()
        m = INT_REGEX.match(value)
        if not m:
            raise ValueError(f'invalid syntax: {value!r}')
        g = m.groupdict()
        sign = g['sign']
        prefix = g['prefix']
        value = g['value']
        suffix = g['suffix']
        scale = g['scale']
        if prefix in ('0b', '0o') and suffix == 'h':
            raise ValueError(f'invalid syntax: {value!r}')

        if prefix == '0x' or suffix == 'h':
            i = int(value, 16)
        elif prefix == '0b':
            i = int(value, 2)
        elif prefix == '0' or prefix == '0o':
            i = int(value, 8)
        else:
            i = int(value, 10)

        i *= SUFFIX_SCALE.get((scale or '').lower(), 1)

        if sign == '-':
            i = -i

        return i

    else:
        return int(value)




[docs]
def unhexlify(
    hexstr: Union[bytes, bytearray],
    delete: Optional[Union[bytes, bytearray, EllipsisType]] = None,
) -> bytes:
    r"""Converts a hexadecimal byte string into raw bytes.

    If `delete`, its byte values are deleted from `hexstr` before evaluation.
    Useful to remove whitespace and separators.

    Args:
        hexstr (bytes):
            Source hexadecimal byte string.

        delete (bytes):
            If empty or ``None``, no deletion occurs.
            If ``Ellipsis``, :data:`DEFAULT_DELETE` is used.

    Returns:
        bytes: Raw byte string.

    Examples:
        >>> from hexrec.utils import unhexlify
        >>> unhexlify(b'AABBCC')
        b'\xaa\xbb\xcc'
        >>> unhexlify(b'AA BB CC', delete=...)
        b'\xaa\xbb\xcc'
        >>> unhexlify(b'AA-BB-CC', delete=...)
        b'\xaa\xbb\xcc'
        >>> unhexlify(b'AA/BB/CC', delete=b'/')
        b'\xaa\xbb\xcc'
    """

    if delete:
        if delete is Ellipsis:
            delete = DEFAULT_DELETE
        hexstr = hexstr.translate(None, delete)

    bytestr = binascii.unhexlify(hexstr)
    return bytestr




[docs]
class SparseMemoryIO(MemoryIO):
    r"""Sparse memory I/O wrapper.

    With respect to the parent class :class:`bytesparse.io.MemoryIO`, it allows
    reading and writing memory *holes*.

    Such holes are marked by the following integer values (instead of ``None``):

    * ``0x100`` = hole byte within memory span
        (:attr:`bytesparse.base.ImmutableMemory.span`);

    * ``0x101`` = hole byte before memory start address
        (:attr:`bytesparse.base.ImmutableMemory.start`);

    * ``0x102`` = hole byte after memory end address
        (:attr:`bytesparse.base.ImmutableMemory.endex`);

    These special values allow displaying dedicated stuff when dumping memory
    data to standard output.

    See Also:
        :class:`bytesparse.io.MemoryIO`
        :attr:`bytesparse.base.ImmutableMemory.span`
        :attr:`bytesparse.base.ImmutableMemory.start`
        :attr:`bytesparse.base.ImmutableMemory.endex`
    """


[docs]
    def read(
        self,
        size: Optional[Address] = -1,
        asmemview: bool = False,
    ) -> Union[bytes, memoryview, Address, Sequence[int]]:

        if asmemview:
            raise ValueError('memory view not supported')

        memory = self._memory
        start = self._position
        if start >= memory.endex:
            return b''
        endex = None if size < 0 else start + size
        buffer = b''
        try:
            buffer = memory.view(start=start, endex=endex)
            contiguous = True
        except ValueError:
            contiguous = False

        if contiguous:
            size = len(buffer)
        else:
            buffer = list(memory.values(start=start, endex=endex))
            size = len(buffer)
            offset_start = memory.start - start
            offset_endex = memory.endex - start

            for offset in range(size):
                if buffer[offset] is None:
                    if offset < offset_start:
                        buffer[offset] = 0x101  # before
                    elif offset >= offset_endex:
                        buffer[offset] = 0x102  # after
                    else:
                        buffer[offset] = 0x100  # within

        self._position = start + size
        return buffer



[docs]
    def write(
        self,
        buffer: Union[AnyBytes, ImmutableMemory, int, Sequence[int]],
    ) -> Address:

        if isinstance(buffer, (bytes, bytearray, memoryview, ImmutableMemory, int)):
            return super().write(buffer)

        memory = self._memory
        start = self._position
        size = len(buffer)

        for offset in range(size):
            value = buffer[offset]
            memory.poke(start + offset, value if value < 0x100 else None)

        self._position = start + size
        return size