Refactor code structure for improved readability and maintainability

This commit is contained in:
claudi 2026-04-07 09:10:53 +02:00
parent 389d72a136
commit aa4c067ea8
1685 changed files with 393439 additions and 71932 deletions

View file

@ -0,0 +1,95 @@
"""
This module provides private data for the base implementation for the
:module:`re2` library.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from dataclasses import (
dataclass)
from typing import (
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
# Import re2 defensively: a missing installation is recorded in `re2_error`
# instead of being raised, so importing this module does not fail just because
# re2 is absent.
try:
    import re2
    re2_error = None
except ModuleNotFoundError as e:
    # re2 is not installed: leave placeholders and remember the error.
    re2 = None
    re2_error = e
    RE2_OPTIONS = None
else:
    # Both the `google-re2` and `pyre2` libraries use the `re2` namespace.
    # `google-re2` is the only one currently supported.
    try:
        RE2_OPTIONS = re2.Options()
        RE2_OPTIONS.log_errors = False
        RE2_OPTIONS.never_capture = True
    except Exception as e:
        # The imported `re2` module could not provide/configure `Options`
        # (presumably it is not `google-re2` -- TODO confirm). Record the error
        # and treat re2 as unavailable.
        re2_error = e
        RE2_OPTIONS = None

# NOTE: Both failure paths above assign None, hence the Optional annotation.
RE2_OPTIONS: Optional[re2.Options]
"""
The re2 options to use:
- `log_errors=False` disables logging to stderr.
- `never_capture=True` disables capture groups because they effectively cannot
be utilized with :class:`re2.Set`.
"""

# Either the ModuleNotFoundError from the import, or the exception raised while
# building the options; None when both steps succeeded.
re2_error: Optional[Exception]
"""
*re2_error* (:class:`Exception` or :data:`None`) is the re2 import error.
"""
@dataclass(frozen=True)
class Re2RegexDat:
    """
    The :class:`Re2RegexDat` class is an immutable record holding the data
    associated with one regular expression.
    """

    # `dataclass(slots=True)` requires Python 3.10, so the slots are declared
    # by hand.
    __slots__ = [
        'include',
        'index',
        'is_dir_pattern',
    ]

    include: bool
    """
    *include* (:class:`bool`) is whether the matched files should be included
    (:data:`True`), or excluded (:data:`False`).
    """

    index: int
    """
    *index* (:class:`int`) is the index of the pattern.
    """

    is_dir_pattern: bool
    """
    *is_dir_pattern* (:class:`bool`) is whether the pattern is a gitignore
    directory pattern.
    """
@dataclass(frozen=True)
class Re2RegexDebug(Re2RegexDat):
    """
    The :class:`Re2RegexDebug` class extends :class:`Re2RegexDat` with extra
    debugging information about the regular expression.
    """

    # `dataclass(slots=True)` requires Python 3.10, so the additional slot is
    # declared by hand.
    __slots__ = ['regex']

    regex: Union[str, bytes]
    """
    *regex* (:class:`str` or :class:`bytes`) is the regular expression.
    """

View file

@ -0,0 +1,18 @@
"""
This module provides the base implementation for the :module:`re2` backend.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from ._base import (
re2_error)
# Bare annotation (no assignment): the value is the one imported from `._base`
# above.
re2_error: Optional[Exception]
"""
*re2_error* (:class:`Exception` or :data:`None`) is the re2 import error.
"""

View file

@ -0,0 +1,179 @@
"""
This module provides the :module:`re2` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
try:
import re2
except ModuleNotFoundError:
re2 = None
from pathspec.pattern import (
RegexPattern)
from pathspec.patterns.gitignore.spec import (
GitIgnoreSpecPattern,
_BYTES_ENCODING,
_DIR_MARK_CG,
_DIR_MARK_OPT)
from pathspec._typing import (
override) # Added in 3.12.
from ._base import (
Re2RegexDat,
Re2RegexDebug)
from .pathspec import (
Re2PsBackend)
class Re2GiBackend(Re2PsBackend):
    """
    The :class:`Re2GiBackend` class is the :module:`re2` implementation used by
    :class:`~pathspec.gitignore.GitIgnoreSpec` for matching files.
    """

    @override
    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Populate and compile the re2 regex set.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set. Each regex is added to
        it, and it is compiled before returning.

        *sort_indices* (:class:`callable` or :data:`None`) is a function used to
        sort the patterns by index. This is used during testing to ensure the
        order of patterns is not accidentally relied on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`).
        """
        # Determine the order in which patterns are registered.
        ordered = list(patterns)
        if sort_indices is not None:
            sort_indices(ordered)

        results: list[Re2RegexDat] = []
        for pattern_index in ordered:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # Patterns without an include flag are not registered.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            raw_regex = pattern.regex.pattern

            # Each entry is (regex to add, whether it is a directory pattern).
            variants: list[tuple[Union[str, bytes], bool]] = []

            if isinstance(pattern, GitIgnoreSpecPattern):
                # GitIgnoreSpecPattern uses capture groups for its directory
                # marker. Re2 supports capture groups, but they cannot be
                # utilized when using `re2.Set`, so rewrite the regex instead.
                text: str
                if isinstance(raw_regex, str):
                    text = raw_regex
                else:
                    assert isinstance(raw_regex, bytes), raw_regex
                    text = raw_regex.decode(_BYTES_ENCODING)

                if _DIR_MARK_CG in text:
                    if text.endswith(_DIR_MARK_OPT):
                        # Optional directory marker: register one directory
                        # variant and one file variant.
                        stem = text[:-len(_DIR_MARK_OPT)]
                        variants.append((f'{stem}/', True))
                        variants.append((f'{stem}$', False))
                    else:
                        # Mandatory directory marker: replace the capture group
                        # with a plain slash.
                        variants.append((text.replace(_DIR_MARK_CG, '/'), True))

            if not variants:
                # No special handling applied; use the regex unchanged.
                variants.append((raw_regex, False))

            for variant, is_dir_pattern in variants:
                entry: Re2RegexDat
                if debug:
                    entry = Re2RegexDebug(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                        regex=variant,
                    )
                else:
                    entry = Re2RegexDat(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                    )

                # The position in *results* must line up with the id re2 assigns
                # on `Add()`, so both are updated together.
                results.append(entry)
                regex_set.Add(variant)

        # Compile patterns.
        regex_set.Compile()
        return results

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        matched: Optional[list[int]] = self._set.Match(file)
        if not matched:
            return (None, None)

        regex_data = self._regex_data
        best_include: Optional[bool] = None
        best_index: int = -1
        best_priority = -1

        # WARNING: According to the documentation on `RE2::Set::Match()`, there
        # is no guarantee matches will be produced in order!
        for regex_id in matched:
            dat = regex_data[regex_id]
            is_dir_pattern = dat.is_dir_pattern

            # A match by a directory pattern gets priority 1, a match by a file
            # pattern gets priority 2.
            priority = 1 if is_dir_pattern else 2

            include = dat.include
            index = dat.index
            if (
                (include and is_dir_pattern and index > best_index)
                or (priority == best_priority and index > best_index)
                or priority > best_priority
            ):
                best_include = include
                best_index = index
                best_priority = priority

        assert best_index != -1, (best_index, best_include, best_priority)
        return (best_include, best_index)

View file

@ -0,0 +1,187 @@
"""
This module provides the :module:`re2` backend for :class:`~pathspec.pathspec.PathSpec`.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from collections.abc import (
Sequence)
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional) # Replaced by `X | None` in 3.10.
try:
import re2
except ModuleNotFoundError:
re2 = None
from pathspec.backend import (
_Backend)
from pathspec.pattern import (
RegexPattern)
from pathspec._typing import (
override) # Added in 3.12.
from .._utils import (
enumerate_patterns)
from .base import (
re2_error)
from ._base import (
RE2_OPTIONS,
Re2RegexDat,
Re2RegexDebug)
class Re2PsBackend(_Backend):
    """
    The :class:`Re2PsBackend` class is the :module:`re2` implementation used by
    :class:`~pathspec.pathspec.PathSpec` for matching files.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_regex: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`Re2PsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *_debug_regex* (:class:`bool` or :data:`None`) is whether to store
        additional debugging information for the regular expressions.

        *_test_sort* (:class:`callable` or :data:`None`) is forwarded to
        :meth:`_init_set` as *sort_indices*.

        Raises the recorded re2 import error if re2 is unavailable, or
        :class:`TypeError` if the first pattern is not a
        :class:`.RegexPattern`.
        """
        # Fail early when re2 could not be set up.
        if re2_error is not None:
            raise re2_error

        # Only the first pattern is type-checked.
        if patterns and not isinstance(patterns[0], RegexPattern):
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        indexed_patterns = dict(enumerate_patterns(
            patterns, filter=True, reverse=False,
        ))
        re2_set = self._make_set()

        self._debug_regex = bool(_debug_regex)
        """
        *_debug_regex* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.
        """

        self._patterns: dict[int, RegexPattern] = indexed_patterns
        """
        *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`RegexPattern`).
        """

        self._regex_data: list[Re2RegexDat] = self._init_set(
            debug=self._debug_regex,
            patterns=indexed_patterns,
            regex_set=re2_set,
            sort_indices=_test_sort,
        )
        """
        *_regex_data* (:class:`list`) maps regex index (:class:`int`) to regex data
        (:class:`Re2RegexDat`).
        """

        self._set: re2.Set = re2_set
        """
        *_set* (:class:`re2.Set`) is the re2 regex set.
        """

    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Populate and compile the re2 regex set.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set. Each regex is added to
        it, and it is compiled before returning.

        *sort_indices* (:class:`callable` or :data:`None`) is a function used to
        sort the patterns by index. This is used during testing to ensure the
        order of patterns is not accidentally relied on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`).
        """
        # Determine the order in which patterns are registered.
        ordered = list(patterns)
        if sort_indices is not None:
            sort_indices(ordered)

        results: list[Re2RegexDat] = []
        for pattern_index in ordered:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # Patterns without an include flag are not registered.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern

            entry: Re2RegexDat
            if debug:
                entry = Re2RegexDebug(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                    regex=regex,
                )
            else:
                entry = Re2RegexDat(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                )

            # The position in *results* must line up with the id re2 assigns on
            # `Add()`, so both are updated together.
            results.append(entry)
            regex_set.Add(regex)

        # Compile patterns.
        regex_set.Compile()
        return results

    @staticmethod
    def _make_set() -> re2.Set:
        """
        Create the (empty) re2 regex set using the shared re2 options.

        Returns the set (:class:`re2.Set`).
        """
        return re2.Set.SearchSet(RE2_OPTIONS)

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        matched: Optional[list[int]] = self._set.Match(file)
        if not matched:
            return (None, None)

        # WARNING: According to the documentation on `RE2::Set::Match()`, there
        # is no guarantee matches will be produced in order! Later patterns have
        # higher priority, so select the largest pattern index explicitly.
        regex_data = self._regex_data
        pattern_index = max(regex_data[regex_id].index for regex_id in matched)
        winner = self._patterns[pattern_index]
        return (winner.include, pattern_index)