Refactor code structure for improved readability and maintainability

This commit is contained in:
claudi 2026-04-07 09:10:53 +02:00
parent 389d72a136
commit aa4c067ea8
1685 changed files with 393439 additions and 71932 deletions

View file

@ -0,0 +1,68 @@
"""
The *pathspec* package provides pattern matching for file paths. So far this
only includes Git's *gitignore* patterns.
The following classes are imported and made available from the root of the
`pathspec` package:
- :class:`pathspec.gitignore.GitIgnoreSpec`
- :class:`pathspec.pathspec.PathSpec`
- :class:`pathspec.pattern.Pattern`
- :class:`pathspec.pattern.RegexPattern`
- :class:`pathspec.util.RecursionError`
The following functions are also imported:
- :func:`pathspec.util.lookup_pattern`
The following deprecated functions are also imported to maintain backward
compatibility:
- :func:`pathspec.util.iter_tree`
- :func:`pathspec.util.match_files`
"""
from .gitignore import (
GitIgnoreSpec)
from .pathspec import (
PathSpec)
from .pattern import (
Pattern,
RegexPattern)
from .util import (
RecursionError,
iter_tree, # Deprecated since 0.10.0.
lookup_pattern,
match_files) # Deprecated since 0.10.0.
from ._meta import (
__author__,
__copyright__,
__credits__,
__license__)
from ._version import (
__version__)
# Load pattern implementations.
from . import patterns
# Declare the names imported above as the public interface of the package.
# The deprecated imports (`iter_tree` and `match_files`) are deliberately
# excluded: they stay importable by name but are not exported by `import *`.
__all__ = [
'GitIgnoreSpec',
'PathSpec',
'Pattern',
'RecursionError',
'RegexPattern',
'__author__',
'__copyright__',
'__credits__',
'__license__',
'__version__',
'lookup_pattern',
]

View file

@ -0,0 +1,4 @@
"""
WARNING: The *pathspec._backends* package is not part of the public API. Its
contents and structure are likely to change.
"""

View file

@ -0,0 +1,45 @@
"""
This module provides private utility functions for backends.
WARNING: The *pathspec._backends* package is not part of the public API. Its
contents and structure are likely to change.
"""
from collections.abc import (
Iterable)
from typing import (
TypeVar)
from pathspec.pattern import (
Pattern)
TPattern = TypeVar("TPattern", bound=Pattern)
def enumerate_patterns(
    patterns: Iterable[TPattern],
    filter: bool,
    reverse: bool,
) -> list[tuple[int, TPattern]]:
    """
    Enumerate the patterns.

    *patterns* (:class:`Iterable` of :class:`.Pattern`) contains the patterns.

    *filter* (:class:`bool`) is whether to remove no-op patterns (:data:`True`),
    or keep them (:data:`False`). A no-op pattern is one whose *include*
    attribute is :data:`None`.

    *reverse* (:class:`bool`) is whether to reverse the pattern order
    (:data:`True`), or keep the order (:data:`False`).

    Returns the enumerated patterns (:class:`list` of :class:`tuple`). Each
    tuple pairs the position of the pattern in *patterns* with the pattern.
    Positions are assigned before filtering, so they always refer to the
    original order of *patterns*.
    """
    out_patterns = [
        (index, pattern)
        for index, pattern in enumerate(patterns)
        if not filter or pattern.include is not None
    ]
    if reverse:
        out_patterns.reverse()

    return out_patterns

View file

@ -0,0 +1,104 @@
"""
This module provides aggregated private data and utility functions for the
available backends.
WARNING: The *pathspec._backends* package is not part of the public API. Its
contents and structure are likely to change.
"""
from collections.abc import (
Sequence)
from typing import (
cast)
from pathspec.backend import (
BackendNamesHint,
_Backend)
from pathspec.pattern import (
Pattern,
RegexPattern)
from .hyperscan.base import (
hyperscan_error)
from .hyperscan.gitignore import (
HyperscanGiBackend)
from .hyperscan.pathspec import (
HyperscanPsBackend)
from .re2.base import (
re2_error)
from .re2.gitignore import (
Re2GiBackend)
from .re2.pathspec import (
Re2PsBackend)
from .simple.gitignore import (
SimpleGiBackend)
from .simple.pathspec import (
SimplePsBackend)
# Preference order: re2 first, then hyperscan, then the pure-Python fallback.
# A backend is considered available when its import error is None.
_BEST_BACKEND: BackendNamesHint = (
    're2' if re2_error is None
    else 'hyperscan' if hyperscan_error is None
    else 'simple'
)
"""
The best available backend. This is resolved once, when the module is imported.
"""
def make_gitignore_backend(
    name: BackendNamesHint,
    patterns: Sequence[Pattern],
) -> _Backend:
    """
    Build the backend requested by *name* for
    :class:`~pathspec.gitignore.GitIgnoreSpec`.

    *name* (:class:`str`) is the name of the backend: "best", "hyperscan",
    "re2", or "simple". "best" resolves to the best available backend.

    *patterns* (:class:`Sequence` of :class:`.Pattern`) contains the compiled
    patterns.

    Returns the backend (:class:`._Backend`).

    Raises :class:`ValueError` if *name* is not a known backend.
    """
    # Resolve the alias first so the dispatch below only sees concrete names.
    if name == 'best':
        name = _BEST_BACKEND

    # Each branch returns, so the branches are written as guard clauses.
    if name == 'hyperscan':
        return HyperscanGiBackend(cast(Sequence[RegexPattern], patterns))

    if name == 're2':
        return Re2GiBackend(cast(Sequence[RegexPattern], patterns))

    if name == 'simple':
        return SimpleGiBackend(cast(Sequence[RegexPattern], patterns))

    raise ValueError(f"Backend {name=!r} is invalid.")
def make_pathspec_backend(
    name: BackendNamesHint,
    patterns: Sequence[Pattern],
) -> _Backend:
    """
    Build the backend requested by *name* for
    :class:`~pathspec.pathspec.PathSpec`.

    *name* (:class:`str`) is the name of the backend: "best", "hyperscan",
    "re2", or "simple". "best" resolves to the best available backend.

    *patterns* (:class:`Sequence` of :class:`.Pattern`) contains the compiled
    patterns.

    Returns the backend (:class:`._Backend`).

    Raises :class:`ValueError` if *name* is not a known backend.
    """
    # Resolve the alias first so the dispatch below only sees concrete names.
    if name == 'best':
        name = _BEST_BACKEND

    # Each branch returns, so the branches are written as guard clauses.
    if name == 'hyperscan':
        return HyperscanPsBackend(cast(Sequence[RegexPattern], patterns))

    if name == 're2':
        return Re2PsBackend(cast(Sequence[RegexPattern], patterns))

    if name == 'simple':
        # The simple backend takes the patterns as given, without a cast.
        return SimplePsBackend(patterns)

    raise ValueError(f"Backend {name=!r} is invalid.")

View file

@ -0,0 +1,78 @@
"""
This module provides private data for the base implementation for the
:module:`hyperscan` library.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from dataclasses import (
dataclass)
from typing import (
Union) # Replaced by `X | Y` in 3.10.
try:
    import hyperscan
except ModuleNotFoundError:
    # The hyperscan library is optional. Leave a None placeholder so the name
    # always exists in this module.
    hyperscan = None

HS_FLAGS: int = (
    0 if hyperscan is None
    else hyperscan.HS_FLAG_SINGLEMATCH | hyperscan.HS_FLAG_UTF8
)
"""
The hyperscan flags to use (``0`` when hyperscan is not installed):

- HS_FLAG_SINGLEMATCH is needed to ensure the partial patterns only match once.

- HS_FLAG_UTF8 is required to support unicode paths.
"""
@dataclass(frozen=True)
class HyperscanExprDat:
    """
    The :class:`HyperscanExprDat` class is used to store data related to an
    expression. Instances are immutable.
    """

    # The slots argument is not supported until Python 3.10, so the slots are
    # declared by hand. The fields must not be given defaults: a class-level
    # default would conflict with the slot of the same name.
    __slots__ = [
        'include',
        'index',
        'is_dir_pattern',
    ]

    include: bool
    """
    *include* (:class:`bool`) is whether the matched files should be included
    (:data:`True`), or excluded (:data:`False`).
    """

    index: int
    """
    *index* (:class:`int`) is the pattern index.
    """

    is_dir_pattern: bool
    """
    *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
    pattern for gitignore.
    """
@dataclass(frozen=True)
class HyperscanExprDebug(HyperscanExprDat):
    """
    The :class:`HyperscanExprDebug` class extends :class:`HyperscanExprDat`
    with the regular expression text, which is kept for debugging.
    """

    # The slots argument is not supported until Python 3.10. Only the field
    # added by this subclass is listed; the inherited fields already have slots
    # on the base class.
    __slots__ = ['regex']

    regex: Union[str, bytes]
    """
    *regex* (:class:`str` or :class:`bytes`) is the regular expression.
    """

View file

@ -0,0 +1,24 @@
"""
This module provides the base implementation for the :module:`hyperscan`
backend.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Optional)
try:
    import hyperscan
except ModuleNotFoundError as exc:
    # The hyperscan library is optional. Keep the exception so it can be
    # re-raised later when the backend is actually requested.
    hyperscan = None
    hyperscan_error = exc
else:
    hyperscan_error = None

hyperscan_error: Optional[ModuleNotFoundError]
"""
*hyperscan_error* (:class:`ModuleNotFoundError` or :data:`None`) is the
hyperscan import error. It is :data:`None` when the import succeeded.
"""

View file

@ -0,0 +1,245 @@
"""
This module provides the :module:`hyperscan` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from collections.abc import (
Sequence)
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
try:
import hyperscan
except ModuleNotFoundError:
hyperscan = None
from pathspec.pattern import (
RegexPattern)
from pathspec.patterns.gitignore.spec import (
GitIgnoreSpecPattern,
_BYTES_ENCODING,
_DIR_MARK_CG,
_DIR_MARK_OPT)
from pathspec._typing import (
override) # Added in 3.12.
from ._base import (
HS_FLAGS,
HyperscanExprDat,
HyperscanExprDebug)
from .pathspec import (
HyperscanPsBackend)
class HyperscanGiBackend(HyperscanPsBackend):
    """
    The :class:`HyperscanGiBackend` class is the :module:`hyperscan`
    implementation used by :class:`~pathspec.gitignore.GitIgnoreSpec`. The
    Hyperscan database uses block mode for matching files.
    """

    # Change type hint: this subclass also tracks the match priority.
    _out: tuple[Optional[bool], int, int]

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_exprs: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`HyperscanGiBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *_debug_exprs* (:class:`bool` or :data:`None`) is whether to store the
        regular expression with each expression's data.

        *_test_sort* (:class:`callable` or :data:`None`) is a function used to
        reorder the expression ids in place. This is used during testing.
        """
        super().__init__(patterns, _debug_exprs=_debug_exprs, _test_sort=_test_sort)

        self._out = (None, -1, 0)
        """
        *_out* (:class:`tuple`) stores the current match:

        - *0* (:class:`bool` or :data:`None`) is the match include.

        - *1* (:class:`int`) is the match index.

        - *2* (:class:`int`) is the match priority.
        """

    @override
    @staticmethod
    def _init_db(
        db: hyperscan.Database,
        debug: bool,
        patterns: list[tuple[int, RegexPattern]],
        sort_ids: Optional[Callable[[list[int]], None]],
    ) -> list[HyperscanExprDat]:
        """
        Create the Hyperscan database from the given patterns.

        *db* (:class:`hyperscan.Database`) is the Hyperscan database.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the expressions.

        *patterns* (:class:`list` of :class:`tuple`) contains each pattern index
        (:class:`int`) paired with its pattern (:class:`.RegexPattern`). It must
        not be empty.

        *sort_ids* (:class:`callable` or :data:`None`) is a function used to sort
        the compiled expression ids. This is used during testing to ensure the
        order of expressions is not accidentally relied on.

        Returns a :class:`list` indexed by expression id (:class:`int`) to its
        data (:class:`HyperscanExprDat`). A single pattern may produce two
        expressions (a directory variant and a file variant).
        """
        # WARNING: Hyperscan raises a `hyperscan.error` exception when compiled
        # with zero elements.
        assert patterns, patterns

        # Prepare patterns.
        expr_data: list[HyperscanExprDat] = []
        exprs: list[bytes] = []
        for pattern_index, pattern in patterns:
            assert pattern.include is not None, (pattern_index, pattern)

            # Encode regex.
            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern

            use_regexes: list[tuple[Union[str, bytes], bool]] = []
            if isinstance(pattern, GitIgnoreSpecPattern):
                # GitIgnoreSpecPattern uses capture groups for its directory
                # marker but Hyperscan does not support capture groups. Handle
                # this scenario.
                regex_str: str
                if isinstance(regex, str):
                    regex_str = regex
                else:
                    assert isinstance(regex, bytes), regex
                    regex_str = regex.decode(_BYTES_ENCODING)

                if _DIR_MARK_CG in regex_str:
                    # Found directory marker.
                    if regex_str.endswith(_DIR_MARK_OPT):
                        # Regex has optional directory marker. Split regex into
                        # directory and file variants.
                        base_regex = regex_str[:-len(_DIR_MARK_OPT)]
                        use_regexes.append((f'{base_regex}/', True))
                        use_regexes.append((f'{base_regex}$', False))
                    else:
                        # Remove capture group.
                        base_regex = regex_str.replace(_DIR_MARK_CG, '/')
                        use_regexes.append((base_regex, True))

            if not use_regexes:
                # No special case for regex.
                use_regexes.append((regex, False))

            for use_regex, is_dir_pattern in use_regexes:
                if isinstance(use_regex, bytes):
                    regex_bytes = use_regex
                else:
                    assert isinstance(use_regex, str), use_regex
                    # NOTE(review): a bytes regex is decoded above with
                    # `_BYTES_ENCODING` but re-encoded here as UTF-8. Confirm
                    # the two agree for non-ASCII byte patterns.
                    regex_bytes = use_regex.encode('utf8')

                if debug:
                    expr_data.append(HyperscanExprDebug(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                        regex=use_regex,
                    ))
                else:
                    expr_data.append(HyperscanExprDat(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                    ))

                exprs.append(regex_bytes)

        # Sort expressions. The ids keep pointing at the same expression (and
        # therefore the same entry of `expr_data`); only the order in which they
        # are handed to the compiler changes.
        ids = list(range(len(exprs)))
        if sort_ids is not None:
            sort_ids(ids)
            exprs = [exprs[__id] for __id in ids]

        # Compile patterns.
        db.compile(
            expressions=exprs,
            ids=ids,
            elements=len(exprs),
            flags=HS_FLAGS,
        )

        return expr_data

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        # NOTICE: According to benchmarking, a method callback is 13% faster than
        # using a closure here.
        db = self._db
        if db is None:
            # Database was not initialized because there were no patterns. Return
            # no match.
            return (None, None)

        # Reset the accumulator; the callback updates it during the scan.
        self._out = (None, -1, 0)
        db.scan(file.encode('utf8'), match_event_handler=self.__on_match)

        out_include, out_index = self._out[:2]
        if out_index == -1:
            # The callback was never accepted, so nothing matched.
            out_index = None

        return (out_include, out_index)

    # NOTE(review): a double-underscore method name is mangled per class, so
    # this does not replace the parent's `__on_match`. It is only reached through
    # the `match_file` defined on this class.
    @override
    def __on_match(
        self,
        expr_id: int,
        _from: int,
        _to: int,
        _flags: int,
        _context: Any,
    ) -> Optional[bool]:
        """
        Called on each match. Updates *_out* when the matched expression should
        take precedence over the match recorded so far.

        *expr_id* (:class:`int`) is the expression id (index) of the matched
        pattern.

        The remaining arguments are supplied by the scan callback and are not
        used.

        Returns :data:`None`.
        """
        expr_dat = self._expr_data[expr_id]

        is_dir_pattern = expr_dat.is_dir_pattern
        if is_dir_pattern:
            # Pattern matched by a directory pattern.
            priority = 1
        else:
            # Pattern matched by a file pattern.
            priority = 2

        # WARNING: Hyperscan does not guarantee matches will be produced in
        # order!

        include = expr_dat.include
        index = expr_dat.index
        prev_index = self._out[1]
        prev_priority = self._out[2]

        # Accept the new match when any of the following holds:
        # - it is an including directory pattern with a later index;
        # - it has the same priority as the stored match and a later index;
        # - it has a higher priority than the stored match.
        if (
            (include and is_dir_pattern and index > prev_index)
            or (priority == prev_priority and index > prev_index)
            or priority > prev_priority
        ):
            self._out = (include, expr_dat.index, priority)

View file

@ -0,0 +1,251 @@
"""
This module provides the :module:`hyperscan` backend for :class:`~pathspec.pathspec.PathSpec`.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from collections.abc import (
Sequence)
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional) # Replaced by `X | None` in 3.10.
try:
import hyperscan
except ModuleNotFoundError:
hyperscan = None
from pathspec.backend import (
_Backend)
from pathspec.pattern import (
RegexPattern)
from pathspec._typing import (
override) # Added in 3.12.
from .._utils import (
enumerate_patterns)
from .base import (
hyperscan_error)
from ._base import (
HS_FLAGS,
HyperscanExprDat,
HyperscanExprDebug)
class HyperscanPsBackend(_Backend):
    """
    The :class:`HyperscanPsBackend` class is the :module:`hyperscan`
    implementation used by :class:`~pathspec.pathspec.PathSpec` for matching
    files. The Hyperscan database uses block mode for matching files.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_exprs: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`HyperscanPsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *_debug_exprs* (:class:`bool` or :data:`None`) is whether to store the
        regular expression with each expression's data.

        *_test_sort* (:class:`callable` or :data:`None`) is a function used to
        reorder the expression ids in place. This is used during testing.

        Raises the stored hyperscan import error if hyperscan is not available,
        and :class:`TypeError` if the first pattern is not a
        :class:`.RegexPattern`.
        """
        if hyperscan is None:
            raise hyperscan_error

        # Only the first pattern is checked to keep construction cheap.
        if patterns and not isinstance(patterns[0], RegexPattern):
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        use_patterns = enumerate_patterns(
            patterns, filter=True, reverse=False,
        )

        debug_exprs = bool(_debug_exprs)
        if use_patterns:
            db = self._make_db()
            expr_data = self._init_db(
                db=db,
                debug=debug_exprs,
                patterns=use_patterns,
                sort_ids=_test_sort,
            )
        else:
            # WARNING: The hyperscan database cannot be initialized with zero
            # patterns.
            db = None
            expr_data = []

        self._db: Optional[hyperscan.Database] = db
        """
        *_db* (:class:`hyperscan.Database` or :data:`None`) is the Hyperscan
        database. It is :data:`None` when there are no patterns to compile.
        """

        self._debug_exprs = debug_exprs
        """
        *_debug_exprs* (:class:`bool`) is whether to include additional debugging
        information for the expressions.
        """

        self._expr_data: list[HyperscanExprDat] = expr_data
        """
        *_expr_data* (:class:`list`) maps expression index (:class:`int`) to
        expression data (:class:`HyperscanExprDat`).
        """

        self._out: tuple[Optional[bool], int] = (None, -1)
        """
        *_out* (:class:`tuple`) stores the current match:

        - *0* (:class:`bool` or :data:`None`) is the match include.

        - *1* (:class:`int`) is the match index.
        """

        self._patterns: dict[int, RegexPattern] = dict(use_patterns)
        """
        *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`RegexPattern`).
        """

    @staticmethod
    def _init_db(
        db: hyperscan.Database,
        debug: bool,
        patterns: list[tuple[int, RegexPattern]],
        sort_ids: Optional[Callable[[list[int]], None]],
    ) -> list[HyperscanExprDat]:
        """
        Initialize the Hyperscan database from the given patterns.

        *db* (:class:`hyperscan.Database`) is the Hyperscan database.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the expressions.

        *patterns* (:class:`list` of :class:`tuple`) contains each pattern index
        (:class:`int`) paired with its pattern (:class:`.RegexPattern`). It must
        not be empty.

        *sort_ids* (:class:`callable` or :data:`None`) is a function used to sort
        the compiled expression ids. This is used during testing to ensure the
        order of expressions is not accidentally relied on.

        Returns a :class:`list` indexed by expression id (:class:`int`) to its
        data (:class:`HyperscanExprDat`).
        """
        # WARNING: Hyperscan raises a `hyperscan.error` exception when compiled
        # with zero elements.
        assert patterns, patterns

        # Prepare patterns.
        expr_data: list[HyperscanExprDat] = []
        exprs: list[bytes] = []
        for pattern_index, pattern in patterns:
            assert pattern.include is not None, (pattern_index, pattern)

            # Encode regex.
            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern
            if isinstance(regex, bytes):
                regex_bytes = regex
            else:
                assert isinstance(regex, str), regex
                regex_bytes = regex.encode('utf8')

            if debug:
                expr_data.append(HyperscanExprDebug(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                    regex=regex,
                ))
            else:
                expr_data.append(HyperscanExprDat(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                ))

            exprs.append(regex_bytes)

        # Sort expressions. The ids keep pointing at the same expression (and
        # therefore the same entry of `expr_data`); only the order in which they
        # are handed to the compiler changes.
        ids = list(range(len(exprs)))
        if sort_ids is not None:
            sort_ids(ids)
            exprs = [exprs[__id] for __id in ids]

        # Compile patterns.
        db.compile(
            expressions=exprs,
            ids=ids,
            elements=len(exprs),
            flags=HS_FLAGS,
        )

        return expr_data

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        # NOTICE: According to benchmarking, a method callback is 20% faster than
        # using a closure here.
        db = self._db
        if db is None:
            # Database was not initialized because there were no patterns. Return
            # no match.
            return (None, None)

        # Reset the accumulator; the callback updates it during the scan.
        self._out = (None, -1)
        db.scan(file.encode('utf8'), match_event_handler=self.__on_match)

        out_include, out_index = self._out
        if out_index == -1:
            # The callback never stored a match.
            out_index = None

        return (out_include, out_index)

    @staticmethod
    def _make_db() -> hyperscan.Database:
        """
        Create the Hyperscan database.

        Returns the database (:class:`hyperscan.Database`).
        """
        return hyperscan.Database(mode=hyperscan.HS_MODE_BLOCK)

    def __on_match(
        self,
        expr_id: int,
        _from: int,
        _to: int,
        _flags: int,
        _context: Any,
    ) -> Optional[bool]:
        """
        Called on each match. Keeps the match with the highest pattern index in
        *_out*.

        *expr_id* (:class:`int`) is the expression id (index) of the matched
        pattern.

        The remaining arguments are supplied by the scan callback and are not
        used.

        Returns :data:`None`.
        """
        # Store match.
        # - WARNING: Hyperscan does not guarantee matches will be produced in
        #   order! Later expressions have higher priority.
        expr_dat = self._expr_data[expr_id]
        index = expr_dat.index
        prev_index = self._out[1]
        if index > prev_index:
            self._out = (expr_dat.include, index)

View file

@ -0,0 +1,95 @@
"""
This module provides private data for the base implementation for the
:module:`re2` library.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from dataclasses import (
dataclass)
from typing import (
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
try:
    import re2
    re2_error = None
except ModuleNotFoundError as e:
    # The re2 library is optional. Keep the exception so it can be re-raised
    # later when the backend is actually requested.
    re2 = None
    re2_error = e
    RE2_OPTIONS = None
else:
    # Both the `google-re2` and `pyre2` libraries use the `re2` namespace.
    # `google-re2` is the only one currently supported.
    try:
        RE2_OPTIONS = re2.Options()
        RE2_OPTIONS.log_errors = False
        RE2_OPTIONS.never_capture = True
    except Exception as e:
        # The imported module could not provide usable options. Record the
        # failure so the backend is reported as unavailable.
        re2_error = e
        RE2_OPTIONS = None

RE2_OPTIONS: Optional[re2.Options]
"""
The re2 options to use, or :data:`None` when re2 is not usable:

- `log_errors=False` disables logging to stderr.

- `never_capture=True` disables capture groups because they effectively cannot
  be utilized with :class:`re2.Set`.
"""

re2_error: Optional[Exception]
"""
*re2_error* (:class:`Exception` or :data:`None`) is the error raised while
importing re2 or while creating its options. It is :data:`None` when re2 is
usable.
"""
@dataclass(frozen=True)
class Re2RegexDat:
    """
    The :class:`Re2RegexDat` class is used to store data related to a regular
    expression. Instances are immutable.
    """

    # The slots argument is not supported until Python 3.10, so the slots are
    # declared by hand. The fields must not be given defaults: a class-level
    # default would conflict with the slot of the same name.
    __slots__ = [
        'include',
        'index',
        'is_dir_pattern',
    ]

    include: bool
    """
    *include* (:class:`bool`) is whether the matched files should be included
    (:data:`True`), or excluded (:data:`False`).
    """

    index: int
    """
    *index* (:class:`int`) is the pattern index.
    """

    is_dir_pattern: bool
    """
    *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
    pattern for gitignore.
    """
@dataclass(frozen=True)
class Re2RegexDebug(Re2RegexDat):
    """
    The :class:`Re2RegexDebug` class extends :class:`Re2RegexDat` with the
    regular expression text, which is kept for debugging.
    """

    # The slots argument is not supported until Python 3.10. Only the field
    # added by this subclass is listed; the inherited fields already have slots
    # on the base class.
    __slots__ = ['regex']

    regex: Union[str, bytes]
    """
    *regex* (:class:`str` or :class:`bytes`) is the regular expression.
    """

View file

@ -0,0 +1,18 @@
"""
This module provides the base implementation for the :module:`re2` backend.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from ._base import (
re2_error)
# `re2_error` is imported from `._base` above. This bare annotation assigns
# nothing; it only restates the type of the re-exported name so the attribute
# docstring below has something to attach to.
re2_error: Optional[Exception]
"""
*re2_error* (:class:`Exception` or :data:`None`) is the re2 import error.
"""

View file

@ -0,0 +1,179 @@
"""
This module provides the :module:`re2` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
try:
import re2
except ModuleNotFoundError:
re2 = None
from pathspec.pattern import (
RegexPattern)
from pathspec.patterns.gitignore.spec import (
GitIgnoreSpecPattern,
_BYTES_ENCODING,
_DIR_MARK_CG,
_DIR_MARK_OPT)
from pathspec._typing import (
override) # Added in 3.12.
from ._base import (
Re2RegexDat,
Re2RegexDebug)
from .pathspec import (
Re2PsBackend)
class Re2GiBackend(Re2PsBackend):
    """
    The :class:`Re2GiBackend` class is the :module:`re2` implementation used by
    :class:`~pathspec.gitignore.GitIgnoreSpec` for matching files.
    """

    @override
    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Populate and compile the re2 regex set.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set.

        *sort_indices* (:class:`callable` or :data:`None`) is a function used to
        sort the patterns by index. This is used during testing to ensure the
        order of patterns is not accidentally relied on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`). A single pattern may contribute two regexes (a
        directory variant and a file variant).
        """
        # Decide the order in which the patterns are added to the set.
        ordered_indices = list(patterns)
        if sort_indices is not None:
            sort_indices(ordered_indices)

        regex_data: list[Re2RegexDat] = []
        for pattern_index in ordered_indices:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # No-op pattern: nothing to add.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            raw_regex = pattern.regex.pattern

            variants: list[tuple[Union[str, bytes], bool]] = []
            if isinstance(pattern, GitIgnoreSpecPattern):
                # GitIgnoreSpecPattern uses capture groups for its directory
                # marker. Re2 supports capture groups, but they cannot be
                # utilized when using `re2.Set`. Rewrite the regex instead.
                text: str
                if isinstance(raw_regex, str):
                    text = raw_regex
                else:
                    assert isinstance(raw_regex, bytes), raw_regex
                    text = raw_regex.decode(_BYTES_ENCODING)

                if _DIR_MARK_CG in text:
                    if text.endswith(_DIR_MARK_OPT):
                        # Optional directory marker: emit one directory variant
                        # and one file variant.
                        stem = text[:-len(_DIR_MARK_OPT)]
                        variants = [
                            (f'{stem}/', True),
                            (f'{stem}$', False),
                        ]
                    else:
                        # Mandatory directory marker: drop the capture group.
                        variants = [(text.replace(_DIR_MARK_CG, '/'), True)]

            if not variants:
                # No special case applied; use the regex unchanged.
                variants = [(raw_regex, False)]

            for variant, is_dir_pattern in variants:
                dat: Re2RegexDat
                if debug:
                    dat = Re2RegexDebug(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                        regex=variant,
                    )
                else:
                    dat = Re2RegexDat(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                    )

                # The position in `regex_data` must line up with the order of
                # the `Add()` calls, so both happen together.
                regex_data.append(dat)
                regex_set.Add(variant)

        # Compile patterns.
        regex_set.Compile()
        return regex_data

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        match_ids: Optional[list[int]] = self._set.Match(file)
        if not match_ids:
            return (None, None)

        regex_data = self._regex_data

        # Best match so far. The starting priority is below both real
        # priorities, so the first candidate is always accepted.
        best_include: Optional[bool] = None
        best_index: int = -1
        best_priority = -1

        # WARNING: According to the documentation on `RE2::Set::Match()`, there
        # is no guarantee matches will be produced in order!
        for regex_id in match_ids:
            candidate = regex_data[regex_id]
            is_dir_pattern = candidate.is_dir_pattern
            include = candidate.include
            index = candidate.index

            # Directory patterns rank below file patterns.
            priority = 1 if is_dir_pattern else 2

            # Accept the candidate when any of the following holds:
            # - it is an including directory pattern with a later index;
            # - it has the same priority as the best match and a later index;
            # - it has a higher priority than the best match.
            if (
                (include and is_dir_pattern and index > best_index)
                or (priority == best_priority and index > best_index)
                or priority > best_priority
            ):
                best_include, best_index, best_priority = include, index, priority

        assert best_index != -1, (best_index, best_include, best_priority)
        return (best_include, best_index)

View file

@ -0,0 +1,187 @@
"""
This module provides the :module:`re2` backend for :class:`~pathspec.pathspec.PathSpec`.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from collections.abc import (
Sequence)
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional) # Replaced by `X | None` in 3.10.
try:
import re2
except ModuleNotFoundError:
re2 = None
from pathspec.backend import (
_Backend)
from pathspec.pattern import (
RegexPattern)
from pathspec._typing import (
override) # Added in 3.12.
from .._utils import (
enumerate_patterns)
from .base import (
re2_error)
from ._base import (
RE2_OPTIONS,
Re2RegexDat,
Re2RegexDebug)
class Re2PsBackend(_Backend):
    """
    The :class:`Re2PsBackend` class is the :module:`re2` implementation used by
    :class:`~pathspec.pathspec.PathSpec` for matching files.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_regex: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`Re2PsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *_debug_regex* (:class:`bool` or :data:`None`) is whether to store the
        regular expression with each regex's data.

        *_test_sort* (:class:`callable` or :data:`None`) is a function used to
        reorder the pattern indices in place. This is used during testing.

        Raises the stored re2 error if re2 is not usable, and :class:`TypeError`
        if the first pattern is not a :class:`.RegexPattern`.
        """
        if re2_error is not None:
            raise re2_error

        # Only the first pattern is checked to keep construction cheap.
        if patterns and not isinstance(patterns[0], RegexPattern):
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        indexed_patterns = dict(enumerate_patterns(
            patterns, filter=True, reverse=False,
        ))
        regex_set = self._make_set()

        self._debug_regex = bool(_debug_regex)
        """
        *_debug_regex* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.
        """

        self._patterns: dict[int, RegexPattern] = indexed_patterns
        """
        *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`RegexPattern`).
        """

        self._regex_data: list[Re2RegexDat] = self._init_set(
            debug=self._debug_regex,
            patterns=indexed_patterns,
            regex_set=regex_set,
            sort_indices=_test_sort,
        )
        """
        *_regex_data* (:class:`list`) maps regex index (:class:`int`) to regex
        data (:class:`Re2RegexDat`).
        """

        self._set: re2.Set = regex_set
        """
        *_set* (:class:`re2.Set`) is the re2 regex set.
        """

    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Populate and compile the re2 regex set.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the regular expressions.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set.

        *sort_indices* (:class:`callable` or :data:`None`) is a function used to
        sort the patterns by index. This is used during testing to ensure the
        order of patterns is not accidentally relied on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`).
        """
        # Decide the order in which the patterns are added to the set.
        ordered_indices = list(patterns)
        if sort_indices is not None:
            sort_indices(ordered_indices)

        regex_data: list[Re2RegexDat] = []
        for pattern_index in ordered_indices:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # No-op pattern: nothing to add.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern

            dat: Re2RegexDat
            if debug:
                dat = Re2RegexDebug(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                    regex=regex,
                )
            else:
                dat = Re2RegexDat(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                )

            # The position in `regex_data` must line up with the order of the
            # `Add()` calls, so both happen together.
            regex_data.append(dat)
            regex_set.Add(regex)

        # Compile patterns.
        regex_set.Compile()
        return regex_data

    @staticmethod
    def _make_set() -> re2.Set:
        """
        Create the re2 regex set.

        Returns the set (:class:`re2.Set`).
        """
        return re2.Set.SearchSet(RE2_OPTIONS)

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        match_ids: Optional[list[int]] = self._set.Match(file)
        if not match_ids:
            return (None, None)

        # WARNING: According to the documentation on `RE2::Set::Match()`, there
        # is no guarantee matches will be produced in order! Later patterns
        # have higher priority, so take the largest pattern index explicitly.
        regex_data = self._regex_data
        matched_indices = [regex_data[regex_id].index for regex_id in match_ids]
        last_index = max(matched_indices)

        return (self._patterns[last_index].include, last_index)

View file

@ -0,0 +1,104 @@
"""
This module provides the simple backend for :class:`~pathspec.gitignore.GitIgnoreSpec`.
WARNING: The *pathspec._backends.simple* package is not part of the public API.
Its contents and structure are likely to change.
"""
from collections.abc import (
Sequence)
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec.pattern import (
RegexPattern)
from pathspec.patterns.gitignore.spec import (
_DIR_MARK)
from pathspec._typing import (
override) # Added in 3.12.
from .pathspec import (
SimplePsBackend)
class SimpleGiBackend(SimplePsBackend):
    """
    The :class:`SimpleGiBackend` class is the default (or simple) backend that
    :class:`~pathspec.gitignore.GitIgnoreSpec` uses to match files.
    """

    # Change type hint.
    _patterns: list[tuple[int, RegexPattern]]

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        no_filter: Optional[bool] = None,
        no_reverse: Optional[bool] = None,
    ) -> None:
        """
        Initialize the :class:`SimpleGiBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *no_filter* (:class:`bool`) is whether to keep no-op patterns
        (:data:`True`), or remove them (:data:`False`).

        *no_reverse* (:class:`bool`) is whether to keep the pattern order
        (:data:`True`), or reverse the order (:data:`False`).
        """
        super().__init__(patterns, no_filter=no_filter, no_reverse=no_reverse)

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        reverse_scan = self._is_reversed
        best_include: Optional[bool] = None
        best_index: Optional[int] = None
        best_priority = 0

        for index, pattern in self._patterns:
            include = pattern.include
            if include is None:
                # Null-operation pattern: never contributes a result.
                continue

            match = pattern.match_file(file)
            if match is None:
                continue

            # Pattern matched. A match through the directory marker group gets
            # priority 1, a match by a file pattern gets priority 2.
            dir_mark = match.match.groupdict().get(_DIR_MARK)
            priority = 1 if dir_mark else 2

            if reverse_scan:
                replace = priority > best_priority
            else:
                # Forward.
                replace = bool(include and dir_mark) or priority >= best_priority

            if replace:
                best_include, best_index, best_priority = include, index, priority

            if reverse_scan and priority == 2:
                # Patterns are being checked in reverse order. The first
                # pattern that matches with priority 2 takes precedence.
                break

        return (best_include, best_index)

View file

@ -0,0 +1,76 @@
"""
This module provides the simple backend for :class:`~pathspec.pathspec.PathSpec`.
WARNING: The *pathspec._backends.simple* package is not part of the public API.
Its contents and structure are likely to change.
"""
from collections.abc import (
Sequence)
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec.backend import (
_Backend)
from pathspec.pattern import (
Pattern)
from pathspec._typing import (
override) # Added in 3.12.
from pathspec.util import (
check_match_file)
from .._utils import (
enumerate_patterns)
class SimplePsBackend(_Backend):
    """
    The :class:`SimplePsBackend` class is the default (or simple) backend that
    :class:`~pathspec.pathspec.PathSpec` uses to match files.
    """

    def __init__(
        self,
        patterns: Sequence[Pattern],
        *,
        no_filter: Optional[bool] = None,
        no_reverse: Optional[bool] = None,
    ) -> None:
        """
        Initialize the :class:`SimplePsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.Pattern`) contains the
        compiled patterns.

        *no_filter* (:class:`bool`) is whether to keep no-op patterns
        (:data:`True`), or remove them (:data:`False`).

        *no_reverse* (:class:`bool`) is whether to keep the pattern order
        (:data:`True`), or reverse the order (:data:`False`).
        """
        reverse = not no_reverse

        self._is_reversed: bool = reverse
        """
        *_is_reversed* (:class:`bool`) is whether the pattern order was
        reversed.
        """

        self._patterns: list[tuple[int, Pattern]] = enumerate_patterns(
            patterns,
            filter=not no_filter,
            reverse=reverse,
        )
        """
        *_patterns* (:class:`list` of :class:`tuple`) contains the enumerated
        patterns.
        """

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        result = check_match_file(self._patterns, file, self._is_reversed)
        return result

View file

@ -0,0 +1,67 @@
"""
This module contains the project meta-data.
"""
__author__ = "Caleb P. Burns"
__copyright__ = "Copyright © 2013-2026 Caleb P. Burns"
__credits__ = [
"Hong Minhee <https://github.com/dahlia>",
"Brandon High <https://github.com/highb>",
"029xue <https://github.com/029xue>",
"Michael Huynh <https://github.com/mikexstudios>",
"Nick Humrich <https://github.com/nhumrich>",
"David Fraser <https://github.com/davidfraser>",
"Charles Samborski <https://github.com/demurgos>",
"George Hickman <https://github.com/ghickman>",
"Vincent Driessen <https://github.com/nvie>",
"Adrien Vergé <https://github.com/adrienverge>",
"Anders Blomdell <https://github.com/AndersBlomdell>",
"Xavier Thomas <https://github.com/thmxv>",
"Wim Jeantine-Glenn <https://github.com/wimglenn>",
"Hugo van Kemenade <https://github.com/hugovk>",
"Dan Cecile <https://github.com/dcecile>",
"MrOutis <https://github.com/mroutis>",
"Jon Dufresne <https://github.com/jdufresne>",
"Greg Roodt <https://github.com/groodt>",
"Florin T. <https://github.com/ftrofin>",
"Ben Felder <https://github.com/pykong>",
"Nicholas Hollander <https://github.com/nhhollander>",
"KOLANICH <https://github.com/KOLANICH>",
"Jon Hays <https://github.com/JonjonHays>",
"Isaac0616 <https://github.com/Isaac0616>",
"Sebastiaan Zeeff <https://github.com/SebastiaanZ>",
"Roel Adriaans <https://github.com/RoelAdriaans>",
"Ravi Selker <https://github.com/raviselker>",
"Johan Vergeer <https://github.com/johanvergeer>",
"danjer <https://github.com/danjer>",
"Jan-Hein Bührman <https://github.com/jhbuhrman>",
"Wim-Peter Dirks <https://github.com/WPDOrdina>",
"Karthikeyan Singaravelan <https://github.com/tirkarthi>",
"John Vandenberg <https://github.com/jayvdb>",
"John T. Wodder II <https://github.com/jwodder>",
"Tomasz Kłoczko <https://github.com/kloczek>",
"Oren <https://github.com/orens>",
"SP Mohanty <https://github.com/spMohanty>",
"Richard Si <https://github.com/ichard26>",
"Jakub Kuczys <https://github.com/jack1142>",
"Michał Górny <https://github.com/mgorny>",
"Bartłomiej Żak <https://github.com/bzakdd>",
"Matthias <https://github.com/haimat>",
"Avasam <https://github.com/Avasam>",
"Anıl Karagenç <https://github.com/karagenc>",
"Yannic Schröder <https://github.com/yschroeder>",
"axesider <https://github.com/axesider>",
"TomRuk <https://github.com/tomruk>",
"Oleh Prypin <https://github.com/oprypin>",
"Lumina <https://github.com/lumina37>",
"Kurt McKee <https://github.com/kurtmckee>",
"Dobatymo <https://github.com/Dobatymo>",
"Tomoki Nakamaru <https://github.com/tomokinakamaru>",
"Sebastien Eskenazi <https://github.com/SebastienEske>",
"Bar Vered <https://github.com/barv-jfrog>",
"Tzach Shabtay <https://github.com/tzachshabtay>",
"Adam Dangoor <https://github.com/adamtheturtle>",
"Marcel Telka <https://github.com/mtelka>",
"Dmytro Kostochko <https://github.com/Alerion>",
]
__license__ = "MPL 2.0"

View file

@ -0,0 +1,64 @@
"""
This module provides stubs for type hints not supported by all relevant Python
versions.
NOTICE: This project should have zero required dependencies which means it
cannot simply require :module:`typing_extensions`, and I do not want to maintain
a vendored copy of :module:`typing_extensions`.
"""
import functools
import warnings
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
TypeVar)
try:
from typing import AnyStr # Removed in 3.18.
except ImportError:
AnyStr = TypeVar('AnyStr', str, bytes)
try:
from typing import Never # Added in 3.11.
except ImportError:
from typing import NoReturn as Never
F = TypeVar('F', bound=Callable[..., Any])

try:
    from warnings import deprecated  # Added in 3.13.
except ImportError:
    try:
        from typing_extensions import deprecated
    except ImportError:
        def deprecated(
            message: str,
            /, *,
            category: Optional[type[Warning]] = DeprecationWarning,
            stacklevel: int = 1,
        ) -> Callable[[F], F]:
            """
            Minimal stand-in for :func:`warnings.deprecated` (PEP 702) for
            callables.

            *message* (:class:`str`) is the deprecation message.

            *category* (:class:`type` or :data:`None`) is the warning class to
            emit when the decorated callable is called. If :data:`None`, no
            warning is emitted at runtime.

            *stacklevel* (:class:`int`) is the stack level of the warning,
            relative to the caller of the decorated callable.

            Returns the decorator (:class:`~collections.abc.Callable`).
            """
            def decorator(f: F) -> F:
                if category is None:
                    # `warnings.warn(..., category=None)` would emit a
                    # `UserWarning`. PEP 702 specifies no runtime warning in
                    # this case, so only mark the callable.
                    f.__deprecated__ = message
                    return f

                @functools.wraps(f)
                def wrapper(*a, **k):
                    # Add 1 to skip this wrapper's own frame.
                    warnings.warn(message, category=category, stacklevel=stacklevel+1)
                    return f(*a, **k)

                wrapper.__deprecated__ = message
                return wrapper

            return decorator
try:
    from typing import override  # Added in 3.12.
except ImportError:
    try:
        from typing_extensions import override
    except ImportError:
        def override(f: F) -> F:
            """
            No-op stand-in for :func:`typing.override`.

            Returns *f* unchanged.
            """
            return f
def assert_unreachable(message: str) -> Never:
    """
    Mark a code path as unreachable.

    *message* (:class:`str`) is the error message.

    Always raises :class:`AssertionError` with *message*.
    """
    error = AssertionError(message)
    raise error

View file

@ -0,0 +1,5 @@
"""
This module defines the version.
"""
__version__ = "1.0.4"

View file

@ -0,0 +1,40 @@
"""
This module defines the necessary classes and type hints for exposing the bare
minimum of the internal implementations for the pattern (regular expression)
matching backends. The exact structure of the backends is not solidified and is
subject to change.
"""
from typing import (
Literal,
Optional)
BackendNamesHint = Literal['best', 'hyperscan', 're2', 'simple']
"""
The supported backend values.
"""
class _Backend(object):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_Backend` class is the abstract base class defining how to
    match files against patterns.
    """

    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).

        This base implementation always raises :class:`NotImplementedError`.
        """
        cls = self.__class__
        message = (
            f"{cls.__module__}.{cls.__qualname__}.match_file() "
            f"must be implemented."
        )
        raise NotImplementedError(message)

View file

@ -0,0 +1,165 @@
"""
This module provides :class:`.GitIgnoreSpec` which replicates *.gitignore*
behavior, and handles edge-cases where Git's behavior differs from what's
documented. Git allows including files from excluded directories which directly
contradicts the documentation. This uses :class:`.GitIgnoreSpecPattern` to fully
replicate Git's handling.
"""
from __future__ import annotations
from collections.abc import (
Iterable,
Sequence)
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union, # Replaced by `X | Y` in 3.10.
cast,
overload)
from pathspec.backend import (
BackendNamesHint,
_Backend)
from pathspec._backends.agg import (
make_gitignore_backend)
from pathspec.pathspec import (
PathSpec)
from pathspec.pattern import (
Pattern)
from pathspec.patterns.gitignore.basic import (
GitIgnoreBasicPattern)
from pathspec.patterns.gitignore.spec import (
GitIgnoreSpecPattern)
from pathspec._typing import (
AnyStr, # Removed in 3.18.
override) # Added in 3.12.
from pathspec.util import (
_is_iterable,
lookup_pattern)
Self = TypeVar("Self", bound='GitIgnoreSpec')
"""
:class:`.GitIgnoreSpec` self type hint to support Python v<3.11 using PEP 673
recommendation.
"""
class GitIgnoreSpec(PathSpec):
    """
    The :class:`GitIgnoreSpec` class extends :class:`.PathSpec` to replicate
    *gitignore* behavior. This uses :class:`.GitIgnoreSpecPattern` to fully
    replicate Git's handling.
    """

    def __eq__(self, other: object) -> bool:
        """
        Tests the equality of this gitignore-spec with *other*
        (:class:`.GitIgnoreSpec`) by comparing their
        :attr:`self.patterns <.PathSpec.patterns>` attributes. A
        non-:class:`GitIgnoreSpec` will not compare equal.
        """
        if isinstance(other, GitIgnoreSpec):
            return super().__eq__(other)
        elif isinstance(other, PathSpec):
            # A plain PathSpec never equals a GitIgnoreSpec.
            return False
        else:
            return NotImplemented

    # Support reversed order of arguments from PathSpec.
    @overload
    @classmethod
    def from_lines(
        cls: type[Self],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None],
        lines: Iterable[AnyStr],
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        ...

    @overload
    @classmethod
    def from_lines(
        cls: type[Self],
        lines: Iterable[AnyStr],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None] = None,
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        ...

    @override
    @classmethod
    def from_lines(
        cls: type[Self],
        lines: Iterable[AnyStr],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None] = None,
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        """
        Compiles the pattern lines.

        *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled
        pattern (:class:`str`). This simply has to yield each line, so it can
        be a :class:`io.TextIOBase` (e.g., from :func:`open` or
        :class:`io.StringIO`) or the result from :meth:`str.splitlines`.

        *pattern_factory* does not need to be set for :class:`GitIgnoreSpec`.
        If set, it should be either ``"gitignore"`` or
        :class:`.GitIgnoreSpecPattern`. There is no guarantee it will work with
        any other pattern class. Default is :data:`None` for
        :class:`.GitIgnoreSpecPattern`.

        *backend* (:class:`str` or :data:`None`) is the pattern (regular
        expression) matching backend to use. Default is :data:`None` for "best"
        to use the best available backend. Priority of backends is: "re2",
        "hyperscan", "simple". The "simple" backend is always available.

        Raises :class:`TypeError` if *pattern_factory* is
        :class:`.GitIgnoreBasicPattern` or a subclass of it.

        Returns the :class:`GitIgnoreSpec` instance.
        """
        if (isinstance(lines, (str, bytes)) or callable(lines)) and _is_iterable(pattern_factory):
            # Support reversed order of arguments from PathSpec.
            pattern_factory, lines = lines, pattern_factory

        if pattern_factory is None:
            pattern_factory = GitIgnoreSpecPattern
        elif pattern_factory == 'gitignore':
            # Force use of GitIgnoreSpecPattern for "gitignore" to handle
            # edge-cases. This makes usage easier.
            pattern_factory = GitIgnoreSpecPattern

        if isinstance(pattern_factory, str):
            pattern_factory = lookup_pattern(pattern_factory)

        # issubclass() raises its own TypeError when given a non-class callable
        # (e.g., a function), so only run the check on classes.
        if isinstance(pattern_factory, type) and issubclass(pattern_factory, GitIgnoreBasicPattern):
            raise TypeError((
                f"{pattern_factory=!r} cannot be {GitIgnoreBasicPattern} because it "
                f"will give unexpected results."
            ))  # TypeError

        self = super().from_lines(pattern_factory, lines, backend=backend, _test_backend_factory=_test_backend_factory)
        return cast(Self, self)

    @override
    @staticmethod
    def _make_backend(
        name: BackendNamesHint,
        patterns: Sequence[Pattern],
    ) -> _Backend:
        """
        .. warning:: This method is not part of the public API. It is subject
            to change.

        Create the backend for the patterns.

        *name* (:class:`str`) is the name of the backend.

        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the compiled patterns.

        Returns the backend (:class:`._Backend`).
        """
        return make_gitignore_backend(name, patterns)

View file

@ -0,0 +1,460 @@
"""
This module provides :class:`.PathSpec` which is an object-oriented interface
for pattern matching of files.
"""
from __future__ import annotations
from collections.abc import (
Collection,
Iterable,
Iterator,
Sequence)
from itertools import (
zip_longest)
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union, # Replaced by `X | Y` in 3.10.
cast)
Self = TypeVar("Self", bound='PathSpec')
"""
:class:`.PathSpec` self type hint to support Python v<3.11 using PEP 673
recommendation.
"""
from pathspec import util
from pathspec.backend import (
_Backend,
BackendNamesHint)
from pathspec._backends.agg import (
make_pathspec_backend)
from pathspec.pattern import (
Pattern)
from pathspec._typing import (
AnyStr, # Removed in 3.18.
deprecated) # Added in 3.13.
from pathspec.util import (
CheckResult,
StrPath,
TStrPath,
TreeEntry,
_is_iterable,
normalize_file)
class PathSpec(object):
"""
The :class:`PathSpec` class is a wrapper around a list of compiled
:class:`.Pattern` instances.
"""
def __init__(
    self,
    patterns: Union[Sequence[Pattern], Iterable[Pattern]],
    *,
    backend: Union[BackendNamesHint, str, None] = None,
    _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
) -> None:
    """
    Initializes the :class:`.PathSpec` instance.

    *patterns* (:class:`~collections.abc.Sequence` or
    :class:`~collections.abc.Iterable`) contains each compiled pattern
    (:class:`.Pattern`). If not a sequence, it will be converted to a
    :class:`list`.

    *backend* (:class:`str` or :data:`None`) is the pattern (regular
    expression) matching backend to use. Default is :data:`None` for "best" to
    use the best available backend. Priority of backends is: "re2",
    "hyperscan", "simple". The "simple" backend is always available.

    *_test_backend_factory* (:class:`~collections.abc.Callable` or
    :data:`None`), when set, is called with the patterns to create the backend
    instead of :meth:`_make_backend`.
    """
    # Convert non-sequence iterables to a list.
    pattern_seq = patterns if isinstance(patterns, Sequence) else list(patterns)
    backend_name = cast(BackendNamesHint, 'best' if backend is None else backend)

    if _test_backend_factory is None:
        use_backend = self._make_backend(backend_name, pattern_seq)
    else:
        use_backend = _test_backend_factory(pattern_seq)

    self._backend: _Backend = use_backend
    """
    *_backend* (:class:`._Backend`) is the pattern (regular expression)
    matching backend.
    """

    self._backend_name: BackendNamesHint = backend_name
    """
    *_backend_name* (:class:`str`) is the name of backend to use.
    """

    self.patterns: Sequence[Pattern] = pattern_seq
    """
    *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
    contains the compiled patterns.
    """
def __add__(self: Self, other: PathSpec) -> Self:
    """
    Combines the :attr:`self.patterns <.PathSpec.patterns>` patterns from two
    :class:`PathSpec` instances.

    *other* (:class:`PathSpec`) is the path-spec whose patterns are appended
    after this instance's patterns.

    Returns a new instance of this class using this instance's backend name,
    or :data:`NotImplemented` if *other* is not a :class:`PathSpec`.
    """
    if not isinstance(other, PathSpec):
        return NotImplemented

    try:
        combined = self.patterns + other.patterns
    except TypeError:
        # The two specs hold different sequence types (e.g., a list and a
        # tuple) which cannot be concatenated directly.
        combined = [*self.patterns, *other.patterns]

    return self.__class__(combined, backend=self._backend_name)
def __eq__(self, other: object) -> bool:
    """
    Tests the equality of this path-spec with *other* (:class:`PathSpec`) by
    comparing their :attr:`self.patterns <.PathSpec.patterns>` attributes
    pairwise.

    Returns :data:`NotImplemented` if *other* is not a :class:`PathSpec`.
    """
    if not isinstance(other, PathSpec):
        return NotImplemented

    # zip_longest pads the shorter sequence with None.
    for mine, theirs in zip_longest(self.patterns, other.patterns):
        if not (mine == theirs):
            return False

    return True
def __iadd__(self: Self, other: PathSpec) -> Self:
    """
    Adds the :attr:`self.patterns <.PathSpec.patterns>` from *other*
    (:class:`PathSpec`) to this instance, and rebuilds the backend for the
    combined patterns.

    Returns this instance, or :data:`NotImplemented` if *other* is not a
    :class:`PathSpec`.
    """
    if not isinstance(other, PathSpec):
        return NotImplemented

    try:
        self.patterns += other.patterns
    except TypeError:
        # The two specs hold different sequence types (e.g., a tuple and a
        # list) which cannot be concatenated directly.
        self.patterns = [*self.patterns, *other.patterns]

    self._backend = self._make_backend(self._backend_name, self.patterns)
    return self
def __len__(self) -> int:
    """
    Returns the number of :attr:`self.patterns <.PathSpec.patterns>` this
    path-spec contains (:class:`int`).
    """
    pattern_count = len(self.patterns)
    return pattern_count
def check_file(
    self,
    file: TStrPath,
    separators: Optional[Collection[str]] = None,
) -> CheckResult[TStrPath]:
    """
    Check the file against this path-spec.

    *file* (:class:`str` or :class:`os.PathLike`) is the file path to be
    matched against :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns the file check result (:class:`.CheckResult`), which carries the
    original *file* rather than its normalized form.
    """
    include, index = self._backend.match_file(normalize_file(file, separators))
    return CheckResult(file, include, index)
def check_files(
    self,
    files: Iterable[TStrPath],
    separators: Optional[Collection[str]] = None,
) -> Iterator[CheckResult[TStrPath]]:
    """
    Check the files against this path-spec.

    *files* (:class:`~collections.abc.Iterable` of :class:`str` or
    :class:`os.PathLike`) contains the file paths to be checked against
    :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Raises :class:`TypeError` if *files* is not an iterable. Because this is a
    generator, the error is raised when iteration starts.

    Returns an :class:`~collections.abc.Iterator` yielding each file check
    result (:class:`.CheckResult`).
    """
    if not _is_iterable(files):
        raise TypeError(f"files:{files!r} is not an iterable.")

    for orig_file in files:
        result = self._backend.match_file(normalize_file(orig_file, separators))
        include, index = result
        yield CheckResult(orig_file, include, index)
def check_tree_files(
    self,
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator[CheckResult[str]]:
    """
    Walks the specified root path for all files and checks them against this
    path-spec.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
    search for files.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
    is the error handler for file-system exceptions. It will be called with
    the exception (:exc:`OSError`). Reraise the exception to abort the walk.
    Default is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to
    walk symbolic links that resolve to directories. Default is :data:`None`
    for :data:`True`.

    Returns an :class:`~collections.abc.Iterator` yielding each file check
    result (:class:`.CheckResult`).
    """
    yield from self.check_files(
        util.iter_tree_files(root, on_error=on_error, follow_links=follow_links),
    )
@classmethod
def from_lines(
    cls: type[Self],
    pattern_factory: Union[str, Callable[[AnyStr], Pattern]],
    lines: Iterable[AnyStr],
    *,
    backend: Union[BackendNamesHint, str, None] = None,
    _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
) -> Self:
    """
    Compiles the pattern lines.

    *pattern_factory* can be either the name of a registered pattern factory
    (:class:`str`), or a :class:`~collections.abc.Callable` used to compile
    patterns. It must accept an uncompiled pattern (:class:`str`) and return
    the compiled pattern (:class:`.Pattern`).

    *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled
    pattern (:class:`str`). This simply has to yield each line so that it can
    be a :class:`io.TextIOBase` (e.g., from :func:`open` or
    :class:`io.StringIO`) or the result from :meth:`str.splitlines`. Empty
    lines are skipped.

    *backend* (:class:`str` or :data:`None`) is the pattern (or regular
    expression) matching backend to use. Default is :data:`None` for "best" to
    use the best available backend. Priority of backends is: "re2",
    "hyperscan", "simple". The "simple" backend is always available.

    Raises :class:`TypeError` if *pattern_factory* is not callable, or *lines*
    is not an iterable.

    Returns the :class:`PathSpec` instance.
    """
    if isinstance(pattern_factory, str):
        factory = util.lookup_pattern(pattern_factory)
    else:
        factory = pattern_factory

    if not callable(factory):
        raise TypeError(f"pattern_factory:{factory!r} is not callable.")

    if not _is_iterable(lines):
        raise TypeError(f"lines:{lines!r} is not an iterable.")

    patterns = []
    for line in lines:
        if line:
            patterns.append(factory(line))

    return cls(patterns, backend=backend, _test_backend_factory=_test_backend_factory)
@staticmethod
def _make_backend(
    name: BackendNamesHint,
    patterns: Sequence[Pattern],
) -> _Backend:
    """
    .. warning:: This method is not part of the public API. It is subject to
        change.

    Create the backend for the patterns.

    *name* (:class:`str`) is the name of the backend.

    *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
    contains the compiled patterns.

    Returns the backend (:class:`._Backend`).
    """
    backend = make_pathspec_backend(name, patterns)
    return backend
def match_entries(
    self,
    entries: Iterable[TreeEntry],
    separators: Optional[Collection[str]] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[TreeEntry]:
    """
    Matches the entries to this path-spec.

    *entries* (:class:`~collections.abc.Iterable` of :class:`.TreeEntry`)
    contains the entries to be matched against
    :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Raises :class:`TypeError` if *entries* is not an iterable.

    Returns the matched entries (:class:`~collections.abc.Iterator` of
    :class:`.TreeEntry`).
    """
    if not _is_iterable(entries):
        raise TypeError(f"entries:{entries!r} is not an iterable.")

    for entry in entries:
        include, _index = self._backend.match_file(normalize_file(entry.path, separators))
        # Yield when exactly one of "included" and "negated" holds.
        if bool(include) != bool(negate):
            yield entry
def match_file(
    self,
    file: StrPath,
    separators: Optional[Collection[str]] = None,
) -> bool:
    """
    Matches the file to this path-spec.

    *file* (:class:`str` or :class:`os.PathLike`) is the file path to be
    matched against :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`)
    optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns :data:`True` if *file* matched; otherwise, :data:`False`.
    """
    result = self._backend.match_file(normalize_file(file, separators))
    include, _index = result
    # *include* may be None (no pattern matched); coerce to a plain bool.
    return bool(include)
def match_files(
    self,
    files: Iterable[StrPath],
    separators: Optional[Collection[str]] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[StrPath]:
    """
    Matches the files to this path-spec.

    *files* (:class:`~collections.abc.Iterable` of :class:`str` or
    :class:`os.PathLike`) contains the file paths to be matched against
    :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Raises :class:`TypeError` if *files* is not an iterable.

    Returns the matched files (:class:`~collections.abc.Iterator` of
    :class:`str` or :class:`os.PathLike`). The original, un-normalized paths
    are yielded.
    """
    if not _is_iterable(files):
        raise TypeError(f"files:{files!r} is not an iterable.")

    for orig_file in files:
        include, _index = self._backend.match_file(normalize_file(orig_file, separators))
        # Yield when exactly one of "included" and "negated" holds.
        if bool(include) != bool(negate):
            yield orig_file
def match_tree_entries(
    self,
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[TreeEntry]:
    """
    Walks the specified root path for all files and matches them to this
    path-spec.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
    search.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
    is the error handler for file-system exceptions. It will be called with
    the exception (:exc:`OSError`). Reraise the exception to abort the walk.
    Default is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to
    walk symbolic links that resolve to directories. Default is :data:`None`
    for :data:`True`.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Returns the matched files (:class:`~collections.abc.Iterator` of
    :class:`.TreeEntry`).
    """
    yield from self.match_entries(
        util.iter_tree_entries(root, on_error=on_error, follow_links=follow_links),
        negate=negate,
    )
# NOTICE: The deprecation warning was only added in 1.0.0 (from 2026-01-05).
@deprecated((
    "PathSpec.match_tree() is deprecated. Use .match_tree_files() instead."
))
def match_tree(self, *args, **kw) -> Iterator[str]:
    """
    .. version-deprecated:: 0.3.2
        This is an alias for the
        :meth:`self.match_tree_files <.PathSpec.match_tree_files>` method.

    All arguments are forwarded unchanged.
    """
    matched = self.match_tree_files(*args, **kw)
    return matched
def match_tree_files(
    self,
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[str]:
    """
    Walks the specified root path for all files and matches them to this
    path-spec.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
    search for files.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
    is the error handler for file-system exceptions. It will be called with
    the exception (:exc:`OSError`). Reraise the exception to abort the walk.
    Default is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to
    walk symbolic links that resolve to directories. Default is :data:`None`
    for :data:`True`.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Returns the matched files (:class:`~collections.abc.Iterator` of
    :class:`str`).
    """
    yield from self.match_files(
        util.iter_tree_files(root, on_error=on_error, follow_links=follow_links),
        negate=negate,
    )

View file

@ -0,0 +1,241 @@
"""
This module provides the base definition for patterns.
"""
from __future__ import annotations
import re
from collections.abc import (
Iterable,
Iterator)
from dataclasses import (
dataclass)
from typing import (
Any,
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union) # Replaced by `X | Y` in 3.10.
from ._typing import (
AnyStr, # Removed in 3.18.
deprecated, # Added in 3.13.
override) # Added in 3.12.
RegexPatternSelf = TypeVar("RegexPatternSelf", bound='RegexPattern')
"""
:class:`.RegexPattern` self type hint to support Python v<3.11 using PEP 673
recommendation.
"""
class Pattern(object):
    """
    The :class:`Pattern` class is the abstract definition of a pattern.
    """

    # Make the class dict-less.
    __slots__ = ('include',)

    def __init__(self, include: Optional[bool]) -> None:
        """
        Initializes the :class:`Pattern` instance.

        *include* (:class:`bool` or :data:`None`) is whether the matched files
        should be included (:data:`True`), excluded (:data:`False`), or is a
        null-operation (:data:`None`).
        """
        self.include = include
        """
        *include* (:class:`bool` or :data:`None`) is whether the matched files
        should be included (:data:`True`), excluded (:data:`False`), or is a
        null-operation (:data:`None`).
        """

    @deprecated((
        "Pattern.match() is deprecated. Use Pattern.match_file() with a loop for "
        "similar results."
    ))
    def match(self, files: Iterable[str]) -> Iterator[str]:
        """
        .. version-deprecated:: 0.10.0
            This method is no longer used. Use the
            :meth:`self.match_file <.Pattern.match_file>` method with a loop
            for similar results.

        Matches this pattern against the specified files.

        *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
        each file relative to the root directory.

        Returns an :class:`~collections.abc.Iterable` yielding each matched
        file path (:class:`str`).
        """
        for candidate in files:
            result = self.match_file(candidate)
            if result is None:
                continue
            yield candidate

    def match_file(self, file: str) -> Optional[Any]:
        """
        Matches this pattern against the specified file.

        *file* (:class:`str`) is the normalized file path to match against.

        Returns the match result if *file* matched; otherwise, :data:`None`.
        This base implementation always raises :class:`NotImplementedError`.
        """
        template = "{cls.__module__}.{cls.__qualname__} must override match_file()."
        raise NotImplementedError(template.format(cls=self.__class__))
class RegexPattern(Pattern):
    """
    The :class:`RegexPattern` class implements a pattern on top of regular
    expressions.
    """

    # Keep the class dict-less.
    __slots__ = ('pattern', 'regex')

    def __init__(
        self,
        pattern: Union[AnyStr, re.Pattern, None],
        include: Optional[bool] = None,
    ) -> None:
        """
        Set up the :class:`RegexPattern` instance.

        *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or
        :data:`None`) is the pattern to compile into a regular expression.

        *include* (:class:`bool` or :data:`None`) must be :data:`None` unless
        *pattern* is a precompiled regular expression (:class:`re.Pattern`) in
        which case it is whether matched files should be included
        (:data:`True`), excluded (:data:`False`), or is a null operation
        (:data:`None`).

        .. note:: Subclasses do not need to support the *include* parameter.

        Raises :class:`TypeError` if *pattern* is none of the supported types.
        """
        if pattern is None:
            # NOTE: A null pattern is accepted so that a null-operation can be
            # represented.
            assert include is None, (
                f"include:{include!r} must be null when pattern:{pattern!r} is null."
            )
            regex = None

        elif isinstance(pattern, (str, bytes)):
            assert include is None, (
                f"include:{include!r} must be null when pattern:{pattern!r} is a string."
            )
            regex, include = self.pattern_to_regex(pattern)
            # NOTE: Only compile when the pattern is not a null-operation; a
            # null regular expression is allowed to be returned for one.
            if include is not None:
                regex = re.compile(regex)

        elif hasattr(pattern, 'match'):
            # Assume pattern is a precompiled regular expression.
            # - NOTE: The caller-specified *include* is used as given.
            regex = pattern

        else:
            raise TypeError(f"pattern:{pattern!r} is not a string, re.Pattern, or None.")

        super().__init__(include)

        self.pattern: Union[AnyStr, re.Pattern, None] = pattern
        """
        *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or
        :data:`None`) is the uncompiled, input pattern. This is for reference.
        """

        self.regex: Optional[re.Pattern] = regex
        """
        *regex* (:class:`re.Pattern` or :data:`None`) is the compiled regular
        expression for the pattern.
        """

    def __copy__(self: RegexPatternSelf) -> RegexPatternSelf:
        """
        Perform a shallow copy of the pattern.

        Returns the copy (:class:`RegexPattern`).
        """
        # Rebuild from the compiled regex, then restore the reference pattern.
        clone = self.__class__(self.regex, self.include)
        clone.pattern = self.pattern
        return clone

    def __eq__(self, other: RegexPattern) -> bool:
        """
        Test the equality of this regex pattern with *other*
        (:class:`RegexPattern`) by comparing their :attr:`~Pattern.include` and
        :attr:`~RegexPattern.regex` attributes.

        Returns :data:`NotImplemented` when *other* is not a
        :class:`RegexPattern`.
        """
        if not isinstance(other, RegexPattern):
            return NotImplemented
        return self.include == other.include and self.regex == other.regex

    @override
    def match_file(self, file: AnyStr) -> Optional[RegexMatchResult]:
        """
        Match this pattern against a single file.

        *file* (:class:`str` or :class:`bytes`) is the file path relative to the
        root directory (e.g., "relative/path/to/file").

        Returns the match result (:class:`.RegexMatchResult`) if *file* matched;
        otherwise, :data:`None`.
        """
        if self.include is None:
            # A null-operation never matches.
            return None

        found = self.regex.search(file)
        if found is None:
            return None
        return RegexMatchResult(found)

    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into an uncompiled regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
        into a regular expression.

        Returns a :class:`tuple` containing:

        - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
          uncompiled regular expression.

        - *include* (:class:`bool` or :data:`None`) is whether matched files
          should be included (:data:`True`), excluded (:data:`False`), or is a
          null-operation (:data:`None`).

        .. note:: The default implementation simply returns *pattern* and
            :data:`True`.
        """
        return (pattern, True)
@dataclass
class RegexMatchResult:
    """
    The :class:`RegexMatchResult` data class carries the information about a
    regular expression that matched.
    """

    # Keep the class dict-less.
    __slots__ = ('match',)

    match: re.Match
    """
    *match* (:class:`re.Match`) is the regex match result.
    """

View file

@ -0,0 +1,12 @@
"""
The *pathspec.patterns* package contains the pattern matching implementations.
"""
# Load pattern implementations.
from .gitignore import basic as _
from .gitignore import spec as _
# DEPRECATED: Deprecated since 0.11.0 (from 2023-01-24). Expose the
# GitWildMatchPattern class in this module for backward compatibility with
# 0.5.0 (from 2016-08-22).
from .gitwildmatch import GitWildMatchPattern

View file

@ -0,0 +1,17 @@
"""
The *pathspec.patterns.gitignore* package provides the *gitignore*
implementations.
The following classes are imported and made available from this package:
- :class:`pathspec.patterns.gitignore.base.GitIgnorePatternError`
"""
# Expose the GitIgnorePatternError for convenience.
from .base import (
GitIgnorePatternError)
# Declare imports as part of the public interface. Only the names listed here
# are exported by a star import of this package.
__all__ = [
'GitIgnorePatternError',
]

View file

@ -0,0 +1,176 @@
"""
This module provides common classes for the gitignore patterns.
"""
import re
from pathspec.pattern import (
RegexPattern)
from pathspec._typing import (
AnyStr) # Removed in 3.18.
# Byte-string patterns are decoded with this encoding before parsing and
# encoded with it again afterwards.
# NOTE(review): latin1 maps every byte value to exactly one code point, so the
# decode/encode round trip is presumably lossless -- confirm before changing.
_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte string pattern.
"""
class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_GitIgnoreBasePattern` class holds the functionality shared by
    the compiled gitignore pattern implementations.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Escape the characters that are special in a gitignore pattern.

        *s* (:class:`str` or :class:`bytes`) a filename or a string that you
        want to escape, usually before adding it to a ".gitignore".

        Returns the escaped string (:class:`str` or :class:`bytes`), matching
        the type of *s*.

        Raises :class:`TypeError` if *s* is neither :class:`str` nor
        :class:`bytes`.
        """
        if isinstance(s, str):
            is_bytes = False
            text = s
        elif isinstance(s, bytes):
            is_bytes = True
            text = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        pieces = []
        for char in text:
            if char in '[]!*#?':
                # Prefix each special character with a backslash.
                pieces.append(f"\\{char}")
            else:
                pieces.append(char)
        escaped = ''.join(pieces)

        if is_bytes:
            return escaped.encode(_BYTES_ENCODING)
        return escaped

    @staticmethod
    def _translate_segment_glob(pattern: str) -> str:
        """
        Translate a glob pattern for a single path segment into a regular
        expression.

        *pattern* (:class:`str`) is the glob pattern.

        Returns the regular expression (:class:`str`).

        Raises :class:`ValueError` if *pattern* ends with an unpaired escape
        character (a backslash with nothing after it).
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to the
        # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
        parts = []
        pending_escape = False
        pos, length = 0, len(pattern)
        while pos < length:
            # Consume the next character.
            char = pattern[pos]
            pos += 1

            if pending_escape:
                # The previous character was a backslash: take this one
                # literally.
                pending_escape = False
                parts.append(re.escape(char))
                continue

            if char == '\\':
                # Escape character: the next character is taken literally.
                pending_escape = True

            elif char == '*':
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                parts.append('[^/]*')

            elif char == '?':
                # Single-character wildcard. Match any single character (except
                # a slash).
                parts.append('[^/]')

            elif char != '[':
                # Regular character, escape it for regex.
                parts.append(re.escape(char))

            else:
                # Bracket expression wildcard. Except for the beginning
                # exclamation mark, the whole bracket expression can be used
                # directly as regex, but we have to find where it ends.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                close = pos

                # Step over the negation marker.
                if close < length and pattern[close] in ('!', '^'):
                    close += 1

                # Step over a closing bracket that comes first in the
                # expression: it is a literal member.
                if close < length and pattern[close] == ']':
                    close += 1

                # Scan for the closing bracket, or run off the end.
                while close < length and pattern[close] != ']':
                    close += 1

                if close >= length:
                    # No closing bracket: treat the opening bracket as a
                    # literal instead of as an expression.
                    parts.append('\\[')
                    continue

                # Move one past the closing bracket:
                #
                #  [...]
                #   ^   ^
                #  pos  close
                #
                close += 1

                if pattern[pos] in ('!', '^'):
                    # '!' negates the expression. POSIX declares "[^...]" as
                    # undefined in a glob pattern and Python's
                    # `fnmatch.translate()` escapes the caret as a literal, but
                    # Git supports a caret for negation. Stay consistent with
                    # Git because that is the expected behavior.
                    parts.append('[^')
                    pos += 1
                else:
                    parts.append('[')

                # Double the backslashes so that regex treats them as literal
                # backslashes inside the bracket expression.
                parts.append(pattern[pos:close].replace('\\', '\\\\'))

                # Resume right after the closing bracket.
                pos = close

        if pending_escape:
            raise ValueError((
                f"Escape character found with no next character to escape: {pattern!r}"
            ))  # ValueError

        return ''.join(parts)
class GitIgnorePatternError(ValueError):
    """
    The :class:`GitIgnorePatternError` exception signals that a gitignore
    pattern is invalid.
    """

View file

@ -0,0 +1,317 @@
"""
This module provides :class:`GitIgnoreBasicPattern` which implements Git's
`gitignore`_ patterns as documented. This differs from how Git actually behaves
when including files in excluded directories.
.. _`gitignore`: https://git-scm.com/docs/gitignore
"""
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec import util
from pathspec._typing import (
AnyStr, # Removed in 3.18.
assert_unreachable,
override) # Added in 3.12.
from .base import (
GitIgnorePatternError,
_BYTES_ENCODING,
_GitIgnoreBasePattern)
class GitIgnoreBasicPattern(_GitIgnoreBasePattern):
    """
    The :class:`GitIgnoreBasicPattern` class represents a compiled gitignore
    pattern that behaves as documented. This is registered as "gitignore".
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def __normalize_segments(
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> tuple[Optional[list[str]], Optional[str]]:
        """
        Bring the pattern segments into a canonical form that is easier to
        translate.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments. This may be modified in place.

        Returns a :class:`tuple` containing either:

        - The normalized segments (:class:`list` of :class:`str`; or
          :data:`None`).

        - The regular expression override (:class:`str` or :data:`None`).

        Raises :class:`ValueError` if no segments remain after normalization.
        """
        is_single_segment = (
            len(pattern_segs) == 1
            or (len(pattern_segs) == 2 and not pattern_segs[1])
        )

        if not pattern_segs[0]:
            # A pattern beginning with a slash ('/') matches relative to the
            # root directory. Drop the empty first segment to make the pattern
            # relative to root.
            del pattern_segs[0]

        elif is_single_segment and pattern_segs[0] != '**':
            # A single segment pattern with or without a trailing slash ('/')
            # matches any descendant path, which is equivalent to
            # "**/{pattern}". Prepend a double-asterisk segment to make the
            # pattern relative to root.
            pattern_segs.insert(0, '**')

        # Otherwise the pattern has no beginning slash but names at least one
        # leading directory (e.g., "dir/{pattern}"): it already matches
        # relative to the root directory and needs no change.

        if not pattern_segs:
            # Nothing is left of the pattern, so it must be invalid.
            raise ValueError("Pattern normalized to nothing.")

        if not pattern_segs[-1]:
            # A pattern ending with a slash ('/') matches all descendant paths
            # if it is a directory but not if it is a regular file. This is
            # equivalent to "{pattern}/**", so turn the empty last segment into
            # a double-asterisk.
            pattern_segs[-1] = '**'

        # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e.,
        # '**/**'). Walk backwards so that deleting a segment never shifts the
        # ones still to be visited.
        for index in reversed(range(1, len(pattern_segs))):
            if pattern_segs[index - 1] == '**' and pattern_segs[index] == '**':
                del pattern_segs[index]

        if pattern_segs == ['**']:
            # The pattern "**/" is normalized to "**", but it should match
            # everything except for files in the root. The pattern "**" matches
            # every path. Special case both.
            return (None, '/' if is_dir_pattern else '.')

        if pattern_segs == ['**', '*']:
            # The pattern "*" is normalized to "**/*" and matches every path.
            # Special case this pattern for efficiency.
            return (None, '.')

        if pattern_segs == ['**', '*', '**']:
            # The pattern "*/" is normalized to "**/*/**" which matches every
            # file not in the root directory. Special case this pattern for
            # efficiency.
            return (None, '/')

        # No regular expression override, return modified pattern segments.
        return (pattern_segs, None)

    @override
    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
        into a regular expression.

        Returns a :class:`tuple` containing:

        - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
          uncompiled regular expression.

        - *include* (:class:`bool` or :data:`None`) is whether matched files
          should be included (:data:`True`), excluded (:data:`False`), or is a
          null-operation (:data:`None`).

        Raises :class:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`, and :class:`GitIgnorePatternError` if the pattern is
        invalid.
        """
        if isinstance(pattern, str):
            pattern_str = pattern
            is_bytes = False
        elif isinstance(pattern, bytes):
            pattern_str = pattern.decode(_BYTES_ENCODING)
            is_bytes = True
        else:
            raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

        # From here on only the decoded string is used.
        original_pattern = pattern_str
        del pattern

        # EDGE CASE: Leading spaces are kept and only trailing whitespace is
        # removed. A trailing space escaped with a backslash is part of the
        # pattern, so nothing is stripped in that case.
        if not pattern_str.endswith('\\ '):
            pattern_str = pattern_str.rstrip()

        if not pattern_str:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            return (None, None)

        if pattern_str.startswith('#'):
            # A pattern starting with a hash ('#') serves as a comment (neither
            # includes nor excludes files). Escape the hash with a backslash to
            # match a literal hash (i.e., '\#').
            return (None, None)

        # A pattern starting with an exclamation mark ('!') negates the pattern
        # (exclude instead of include). Escape the exclamation mark with a
        # backslash to match a literal exclamation mark (i.e., '\!').
        include = not pattern_str.startswith('!')
        if not include:
            # Remove the leading exclamation mark.
            pattern_str = pattern_str[1:]

        # Split pattern into segments.
        pattern_segs = pattern_str.split('/')

        # Decide whether the pattern is specifically a directory pattern before
        # normalization.
        # EDGE CASE: A single slash ('/') is not addressed by the gitignore
        # documentation. Git treats it as a no-op (does not match any files).
        # The straight forward interpretation is to treat it as a directory and
        # match every descendant path (equivalent to '**'), so the directory
        # flag is cleared for it: it is handled as '**' instead of '**/'.
        is_dir_pattern = pattern_str != '/' and not pattern_segs[-1]

        # Normalize pattern to make processing easier.
        try:
            pattern_segs, override_regex = cls.__normalize_segments(
                is_dir_pattern, pattern_segs,
            )
        except ValueError as e:
            raise GitIgnorePatternError((
                f"Invalid git pattern: {original_pattern!r}"
            )) from e  # GitIgnorePatternError

        regex: Optional[str]
        if override_regex is not None:
            # Use regex override.
            regex = override_regex

        elif pattern_segs is not None:
            # Build regular expression from pattern.
            try:
                regex_parts = cls.__translate_segments(pattern_segs)
            except ValueError as e:
                raise GitIgnorePatternError((
                    f"Invalid git pattern: {original_pattern!r}"
                )) from e  # GitIgnorePatternError

            regex = ''.join(regex_parts)

        else:
            assert_unreachable((
                f"{override_regex=} and {pattern_segs=} cannot both be null."
            ))  # assert_unreachable

        # Encode regex if needed so the result matches the input type.
        out_regex: AnyStr
        if is_bytes and regex is not None:
            out_regex = regex.encode(_BYTES_ENCODING)
        else:
            out_regex = regex

        return (out_regex, include)

    @classmethod
    def __translate_segments(cls, pattern_segs: list[str]) -> list[str]:
        """
        Translate the normalized pattern segments to regular expression parts.

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments.

        Returns the regular expression parts (:class:`list` of :class:`str`).
        """
        regex_parts = []
        need_slash = False
        last = len(pattern_segs) - 1
        for index, seg in enumerate(pattern_segs):
            is_first = index == 0
            is_last = index == last

            if seg == '**':
                if is_first:
                    # A normalized pattern beginning with double-asterisks
                    # ('**') matches any leading path segments.
                    # - NOTICE: '(?:^|/)' benchmarks slower using p15
                    #   (sm=0.9382, hs=0.9966, re2=0.9337).
                    regex_parts.append('^(?:.+/)?')

                elif not is_last:
                    # Inner double-asterisks ('**') match multiple (or zero)
                    # inner path segments.
                    regex_parts.append('(?:/.+)?')
                    need_slash = True

                else:
                    assert index == last, (index, last)
                    # A normalized pattern ending with double-asterisks ('**')
                    # matches any trailing path segments.
                    regex_parts.append('/')

                continue

            # Match path segment.
            if is_first:
                # Anchor to root directory.
                regex_parts.append('^')

            if need_slash:
                regex_parts.append('/')

            is_star = seg == '*'
            if is_star:
                # Match whole path segment.
                regex_parts.append('[^/]+')
            else:
                # Match segment glob pattern.
                regex_parts.append(cls._translate_segment_glob(seg))

            if is_last:
                if is_star:
                    # A pattern ending with an asterisk ('*') matches a file or
                    # directory (without matching descendant paths). E.g.,
                    # "foo/*" matches "foo/test.json", "foo/bar/", but not
                    # "foo/bar/hello.c".
                    regex_parts.append('/?$')
                else:
                    # A pattern ending without a slash ('/') matches a file or
                    # a directory (with paths underneath it). E.g., "foo"
                    # matches "foo", "foo/bar", "foo/bar/baz", etc.
                    regex_parts.append('(?:/|$)')

            need_slash = True

        return regex_parts
# Register GitIgnoreBasicPattern as "gitignore". This is a module-level
# statement, so the registration happens when this module is imported.
util.register_pattern('gitignore', GitIgnoreBasicPattern)

View file

@ -0,0 +1,335 @@
"""
This module provides :class:`GitIgnoreSpecPattern` which implements Git's
`gitignore`_ patterns, and handles edge-cases where Git's behavior differs from
what's documented. Git allows including files from excluded directories which
appears to contradict the documentation. This is used by
:class:`~pathspec.gitignore.GitIgnoreSpec` to fully replicate Git's handling.
.. _`gitignore`: https://git-scm.com/docs/gitignore
"""
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec._typing import (
AnyStr, # Removed in 3.18.
assert_unreachable,
override) # Added in 3.12.
from .base import (
GitIgnorePatternError,
_BYTES_ENCODING,
_GitIgnoreBasePattern)
# Name of the regex group used to mark a directory in the generated patterns.
_DIR_MARK = 'ps_d'
"""
The regex group name for the directory marker. This is only used by
:class:`GitIgnoreSpec`.
"""
# Expands to "(?P<ps_d>/)": a named group that captures a single slash.
_DIR_MARK_CG = f'(?P<{_DIR_MARK}>/)'
"""
This regular expression matches the directory marker.
"""
# Expands to "(?:(?P<ps_d>/)|$)": either the captured slash or the end of the
# string.
_DIR_MARK_OPT = f'(?:{_DIR_MARK_CG}|$)'
"""
This regular expression matches the optional directory marker and sub-path.
"""
class GitIgnoreSpecPattern(_GitIgnoreBasePattern):
    """
    The :class:`GitIgnoreSpecPattern` class represents a compiled gitignore
    pattern with special handling for edge-cases to replicate Git's behavior.

    This is registered under the deprecated name "gitwildmatch" for backward
    compatibility with v0.12. The registered name will be removed in a future
    version.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def __normalize_segments(
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> tuple[Optional[list[str]], Optional[str]]:
        """
        Bring the pattern segments into a canonical form that is easier to
        translate.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments. This may be modified in place.

        Returns a :class:`tuple` containing either:

        - The normalized segments (:class:`list` of :class:`str`; or
          :data:`None`).

        - The regular expression override (:class:`str` or :data:`None`).

        Raises :class:`ValueError` if no segments remain after normalization.
        """
        is_single_segment = (
            len(pattern_segs) == 1
            or (len(pattern_segs) == 2 and not pattern_segs[1])
        )

        if not pattern_segs[0]:
            # A pattern beginning with a slash ('/') matches relative to the
            # root directory. Drop the empty first segment to make the pattern
            # relative to root.
            del pattern_segs[0]

        elif is_single_segment and pattern_segs[0] != '**':
            # A single segment pattern with or without a trailing slash ('/')
            # matches any descendant path, which is equivalent to
            # "**/{pattern}". Prepend a double-asterisk segment to make the
            # pattern relative to root.
            pattern_segs.insert(0, '**')

        # Otherwise the pattern has no beginning slash but names at least one
        # leading directory (e.g., "dir/{pattern}"): it already matches
        # relative to the root directory and needs no change.

        if not pattern_segs:
            # Nothing is left of the pattern, so it must be invalid.
            raise ValueError("Pattern normalized to nothing.")

        if not pattern_segs[-1]:
            # A pattern ending with a slash ('/') matches all descendant paths
            # if it is a directory but not if it is a regular file. This is
            # equivalent to "{pattern}/**", so turn the empty last segment into
            # a double-asterisk.
            pattern_segs[-1] = '**'

        # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e.,
        # '**/**'). Walk backwards so that deleting a segment never shifts the
        # ones still to be visited.
        for index in reversed(range(1, len(pattern_segs))):
            if pattern_segs[index - 1] == '**' and pattern_segs[index] == '**':
                del pattern_segs[index]

        if pattern_segs == ['**']:
            # The pattern "**/" is normalized to "**", but it should match
            # everything except for files in the root. The pattern "**" matches
            # every path. Special case both.
            return (None, _DIR_MARK_CG if is_dir_pattern else '.')

        if pattern_segs == ['**', '*']:
            # The pattern "*" is normalized to "**/*" and matches every path.
            # Special case this pattern for efficiency.
            return (None, '.')

        if pattern_segs == ['**', '*', '**']:
            # The pattern "*/" is normalized to "**/*/**" which matches every
            # file not in the root directory. Special case this pattern for
            # efficiency.
            return (None, _DIR_MARK_CG if is_dir_pattern else '/')

        # No regular expression override, return modified pattern segments.
        return (pattern_segs, None)

    @override
    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
        into a regular expression.

        Returns a :class:`tuple` containing:

        - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
          uncompiled regular expression.

        - *include* (:class:`bool` or :data:`None`) is whether matched files
          should be included (:data:`True`), excluded (:data:`False`), or is a
          null-operation (:data:`None`).

        Raises :class:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`, and :class:`GitIgnorePatternError` if the pattern is
        invalid.
        """
        if isinstance(pattern, str):
            pattern_str = pattern
            is_bytes = False
        elif isinstance(pattern, bytes):
            pattern_str = pattern.decode(_BYTES_ENCODING)
            is_bytes = True
        else:
            raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

        # From here on only the decoded string is used.
        original_pattern = pattern_str
        del pattern

        # EDGE CASE: Leading spaces are kept (Git does not remove them) and
        # only trailing whitespace is removed. A trailing space escaped with a
        # backslash is part of the pattern, so nothing is stripped in that
        # case.
        if not pattern_str.endswith('\\ '):
            pattern_str = pattern_str.rstrip()

        if not pattern_str:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            return (None, None)

        if pattern_str.startswith('#'):
            # A pattern starting with a hash ('#') serves as a comment (neither
            # includes nor excludes files). Escape the hash with a backslash to
            # match a literal hash (i.e., '\#').
            return (None, None)

        if pattern_str == '/':
            # EDGE CASE: According to `git check-ignore` (v2.4.1), a single '/'
            # does not match any file.
            return (None, None)

        # A pattern starting with an exclamation mark ('!') negates the pattern
        # (exclude instead of include). Escape the exclamation mark with a
        # backslash to match a literal exclamation mark (i.e., '\!').
        include = not pattern_str.startswith('!')
        if not include:
            # Remove the leading exclamation mark.
            pattern_str = pattern_str[1:]

        # Split pattern into segments.
        pattern_segs = pattern_str.split('/')

        # Decide whether the pattern is specifically a directory pattern before
        # normalization.
        is_dir_pattern = not pattern_segs[-1]

        # Normalize pattern to make processing easier.
        try:
            pattern_segs, override_regex = cls.__normalize_segments(
                is_dir_pattern, pattern_segs,
            )
        except ValueError as e:
            raise GitIgnorePatternError((
                f"Invalid git pattern: {original_pattern!r}"
            )) from e  # GitIgnorePatternError

        regex: Optional[str]
        if override_regex is not None:
            # Use regex override.
            regex = override_regex

        elif pattern_segs is not None:
            # Build regular expression from pattern.
            try:
                regex_parts = cls.__translate_segments(is_dir_pattern, pattern_segs)
            except ValueError as e:
                raise GitIgnorePatternError((
                    f"Invalid git pattern: {original_pattern!r}"
                )) from e  # GitIgnorePatternError

            regex = ''.join(regex_parts)

        else:
            assert_unreachable((
                f"{override_regex=} and {pattern_segs=} cannot both be null."
            ))  # assert_unreachable

        # Encode regex if needed so the result matches the input type.
        out_regex: AnyStr
        if is_bytes and regex is not None:
            out_regex = regex.encode(_BYTES_ENCODING)
        else:
            out_regex = regex

        return (out_regex, include)

    @classmethod
    def __translate_segments(
        cls,
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> list[str]:
        """
        Translate the normalized pattern segments to regular expression parts.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments.

        Returns the regular expression parts (:class:`list` of :class:`str`).
        """
        regex_parts = []
        need_slash = False
        last = len(pattern_segs) - 1
        for index, seg in enumerate(pattern_segs):
            is_first = index == 0
            is_last = index == last

            if seg == '**':
                if is_first:
                    # A normalized pattern beginning with double-asterisks
                    # ('**') matches any leading path segments.
                    regex_parts.append('^(?:.+/)?')

                elif not is_last:
                    # Inner double-asterisks ('**') match multiple (or zero)
                    # inner path segments.
                    regex_parts.append('(?:/.+)?')
                    need_slash = True

                else:
                    assert index == last, (index, last)
                    # A normalized pattern ending with double-asterisks ('**')
                    # matches any trailing path segments. A directory pattern
                    # captures the slash in the directory marker group.
                    regex_parts.append(_DIR_MARK_CG if is_dir_pattern else '/')

                continue

            # Match path segment.
            if is_first:
                # Anchor to root directory.
                regex_parts.append('^')

            if need_slash:
                regex_parts.append('/')

            if seg == '*':
                # Match whole path segment.
                regex_parts.append('[^/]+')
            else:
                # Match segment glob pattern.
                regex_parts.append(cls._translate_segment_glob(seg))

            if is_last:
                # A pattern ending without a slash ('/') matches a file or a
                # directory (with paths underneath it). E.g., "foo" matches
                # "foo", "foo/bar", "foo/bar/baz", etc.
                regex_parts.append(_DIR_MARK_OPT)

            need_slash = True

        return regex_parts

View file

@ -0,0 +1,52 @@
"""
.. version-deprecated:: 1.0.0
This module is superseded by :mod:`pathspec.patterns.gitignore`.
"""
from pathspec import util
from pathspec._typing import (
deprecated, # Added in 3.13.
override) # Added in 3.12.
from .gitignore.spec import (
GitIgnoreSpecPattern)
# DEPRECATED: Deprecated since version 1.0.0. Expose GitWildMatchPatternError
# in this module for backward compatibility.
from .gitignore import (
GitIgnorePatternError as GitWildMatchPatternError)
class GitWildMatchPattern(GitIgnoreSpecPattern):
    """
    .. version-deprecated:: 1.0.0
        This class is superseded by :class:`GitIgnoreSpecPattern` and
        :class:`~pathspec.patterns.gitignore.basic.GitIgnoreBasicPattern`.

    The class only wraps :class:`GitIgnoreSpecPattern` so that constructing an
    instance or converting a pattern emits a deprecation warning.
    """

    # Keep the dict-less class hierarchy. A subclass that does not declare
    # `__slots__` gets a per-instance `__dict__` again, which would undo the
    # slots declared by every ancestor.
    __slots__ = ()

    @deprecated((
        "GitWildMatchPattern ('gitwildmatch') is deprecated. Use 'gitignore' for "
        "GitIgnoreBasicPattern or GitIgnoreSpecPattern instead."
    ))
    def __init__(self, *args, **kw) -> None:
        """
        Warn about deprecation, then initialize the instance exactly like the
        parent class does. All arguments are forwarded unchanged.
        """
        super().__init__(*args, **kw)

    @override
    @classmethod
    @deprecated((
        "GitWildMatchPattern ('gitwildmatch') is deprecated. Use 'gitignore' for "
        "GitIgnoreBasicPattern or GitIgnoreSpecPattern instead."
    ))
    def pattern_to_regex(cls, *args, **kw):
        """
        Warn about deprecation, then convert the pattern exactly like the
        parent class does. All arguments are forwarded unchanged and the
        parent's result is returned.
        """
        return super().pattern_to_regex(*args, **kw)
# DEPRECATED: Deprecated since version 1.0.0. Register GitWildMatchPattern as
# "gitwildmatch" for backward compatibility. This is a module-level statement,
# so the registration happens when this module is imported.
util.register_pattern('gitwildmatch', GitWildMatchPattern)

View file

@ -0,0 +1 @@
# Marker file for PEP 561. The pathspec package uses inline types.

View file

@ -0,0 +1,847 @@
"""
This module provides utility methods for dealing with path-specs.
"""
import os
import os.path
import pathlib
import posixpath
import stat
from collections.abc import (
Collection,
Iterable,
Iterator,
Sequence)
from dataclasses import (
dataclass)
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Generic,
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union) # Replaced by `X | Y` in 3.10.
from .pattern import (
Pattern)
from ._typing import (
AnyStr, # Removed in 3.18.
deprecated) # Added in 3.13.
StrPath = Union[str, os.PathLike[str]]
"""
Type alias for a path given as a :class:`str` or as an :class:`os.PathLike` of
:class:`str`.
"""

TStrPath = TypeVar("TStrPath", bound=StrPath)
"""
Type variable for :class:`str` or :class:`os.PathLike`.
"""

NORMALIZE_PATH_SEPS = [
    _sep
    for _sep in (os.sep, os.altsep)
    if _sep and _sep != posixpath.sep
]
"""
*NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) contains the path
separators of the current operating system that differ from the POSIX separator
and therefore need to be normalized to it. The candidates are :data:`os.sep`
and :data:`os.altsep`; an unset :data:`os.altsep` is skipped.
"""

_registered_patterns = {}
"""
*_registered_patterns* (:class:`dict`) maps a name (:class:`str`) to the
registered pattern factory (:class:`~collections.abc.Callable`).
"""
def append_dir_sep(path: pathlib.Path) -> str:
    """
    Convert the path to a string and, when the path is a directory, append the
    path separator to it. The trailing separator lets directories be told
    apart from files without consulting the file-system again.

    *path* (:class:`pathlib.Path`) is the path to use.

    Returns the path (:class:`str`).
    """
    text = str(path)
    suffix = os.sep if path.is_dir() else ''
    return text + suffix
def check_match_file(
    patterns: Iterable[tuple[int, Pattern]],
    file: str,
    is_reversed: Optional[bool] = None,
) -> tuple[Optional[bool], Optional[int]]:
    """
    Check the file against the patterns.

    *patterns* (:class:`~collections.abc.Iterable`) yields each indexed pattern
    (:class:`tuple`) which contains the pattern index (:class:`int`) and actual
    pattern (:class:`.Pattern`).

    *file* (:class:`str`) is the normalized file path to be matched against
    *patterns*.

    *is_reversed* (:class:`bool` or :data:`None`) is whether the order of the
    patterns has been reversed. Default is :data:`None` for :data:`False`.
    Reversing the order of the patterns is an optimization.

    Returns a :class:`tuple` containing whether to include *file* (:class:`bool`
    or :data:`None`), and the index of the last matched pattern (:class:`int` or
    :data:`None`).
    """
    # Lazily yield `(include, index)` for every non-null pattern that matches.
    hits = (
        (pattern.include, index)
        for index, pattern in patterns
        if pattern.include is not None and pattern.match_file(file) is not None
    )

    if is_reversed:
        # The patterns arrive in reverse order, so the first one that matches
        # takes precedence and the remaining ones are never evaluated.
        return next(hits, (None, None))

    # The patterns arrive in their original order, so every one is checked and
    # the last one that matches takes precedence.
    outcome: tuple[Optional[bool], Optional[int]] = (None, None)
    for hit in hits:
        outcome = hit
    return outcome
def detailed_match_files(
    patterns: Iterable[Pattern],
    files: Iterable[str],
    all_matches: Optional[bool] = None,
) -> dict[str, 'MatchDetail']:
    """
    Matches the files to the patterns, and returns which patterns matched the
    files.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the
    normalized file paths to be matched against *patterns*.

    *all_matches* (:class:`bool` or :data:`None`) is whether to return all
    matched patterns (:data:`True`), or only the last matched pattern
    (:data:`False`). Default is :data:`None` for :data:`False`.

    Returns the matched files (:class:`dict`) which maps each matched file
    (:class:`str`) to the patterns that matched in order (:class:`.MatchDetail`).
    A file matched by an exclusion pattern is dropped from the result; an
    exclusion that matches a file which is not currently included is a no-op.
    """
    # Materialize one-shot iterables so that every pattern is matched against
    # the complete set of files.
    all_files = files if isinstance(files, Collection) else list(files)
    return_files = {}
    for pattern in patterns:
        if pattern.include is None:
            # Null-patterns neither include nor exclude anything.
            continue

        result_files = pattern.match(all_files)  # TODO: Replace with `.match_file()`.
        if pattern.include:
            # Add files and record pattern.
            for result_file in result_files:
                detail = return_files.get(result_file)
                if detail is None:
                    return_files[result_file] = MatchDetail([pattern])
                elif all_matches:
                    detail.patterns.append(pattern)
                else:
                    detail.patterns[0] = pattern
        else:
            # Remove files. The file may never have been included (or may have
            # been removed already by an earlier exclusion), so a missing key
            # must not raise.
            for file in result_files:
                return_files.pop(file, None)

    return return_files
def _filter_check_patterns(
    patterns: Iterable[Pattern],
) -> list[tuple[int, Pattern]]:
    """
    Filters out null-patterns while remembering each pattern's original
    position.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns.

    Returns a :class:`list` containing each indexed pattern (:class:`tuple`) which
    contains the pattern index (:class:`int`) and the actual pattern
    (:class:`.Pattern`). The index is the position in *patterns* before
    filtering.
    """
    indexed: list[tuple[int, Pattern]] = []
    for position, candidate in enumerate(patterns):
        if candidate.include is None:
            # Null-pattern: it can never include or exclude a file.
            continue
        indexed.append((position, candidate))
    return indexed
def _is_iterable(value: Any) -> bool:
    """
    Check whether the value is an iterable, treating text and byte strings as
    scalars rather than iterables.

    *value* is the value to check.

    Returns whether *value* is an iterable (:class:`bool`).
    """
    if isinstance(value, (str, bytes)):
        # Strings are iterable in Python, but are deliberately excluded here.
        return False
    return isinstance(value, Iterable)
@deprecated((
    "pathspec.util.iter_tree() is deprecated. Use iter_tree_files() instead."
))
def iter_tree(root, on_error=None, follow_links=None):
    """
    .. version-deprecated:: 0.10.0
        This is an alias for the :func:`.iter_tree_files` function.

    *root*, *on_error* and *follow_links* are passed through unchanged to
    :func:`.iter_tree_files`; see that function for their meaning.

    Returns whatever :func:`.iter_tree_files` returns for those arguments.
    """
    return iter_tree_files(root, on_error=on_error, follow_links=follow_links)
def iter_tree_entries(
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator['TreeEntry']:
    """
    Walks the specified directory for all files and directories.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    Raises :exc:`TypeError` if *on_error* is neither :data:`None` nor callable.
    Because this is a generator, the check runs when iteration starts, not when
    the function is called.

    Raises :exc:`.RecursionError` if recursion is detected.

    Returns an :class:`~collections.abc.Iterator` yielding each file or directory
    entry (:class:`.TreeEntry`) relative to *root*.
    """
    if not (on_error is None or callable(on_error)):
        raise TypeError(f"on_error:{on_error!r} is not callable.")

    walk_links = True if follow_links is None else follow_links

    # Start at the root itself (empty relative path) with an empty ancestor memo.
    root_full = os.path.abspath(root)
    yield from _iter_tree_entries_next(root_full, '', {}, on_error, walk_links)
def _iter_tree_entries_next(
    root_full: str,
    dir_rel: str,
    memo: dict[str, str],
    on_error: Callable[[OSError], None],
    follow_links: bool,
) -> Iterator['TreeEntry']:
    """
    Scan the directory and yield an entry for each file, directory and link
    found beneath it, descending into sub-directories recursively.

    *root_full* (:class:`str`) the absolute path to the root directory.

    *dir_rel* (:class:`str`) the path to the directory to scan relative to
    *root_full*.

    *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps
    each ancestor real path (:class:`str`) to relative path (:class:`str`).

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. It is called with the
    :exc:`OSError` raised while stat-ing a child; the child is then skipped.

    *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve
    to directories.

    Raises :exc:`.RecursionError` if the real path of the directory is already
    in *memo*.

    Yields each entry (:class:`.TreeEntry`).
    """
    dir_full = os.path.join(root_full, dir_rel)
    dir_real = os.path.realpath(dir_full)

    # The memo holds the canonical (real) path of every directory currently
    # being walked. Meeting one of them again means a link loops back onto an
    # ancestor, i.e., recursion has occurred.
    if dir_real in memo:
        raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel)
    memo[dir_real] = dir_rel

    with os.scandir(dir_full) as entries:
        entry: os.DirEntry
        for entry in entries:
            entry_rel = os.path.join(dir_rel, entry.name)

            # Stat the child itself, without following links.
            try:
                entry_lstat = entry.stat(follow_symlinks=False)
            except OSError as err:
                if on_error is not None:
                    on_error(err)
                continue

            # For a link, additionally stat the target; otherwise both stat
            # results are the same object.
            entry_stat = entry_lstat
            if entry.is_symlink():
                try:
                    entry_stat = entry.stat()
                except OSError as err:
                    if on_error is not None:
                        on_error(err)
                    continue

            if entry.is_dir(follow_symlinks=follow_links):
                # Directory: yield it first, then everything below it.
                yield TreeEntry(entry.name, entry_rel, entry_lstat, entry_stat)
                yield from _iter_tree_entries_next(root_full, entry_rel, memo, on_error, follow_links)
                continue

            if entry.is_file() or entry.is_symlink():
                # Either a file or an unfollowed link.
                yield TreeEntry(entry.name, entry_rel, entry_lstat, entry_stat)

    # NOTE: The canonical (real) path must be removed from the ancestors memo
    # once this directory is done. Otherwise a second, legitimate occurrence of
    # the same directory elsewhere in the tree would be misreported as
    # recursion. See
    # <https://github.com/cpburnz/python-path-specification/pull/7>.
    del memo[dir_real]
def iter_tree_files(
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator[str]:
    """
    Walks the specified directory for all files.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search
    for files.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    Raises :exc:`TypeError` if *on_error* is neither :data:`None` nor callable.
    Because this is a generator, the check runs when iteration starts, not when
    the function is called.

    Raises :exc:`.RecursionError` if recursion is detected.

    Returns an :class:`~collections.abc.Iterator` yielding the path to each file
    (:class:`str`) relative to *root*.
    """
    if not (on_error is None or callable(on_error)):
        raise TypeError(f"on_error:{on_error!r} is not callable.")

    walk_links = True if follow_links is None else follow_links

    # Start at the root itself (empty relative path) with an empty ancestor memo.
    root_full = os.path.abspath(root)
    yield from _iter_tree_files_next(root_full, '', {}, on_error, walk_links)
def _iter_tree_files_next(
    root_full: str,
    dir_rel: str,
    memo: dict[str, str],
    on_error: Callable[[OSError], None],
    follow_links: bool,
) -> Iterator[str]:
    """
    Scan the directory for all descendant files.

    *root_full* (:class:`str`) the absolute path to the root directory.

    *dir_rel* (:class:`str`) the path to the directory to scan relative to
    *root_full*.

    *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps
    each ancestor real path (:class:`str`) to relative path (:class:`str`).

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. This function only forwards
    it to the recursive calls; it does not invoke it itself.

    *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve
    to directories.

    Raises :exc:`.RecursionError` if the real path of the directory is already
    in *memo*.

    Yields each file path (:class:`str`).
    """
    dir_full = os.path.join(root_full, dir_rel)
    dir_real = os.path.realpath(dir_full)

    # The memo holds the canonical (real) path of every directory currently
    # being walked. Meeting one of them again means a link loops back onto an
    # ancestor, i.e., recursion has occurred.
    if dir_real in memo:
        raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel)
    memo[dir_real] = dir_rel

    with os.scandir(dir_full) as entries:
        entry: os.DirEntry
        for entry in entries:
            entry_rel = os.path.join(dir_rel, entry.name)

            if entry.is_dir(follow_symlinks=follow_links):
                # Directory: descend and yield the files below it.
                yield from _iter_tree_files_next(root_full, entry_rel, memo, on_error, follow_links)
                continue

            # Yield regular files, and links that are deliberately not followed.
            if entry.is_file() or (not follow_links and entry.is_symlink()):
                yield entry_rel

    # NOTE: The canonical (real) path must be removed from the ancestors memo
    # once this directory is done. Otherwise a second, legitimate occurrence of
    # the same directory elsewhere in the tree would be misreported as
    # recursion. See
    # <https://github.com/cpburnz/python-path-specification/pull/7>.
    del memo[dir_real]
def lookup_pattern(name: str) -> Callable[[AnyStr], Pattern]:
    """
    Looks up a registered pattern factory by name.

    *name* (:class:`str`) is the name of the pattern factory.

    Returns the registered pattern factory (:class:`~collections.abc.Callable`).

    Raises :exc:`KeyError` if no pattern factory is registered under *name*.
    """
    # Plain dict indexing: an unknown name propagates as KeyError.
    return _registered_patterns[name]
def match_file(patterns: Iterable[Pattern], file: str) -> bool:
    """
    Matches the file to the patterns. Every pattern is tested, and the last
    pattern that matches decides the outcome.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *file* (:class:`str`) is the normalized file path to be matched against
    *patterns*.

    Returns :data:`True` if *file* matched; otherwise, :data:`False`.
    """
    verdict = False
    for candidate in patterns:
        if candidate.include is None:
            # Null-pattern: never affects the result.
            continue
        if candidate.match_file(file) is None:
            continue
        # Later matches override earlier ones.
        verdict = candidate.include
    return verdict
@deprecated((
    "pathspec.util.match_files() is deprecated. Use match_file() with a loop for "
    "better results."
))
def match_files(
    patterns: Iterable[Pattern],
    files: Iterable[str],
) -> set[str]:
    """
    .. version-deprecated:: 0.10.0
        This function is no longer used. Use the :func:`.match_file` function with a
        loop for better results.

    Matches the files to the patterns.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the
    normalized file paths to be matched against *patterns*.

    Returns the matched files (:class:`set` of :class:`str`).
    """
    # Drop null-patterns once, up front, instead of re-testing them per file.
    active = [pattern for pattern in patterns if pattern.include is not None]
    return {file for file in files if match_file(active, file)}
def normalize_file(
    file: StrPath,
    separators: Optional[Collection[str]] = None,
) -> str:
    """
    Normalizes the file path to use the POSIX path separator (i.e., ``"/"``), and
    makes the path relative (removes a leading ``"/"`` or ``"./"``).

    *file* (:class:`str` or :class:`os.PathLike`) is the file path.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. This does
    not need to include the POSIX path separator (``"/"``), but including it will
    not affect the results. Default is ``None`` for :data:`.NORMALIZE_PATH_SEPS`.
    To prevent normalization, pass an empty container (e.g., an empty tuple
    ``()``).

    Returns the normalized file path (:class:`str`).
    """
    seps = NORMALIZE_PATH_SEPS if separators is None else separators

    # Convert path object to string, then rewrite every separator.
    normalized: str = os.fspath(file)
    for sep in seps:
        normalized = normalized.replace(sep, posixpath.sep)

    if normalized.startswith('/'):
        # Make path relative. Only a single leading slash is removed.
        return normalized[1:]
    if normalized.startswith('./'):
        # Remove current directory prefix.
        return normalized[2:]
    return normalized
@deprecated((
    "pathspec.util.normalize_files() is deprecated. Use normalize_file() with a "
    "loop for better results."
))
def normalize_files(
    files: Iterable[StrPath],
    separators: Optional[Collection[str]] = None,
) -> dict[str, list[StrPath]]:
    """
    .. version-deprecated:: 0.10.0
        This function is no longer used. Use the :func:`.normalize_file` function
        with a loop for better results.

    Normalizes the file paths to use the POSIX path separator.

    *files* (:class:`~collections.abc.Iterable` of :class:`str` or
    :class:`os.PathLike`) contains the file paths to be normalized.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns a :class:`dict` mapping each normalized file path (:class:`str`) to
    the original file paths (:class:`list` of :class:`str` or
    :class:`os.PathLike`).
    """
    grouped = {}
    for original in files:
        key = normalize_file(original, separators=separators)
        # Several original paths can normalize to the same key; keep them all,
        # in the order they were seen.
        grouped.setdefault(key, []).append(original)
    return grouped
def register_pattern(
    name: str,
    pattern_factory: Callable[[AnyStr], Pattern],
    override: Optional[bool] = None,
) -> None:
    """
    Registers the specified pattern factory.

    *name* (:class:`str`) is the name to register the pattern factory under.

    *pattern_factory* (:class:`~collections.abc.Callable`) is used to compile
    patterns. It must accept an uncompiled pattern (:class:`str`) and return the
    compiled pattern (:class:`.Pattern`).

    *override* (:class:`bool` or :data:`None`) optionally is whether to allow
    overriding an already registered pattern under the same name (:data:`True`),
    instead of raising an :exc:`.AlreadyRegisteredError` (:data:`False`). Default
    is :data:`None` for :data:`False`.

    Raises :exc:`TypeError` if *name* is not a string or *pattern_factory* is not
    callable.
    """
    # Validate the arguments before touching the registry.
    name_ok = isinstance(name, str)
    if not name_ok:
        raise TypeError(f"name:{name!r} is not a string.")

    factory_ok = callable(pattern_factory)
    if not factory_ok:
        raise TypeError(f"pattern_factory:{pattern_factory!r} is not callable.")

    # Refuse to silently replace an existing registration unless asked to.
    if not override and name in _registered_patterns:
        raise AlreadyRegisteredError(name, _registered_patterns[name])

    _registered_patterns[name] = pattern_factory
class AlreadyRegisteredError(Exception):
    """
    The :exc:`AlreadyRegisteredError` exception is raised when a pattern factory
    is registered under a name already in use.

    The name and the factory are stored positionally in :attr:`args` and are
    exposed through read-only properties.
    """

    def __init__(
        self,
        name: str,
        pattern_factory: Callable[[AnyStr], Pattern],
    ) -> None:
        """
        Initializes the :exc:`AlreadyRegisteredError` instance.

        *name* (:class:`str`) is the name of the registered pattern.

        *pattern_factory* (:class:`~collections.abc.Callable`) is the registered
        pattern factory.
        """
        super().__init__(name, pattern_factory)

    @property
    def name(self) -> str:
        """
        *name* (:class:`str`) is the name of the registered pattern.
        """
        return self.args[0]

    @property
    def pattern_factory(self) -> Callable[[AnyStr], Pattern]:
        """
        *pattern_factory* (:class:`~collections.abc.Callable`) is the registered
        pattern factory.
        """
        return self.args[1]

    @property
    def message(self) -> str:
        """
        *message* (:class:`str`) is the error message.
        """
        name = self.name
        factory = self.pattern_factory
        return f"{name!r} is already registered for pattern factory={factory!r}."
class RecursionError(Exception):
    """
    The :exc:`RecursionError` exception is raised when recursion is detected.

    The three paths are stored positionally in :attr:`args` and are exposed
    through read-only properties.
    """

    def __init__(
        self,
        real_path: str,
        first_path: str,
        second_path: str,
    ) -> None:
        """
        Initializes the :exc:`RecursionError` instance.

        *real_path* (:class:`str`) is the real path that recursion was encountered
        on.

        *first_path* (:class:`str`) is the first path encountered for *real_path*.

        *second_path* (:class:`str`) is the second path encountered for *real_path*.
        """
        super().__init__(real_path, first_path, second_path)

    @property
    def real_path(self) -> str:
        """
        *real_path* (:class:`str`) is the real path that recursion was
        encountered on.
        """
        return self.args[0]

    @property
    def first_path(self) -> str:
        """
        *first_path* (:class:`str`) is the first path encountered for
        :attr:`self.real_path <RecursionError.real_path>`.
        """
        return self.args[1]

    @property
    def second_path(self) -> str:
        """
        *second_path* (:class:`str`) is the second path encountered for
        :attr:`self.real_path <RecursionError.real_path>`.
        """
        return self.args[2]

    @property
    def message(self) -> str:
        """
        *message* (:class:`str`) is the error message.
        """
        real = self.real_path
        first = self.first_path
        second = self.second_path
        return f"Real path {real!r} was encountered at {first!r} and then {second!r}."
@dataclass(frozen=True)
class CheckResult(Generic[TStrPath]):
    """
    The :class:`CheckResult` class contains information about the file and which
    pattern matched it.

    The dataclass is declared with ``frozen=True``, so instances are read-only
    after construction. It is generic over the type of the file path
    (*TStrPath*).
    """

    # Make the class dict-less.
    __slots__ = (
        'file',
        'include',
        'index',
    )

    file: TStrPath
    """
    *file* (:class:`str` or :class:`os.PathLike`) is the file path.
    """

    include: Optional[bool]
    """
    *include* (:class:`bool` or :data:`None`) is whether to include or exclude the
    file. If :data:`None`, no pattern matched.
    """

    index: Optional[int]
    """
    *index* (:class:`int` or :data:`None`) is the index of the last pattern that
    matched. If :data:`None`, no pattern matched.
    """
class MatchDetail(object):
    """
    The :class:`.MatchDetail` class contains information about the patterns that
    matched a file.
    """

    # Make the class dict-less.
    __slots__ = ('patterns',)

    def __init__(self, patterns: Sequence[Pattern]) -> None:
        """
        Initialize the :class:`.MatchDetail` instance.

        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the patterns that matched the file in the order they were
        encountered.
        """

        self.patterns = patterns
        """
        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the patterns that matched the file in the order they were
        encountered.
        """
class TreeEntry(object):
    """
    The :class:`TreeEntry` class contains information about a file-system entry.

    Two stat results are cached per entry: one for the entry itself and one for
    the entry with links followed. The query methods read only those cached
    results.
    """

    # Make the class dict-less.
    __slots__ = ('_lstat', 'name', 'path', '_stat')

    def __init__(
        self,
        name: str,
        path: str,
        lstat: os.stat_result,
        stat: os.stat_result,
    ) -> None:
        """
        Initialize the :class:`TreeEntry` instance.

        *name* (:class:`str`) is the base name of the entry.

        *path* (:class:`str`) is the relative path of the entry.

        *lstat* (:class:`os.stat_result`) is the stat result of the direct entry.

        *stat* (:class:`os.stat_result`) is the stat result of the entry,
        potentially linked.
        """

        self.name: str = name
        """
        *name* (:class:`str`) is the base name of the entry.
        """

        self.path: str = path
        """
        *path* (:class:`str`) is the path of the entry.
        """

        self._lstat: os.stat_result = lstat
        """
        *_lstat* (:class:`os.stat_result`) is the stat result of the direct entry.
        """

        self._stat: os.stat_result = stat
        """
        *_stat* (:class:`os.stat_result`) is the stat result of the linked entry.
        """

    def is_dir(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a directory.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, a symlink to a directory will result in
        :data:`True`. Default is :data:`None` for :data:`True`.

        Returns whether the entry is a directory (:class:`bool`).
        """
        if follow_links is None or follow_links:
            mode = self._stat.st_mode
        else:
            mode = self._lstat.st_mode
        return stat.S_ISDIR(mode)

    def is_file(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a regular file.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, a symlink to a regular file will result in
        :data:`True`. Default is :data:`None` for :data:`True`.

        Returns whether the entry is a regular file (:class:`bool`).
        """
        if follow_links is None or follow_links:
            mode = self._stat.st_mode
        else:
            mode = self._lstat.st_mode
        return stat.S_ISREG(mode)

    def is_symlink(self) -> bool:
        """
        Returns whether the entry is a symbolic link (:class:`bool`).
        """
        # Only the un-followed stat result can identify the entry as a link.
        link_mode = self._lstat.st_mode
        return stat.S_ISLNK(link_mode)

    def stat(self, follow_links: Optional[bool] = None) -> os.stat_result:
        """
        Get the cached stat result for the entry.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, the stat result of the linked file will be
        returned. Default is :data:`None` for :data:`True`.

        Returns that stat result (:class:`os.stat_result`).
        """
        if follow_links is None or follow_links:
            return self._stat
        return self._lstat