Refactor code structure for improved readability and maintainability
parent 389d72a136
commit aa4c067ea8
1685 changed files with 393439 additions and 71932 deletions
258
.venv_codegen/Lib/site-packages/pytokens/cli.py
Normal file
@@ -0,0 +1,258 @@
"""CLI interface for pytokens."""

from __future__ import annotations

import argparse
import enum
import io
import json
import os.path
import tokenize
import warnings
from typing import Iterable, NamedTuple

import pytokens


class ValidationStatus(enum.Enum):
    """Status of validation for a single file."""

    SUCCESS = "SUCCESS"
    SKIP = "SKIP"
    FAILURE = "FAILURE"


class CLIArgs:
    """Typed namespace that `parser.parse_args()` fills in below."""

    filepath: str
    validate: bool
    issue_128233_handling: bool
    json: bool
    strict: bool
    quiet: bool


def cli(argv: list[str] | None = None) -> int:
    """CLI interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument("filepath")
    parser.add_argument(
        "--no-128233-handling",
        dest="issue_128233_handling",
        action="store_false",
    )
    parser.add_argument("--validate", action="store_true")
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output validation results as JSON",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit with code 1 if any validation failures occur",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Suppress visual output (dots, S, F)",
    )
    args = parser.parse_args(argv, namespace=CLIArgs())

    # --json implies --quiet
    if args.json:
        args.quiet = True

    if os.path.isdir(args.filepath):
        files = find_all_python_files(args.filepath)
        verbose = False
    else:
        files = [args.filepath]
        verbose = True

    validation_results: list[dict[str, str]] = []
    failure_count = 0

    for filepath in sorted(files):
        with open(filepath, "rb") as file:
            try:
                encoding, read_bytes = tokenize.detect_encoding(file.readline)
            except SyntaxError:
                if args.validate:
                    # Broken `# coding:` comment; the tokenizer bails, so skip the file.
                    if not args.quiet:
                        print("\033[1;33mS\033[0m", end="", flush=True)
                    if args.json:
                        validation_results.append(
                            {
                                "filepath": filepath,
                                "status": ValidationStatus.SKIP.value,
                            }
                        )
                    continue

                raise

            # detect_encoding() consumed the first line(s); stitch them back on.
            source = b"".join(read_bytes) + file.read()

        if args.validate:
            status = validate(
                filepath,
                source,
                encoding,
                verbose=verbose,
                issue_128233_handling=args.issue_128233_handling,
                quiet=args.quiet,
            )

            if args.json:
                validation_results.append(
                    {
                        "filepath": filepath,
                        "status": status.value,
                    }
                )

            if status == ValidationStatus.FAILURE:
                failure_count += 1

        else:
            source_str = source.decode(encoding)
            for token in pytokens.tokenize(
                source_str,
                issue_128233_handling=args.issue_128233_handling,
            ):
                token_source = source_str[token.start_index : token.end_index]
                print(repr(token_source), token)

    if args.json and args.validate:
        print(json.dumps(validation_results, indent=2))

    if args.strict and failure_count > 0:
        return 1

    return 0
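
# Example invocations (a sketch; this assumes the module is exposed as a
# `pytokens` console script or is runnable via `python -m`, which this file
# alone does not confirm):
#
#   pytokens path/to/file.py                    # print every token in one file
#   pytokens src/ --validate                    # validate a whole directory tree
#   pytokens src/ --validate --json --strict    # JSON output, exit 1 on any failure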


class TokenTuple(NamedTuple):
    type: str
    start: tuple[int, int]
    end: tuple[int, int]


def validate(
    filepath: str,
    source: bytes,
    encoding: str,
    *,
    issue_128233_handling: bool,
    verbose: bool = True,
    quiet: bool = False,
) -> ValidationStatus:
    """Validate pytokens output against CPython's tokenize module for one file."""
    warnings.simplefilter("ignore")

    # Ensure the source ends with a newline, so every token end index is valid.
    if not source.endswith(b"\n"):
        source = source + b"\n"

    # Same as .splitlines(keepends=True), but doesn't split on form feeds (\x0c).
    sourcelines = [line + b"\n" for line in source.split(b"\n")]
    # For that last NEWLINE token that sometimes sits on an imaginary extra line.
    sourcelines.append(b"\n")

    source_file = io.BytesIO(source)
    builtin_tokens = tokenize.tokenize(source_file.readline)
    # Drop the leading ENCODING token.
    next(builtin_tokens)

    try:
        expected_tokens_unprocessed = [
            TokenTuple(tokenize.tok_name[token.type], token.start, token.end)
            for token in builtin_tokens
        ]
    except tokenize.TokenError:
        if not quiet:
            print("\033[1;33mS\033[0m", end="", flush=True)
        return ValidationStatus.SKIP

    expected_tokens = [expected_tokens_unprocessed[0]]
    for index, token in enumerate(expected_tokens_unprocessed[1:], start=1):
        last_token = expected_tokens[-1]

        current_token = token
        # Merge consecutive FSTRING_MIDDLE tokens; it's odd that CPython emits
        # them as separate tokens in the first place.
        if current_token.type == last_token.type == "FSTRING_MIDDLE":
            expected_tokens.pop()
            current_token = TokenTuple(
                current_token.type,
                last_token.start,
                current_token.end,
            )

        if index + 1 < len(expected_tokens_unprocessed):
            # When an FSTRING_MIDDLE ends with `{{{` as in f'x{{{1}', Python eats
            # the last { char as well as its end index, so we get a `x{` token
            # instead of the expected `x{{` token. This fixes that case: there
            # should essentially never be a gap between an FSTRING_MIDDLE ending
            # and the `{` op after it. Same deal for `}}}"`.
            next_token = expected_tokens_unprocessed[index + 1]
            if (
                current_token.type == "FSTRING_MIDDLE"
                and next_token.type in ("OP", "FSTRING_END")
                and next_token.start[0] == current_token.end[0]
                and next_token.start[1] > current_token.end[1]
            ):
                expected_tokens.append(
                    TokenTuple(
                        current_token.type,
                        current_token.start,
                        next_token.start,
                    )
                )
                continue

        expected_tokens.append(current_token)

    source_string = source.decode(encoding)
    our_tokens = (
        TokenTuple(
            token.type.to_python_token(),
            (token.start_line, token.start_col),
            (token.end_line, token.end_col),
        )
        for token in pytokens.tokenize(
            source_string, issue_128233_handling=issue_128233_handling
        )
        if token.type != pytokens.TokenType.whitespace
    )

    for builtin_token, our_token in zip(expected_tokens, our_tokens, strict=True):
        mismatch = builtin_token != our_token
        if mismatch or verbose:
            if not quiet:
                print("EXPECTED", builtin_token)
                print("---- GOT", our_token)

        if mismatch:
            if not quiet:
                print("Filepath:", filepath)
                print("\033[1;31mF\033[0m", end="", flush=True)
            return ValidationStatus.FAILURE

    if not quiet:
        print("\033[1;32m.\033[0m", end="", flush=True)
    return ValidationStatus.SUCCESS
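
# Programmatic use of validate() (a hypothetical helper, not part of this
# file; it only illustrates the calling convention):
#
#   def file_matches_cpython(path: str) -> bool:
#       with open(path, "rb") as f:
#           encoding, first_lines = tokenize.detect_encoding(f.readline)
#           source = b"".join(first_lines) + f.read()
#       status = validate(
#           path, source, encoding,
#           issue_128233_handling=True, verbose=False, quiet=True,
#       )
#       return status is not ValidationStatus.FAILURE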


def find_all_python_files(directory: str) -> Iterable[str]:
    """Recursively find all Python files in the given directory."""
    python_files = set()
    for root, _, files in os.walk(directory, followlinks=False):
        for file in files:
            if file.endswith(".py"):
                python_files.add(os.path.join(root, file))
    return python_files
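
# A direct-execution shim, if one were wanted here (an assumption; the real
# entry point presumably lives in the package metadata, not in this file):
#
#   if __name__ == "__main__":
#       import sys
#       sys.exit(cli())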