# tools/scan_imports.py
"""Scan a source tree for absolute imports and write them to requirements.in.

Every ``*.py`` file under ``--src`` is parsed with :mod:`ast`; the top-level
name of each absolute import is collected, standard-library and project-local
names are dropped, and the remaining names are mapped to PyPI distribution
names through ``MODULE_TO_PYPI`` (falling back to the module name itself).
"""
import argparse
import ast
import re
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Set, Tuple

# Mapping: importable module name -> PyPI distribution name.
MODULE_TO_PYPI = {
    # Scientific stack
    "numpy": "numpy",
    "cv2": "opencv-python",
    "skimage": "scikit-image",
    "matplotlib": "matplotlib",
    "PIL": "pillow",
    "pillow": "pillow",
    "pydicom": "pydicom",
    # GUI
    "PyQt5": "PyQt5",
    # Utilities
    "yaml": "PyYAML",
    "bs4": "beautifulsoup4",
    "lxml": "lxml",
    "dateutil": "python-dateutil",
    "dotenv": "python-dotenv",
    "yattag": "yattag",
    # Networking
    "requests": "requests",
    "aiohttp": "aiohttp",
}

# Top-level names of project-local packages (extend as needed).
PROJECT_LOCAL_PREFIXES = {"knee"}

# sys.stdlib_module_names only exists on Python 3.10+.  On older interpreters
# we rely on the built-in module names plus the manual fallback list below.
STDLIB = set(getattr(sys, "stdlib_module_names", ()))
STDLIB.update(sys.builtin_module_names)
STDLIB.update({
    "typing", "pathlib", "json", "re", "subprocess", "shutil", "itertools",
    "functools", "collections", "dataclasses", "asyncio", "concurrent",
    "logging", "argparse", "base64", "hashlib", "hmac", "uuid", "tempfile",
    "time", "datetime", "math", "statistics", "http", "urllib", "xml", "csv",
    "sqlite3", "queue", "threading", "multiprocessing", "enum", "inspect",
    "traceback", "glob", "zipfile", "tarfile", "importlib", "pkgutil", "venv",
    # Additional common stdlib modules so the pre-3.10 fallback does not
    # report them as third-party packages.
    "os", "sys", "ast", "io", "abc", "copy", "contextlib", "struct", "random",
    "string", "textwrap", "warnings", "weakref", "pickle", "socket", "ssl",
    "signal", "platform", "types", "operator", "heapq", "bisect", "decimal",
    "fractions", "unittest", "doctest", "gzip", "bz2", "lzma", "zlib",
    "configparser", "getpass", "locale", "gettext", "email", "html", "ctypes",
    "array", "numbers", "secrets", "select", "selectors", "pprint", "codecs",
    "unicodedata", "errno", "atexit", "gc", "tkinter", "pdb", "timeit",
    "shlex", "fnmatch", "filecmp", "stat", "calendar", "contextvars", "mmap",
    "binascii", "difflib", "cmath", "keyword", "token", "tokenize", "dis",
    "site", "sysconfig", "builtins",
})

# Pseudo-modules that never correspond to an installable package.
IGNORED = {"__future__"}

# Leading project name of a requirement line (PEP 508 name characters).
_REQ_NAME_RE = re.compile(r"[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?")
# Runs of separators that PEP 503 treats as equivalent.
_NAME_SEPARATORS_RE = re.compile(r"[-_.]+")


def top_level(name: str) -> str:
    """Return the first component of a dotted module name."""
    return name.split(".")[0]


def find_py_files(src_dir: Path) -> Iterator[Path]:
    """Yield every ``*.py`` file under *src_dir*, skipping ``.venv`` trees.

    Only the part of the path below *src_dir* is inspected, so a source tree
    that itself lives somewhere under a ``.venv`` directory is still scanned.
    """
    for path in src_dir.rglob("*.py"):
        if ".venv" in path.relative_to(src_dir).parts:
            continue
        yield path


def collect_imports(py_file: Path) -> List[Tuple[str, str]]:
    """Return ``("abs", top_level_name)`` for each absolute import in *py_file*.

    Relative imports (``from . import x``) are treated as project-local and
    ignored.  A file that cannot be read or parsed is skipped with a warning
    on stderr and yields an empty list.
    """
    try:
        # utf-8-sig decodes plain UTF-8 and also strips a leading BOM.
        source = py_file.read_text(encoding="utf-8-sig")
        tree = ast.parse(source, filename=str(py_file))
    except (OSError, SyntaxError, ValueError, RecursionError) as err:
        # Best effort: one broken file must not abort the whole scan.
        print(f"[!] skipped {py_file}: {err}", file=sys.stderr)
        return []

    mods = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            mods.extend(("abs", top_level(alias.name)) for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # level > 0 marks a relative import; those are local by definition.
            if node.level:
                continue
            if node.module:
                mods.append(("abs", top_level(node.module)))
    return mods


def discover_local_top_levels(src_dir: Path) -> Set[str]:
    """Return the names that exist in *src_dir* as a top-level package/module."""
    local = set()
    # Package directories: the first path component below src_dir of every
    # directory that contains an __init__.py.
    for pkg_init in src_dir.rglob("__init__.py"):
        rel = pkg_init.parent.relative_to(src_dir)
        if rel.parts:
            local.add(rel.parts[0])
    # Stand-alone modules directly inside src_dir.
    for mod in src_dir.glob("*.py"):
        local.add(mod.stem)
    return local


def map_to_pypi(mods: Iterable[Tuple[str, str]], local_names: Set[str]) -> Dict[str, int]:
    """Count imports per PyPI package, dropping stdlib and local modules.

    Args:
        mods: ``(kind, top_level_module)`` pairs as produced by
            ``collect_imports``.
        local_names: top-level names that belong to the scanned project.

    Returns:
        A mapping of PyPI package name to the number of imports seen.
    """
    result = defaultdict(int)
    for _kind, module in mods:
        if module in IGNORED or module in STDLIB:
            continue
        if module in PROJECT_LOCAL_PREFIXES or module in local_names:
            continue
        result[MODULE_TO_PYPI.get(module, module)] += 1
    return result


def _requirement_key(line: str) -> str:
    """Return a comparable project name for a requirement line.

    The leading name is extracted (stopping at any version specifier, extras
    bracket, environment marker or whitespace) and normalized following
    PEP 503: lower-cased with runs of ``-``, ``_`` and ``.`` collapsed to
    ``-``.  Lines without a recognizable name are returned stripped, so they
    still compare equal to themselves.
    """
    stripped = line.strip()
    match = _REQ_NAME_RE.match(stripped)
    if match is None:
        return stripped
    return _NAME_SEPARATORS_RE.sub("-", match.group(0)).lower()


def merge_with_existing(out_path: Path, counts: dict) -> str:
    """Return the content of *out_path* extended with packages it lacks.

    Existing lines, including comments, are kept in their original order;
    packages from *counts* whose normalized name is not already listed are
    appended in sorted order.  The result always ends with a newline.
    """
    existing = []  # type: List[str]
    if out_path.exists():
        existing = [
            line.strip()
            for line in out_path.read_text(encoding="utf-8").splitlines()
        ]
    # Drop trailing blank lines so appended packages follow the last entry.
    while existing and not existing[-1]:
        existing.pop()

    known = {
        _requirement_key(line)
        for line in existing
        if line and not line.startswith("#")
    }
    lines = list(existing)
    for pkg in sorted(counts):
        if _requirement_key(pkg) not in known:
            lines.append(pkg)
    return "\n".join(lines) + "\n"


def main():
    """Command-line entry point: scan ``--src`` and write ``--out``."""
    ap = argparse.ArgumentParser(description="Scan imports and update requirements.in")
    ap.add_argument("--src", default="src", help="Source directory to scan")
    ap.add_argument("--out", default="requirements.in", help="Output requirements.in path")
    ap.add_argument("--update", action="store_true", help="Update existing file instead of overwrite")
    args = ap.parse_args()

    src_dir = Path(args.src).resolve()
    out_path = Path(args.out).resolve()

    # Without this guard a mistyped --src would overwrite the output file
    # with an empty requirements list.
    if not src_dir.is_dir():
        ap.error(f"source directory not found: {src_dir}")

    all_mods = []
    for py in find_py_files(src_dir):
        all_mods.extend(collect_imports(py))

    local_names = discover_local_top_levels(src_dir)
    counts = map_to_pypi(all_mods, local_names)

    if args.update and out_path.exists():
        content = merge_with_existing(out_path, counts)
    else:
        content = "\n".join(sorted(counts.keys())) + "\n"

    out_path.write_text(content, encoding="utf-8")
    print(f"[✓] requirements.in updated at {out_path}")


if __name__ == "__main__":
    main()