#!/usr/bin/env python3
"""Count total, code, comment and blank lines of the source files in a project.

The tool walks a directory tree, classifies every file by name/suffix into a
language group, counts its lines with a comment-syntax-aware counter and
prints per-language, overall and (optionally) per-top-level-directory tables.
"""

from __future__ import annotations

import argparse
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Iterator, Optional, Sequence, Tuple


@dataclass
class Stats:
    """Line counters for one file or for an aggregate of files."""

    files: int = 0
    total: int = 0
    code: int = 0
    comment: int = 0
    blank: int = 0

    def add(self, other: "Stats") -> None:
        """Accumulate the counters of *other* into this instance in place."""
        self.files += other.files
        self.total += other.total
        self.code += other.code
        self.comment += other.comment
        self.blank += other.blank


# Suffix groups; each group maps to one language label in detect_language().
C_LIKE_SUFFIXES = {
    ".c", ".cc", ".cpp", ".cxx", ".h", ".hh", ".hpp", ".hxx",
    ".ipp", ".tpp", ".ino", ".proto",
}
HASH_COMMENT_SUFFIXES = {".py", ".sh", ".bash", ".zsh", ".toml", ".yml", ".yaml"}
SLASH_COMMENT_SUFFIXES = {".js", ".ts", ".tsx", ".jsx", ".java", ".cs", ".rs", ".go"}
POWERSHELL_SUFFIXES = {".ps1", ".psm1", ".psd1"}
CMAKE_SUFFIXES = {".cmake"}

# Directory names (compared lower-cased) that are never descended into.
DEFAULT_EXCLUDE_DIRS = {
    ".git",
    ".pio",
    ".tmp",
    ".vscode",
    ".idea",
    "__pycache__",
    "managed_components",
}
# A directory named exactly "<prefix>" or "<prefix>.<anything>" is skipped
# unless build directories are explicitly included.
DEFAULT_EXCLUDE_PREFIXES = ("build",)
DEFAULT_EXCLUDE_FILES = {
    "sdkconfig",
    "sdkconfig.old",
}


def detect_language(path: Path) -> Optional[str]:
    """Return the language label for *path*, or ``None`` if it is not counted.

    The decision uses only the file name and its lower-cased suffix.
    """
    suffix = path.suffix.lower()
    if path.name == "CMakeLists.txt" or suffix in CMAKE_SUFFIXES:
        return "CMake"
    if suffix in C_LIKE_SUFFIXES:
        return "C/C++"
    if suffix in HASH_COMMENT_SUFFIXES:
        return "Script"
    if suffix in POWERSHELL_SUFFIXES:
        return "PowerShell"
    if suffix in SLASH_COMMENT_SUFFIXES:
        return "OtherCode"
    return None


def should_skip_dir(dirname: str, include_managed: bool, include_build: bool) -> bool:
    """Return ``True`` when the directory *dirname* must not be descended into.

    Args:
        dirname: Bare directory name (not a path); compared case-insensitively.
        include_managed: When true, ``managed_components`` is not skipped.
        include_build: When true, ``build`` / ``build.*`` are not skipped.
    """
    lower = dirname.lower()
    if lower in DEFAULT_EXCLUDE_DIRS:
        # managed_components is the only default exclusion with an opt-in.
        return not (lower == "managed_components" and include_managed)
    if include_build:
        return False
    return any(
        lower == prefix or lower.startswith(prefix + ".")
        for prefix in DEFAULT_EXCLUDE_PREFIXES
    )


def iter_source_files(
    root: Path, include_managed: bool, include_build: bool
) -> Iterator[Tuple[Path, str]]:
    """Yield ``(path, language)`` for every countable file below *root*."""
    for current_root, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk does not descend into excluded directories.
        dirnames[:] = [
            dirname
            for dirname in dirnames
            if not should_skip_dir(
                dirname, include_managed=include_managed, include_build=include_build
            )
        ]
        current_path = Path(current_root)
        for filename in filenames:
            if filename in DEFAULT_EXCLUDE_FILES:
                continue
            path = current_path / filename
            language = detect_language(path)
            if language:
                yield path, language


def count_hash_comment_lines(lines: Iterable[str], comment_prefix: str = "#") -> Stats:
    """Count lines of a file whose comments are whole lines starting with a prefix.

    A line is a comment only when its first non-blank characters are
    *comment_prefix*; a trailing comment after code counts as code.
    """
    stats = Stats(files=1)
    for raw_line in lines:
        stats.total += 1
        stripped = raw_line.strip()
        if not stripped:
            stats.blank += 1
        elif stripped.startswith(comment_prefix):
            stats.comment += 1
        else:
            stats.code += 1
    return stats


def count_powershell_lines(lines: Iterable[str]) -> Stats:
    """Count lines of a PowerShell file, honouring ``<# ... #>`` block comments.

    ``#`` line comments and ``<#`` block openers are recognised only at the
    start of a line (after indentation) or directly after a closing ``#>``;
    a block comment opened after code on the same line is not detected.
    Every line inside a block comment, including empty ones, is a comment line.
    """
    stats = Stats(files=1)
    in_block_comment = False
    for raw_line in lines:
        stats.total += 1
        rest = raw_line.strip()
        if not rest and not in_block_comment:
            stats.blank += 1
            continue

        has_code = False
        while True:
            if in_block_comment:
                end = rest.find("#>")
                if end == -1:
                    break  # the whole remainder belongs to the block comment
                in_block_comment = False
                rest = rest[end + 2:].lstrip()
            if rest.startswith("<#"):
                in_block_comment = True
                rest = rest[2:]
                continue
            # Whatever is left is code unless it is empty or a "#" line comment.
            has_code = bool(rest) and not rest.startswith("#")
            break

        if has_code:
            stats.code += 1
        else:
            stats.comment += 1
    return stats


def count_c_like_lines(lines: Iterable[str]) -> Stats:
    """Count lines of a file using ``//`` line and ``/* ... */`` block comments.

    A line containing any code is a code line even if it also carries a
    comment. Comment markers inside single- or double-quoted literals are
    ignored; literals are assumed not to span lines.
    """
    stats = Stats(files=1)
    in_block_comment = False
    for raw_line in lines:
        stats.total += 1
        line = raw_line.rstrip("\n\r")
        if not line.strip() and not in_block_comment:
            stats.blank += 1
            continue

        i = 0
        has_code = False
        has_comment = False
        length = len(line)
        while i < length:
            if in_block_comment:
                has_comment = True
                end = line.find("*/", i)
                if end == -1:
                    break  # comment continues on the next line
                in_block_comment = False
                i = end + 2
                continue
            if line.startswith("//", i):
                has_comment = True
                break  # rest of the line is a comment
            if line.startswith("/*", i):
                has_comment = True
                in_block_comment = True
                i += 2
                continue
            ch = line[i]
            if ch in ('"', "'"):
                # Skip the quoted literal so comment markers inside it are ignored.
                has_code = True
                quote = ch
                i += 1
                while i < length:
                    if line[i] == "\\":
                        i += 2  # skip the escaped character
                    elif line[i] == quote:
                        i += 1
                        break
                    else:
                        i += 1
                continue
            if not ch.isspace():
                has_code = True
            i += 1

        if has_code:
            stats.code += 1
        elif has_comment or in_block_comment:
            stats.comment += 1
        else:
            stats.blank += 1
    return stats


def count_file(path: Path, language: str) -> Stats:
    """Count the lines of *path* with the counter matching *language*.

    The file is decoded as UTF-8 with undecodable bytes dropped and is
    streamed line by line instead of being loaded as a whole.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        if language in ("C/C++", "OtherCode"):
            return count_c_like_lines(handle)
        if language == "PowerShell":
            return count_powershell_lines(handle)
        return count_hash_comment_lines(handle, comment_prefix="#")


def top_level_group(root: Path, path: Path) -> str:
    """Return the first directory of *path* below *root*, or ``"."`` for files in *root*.

    Raises:
        ValueError: If *path* is not located under *root*.
    """
    rel = path.relative_to(root)
    if len(rel.parts) <= 1:
        return "."
    return rel.parts[0]


def format_table(rows: list[tuple[str, Stats]], title: str) -> str:
    """Render *rows* as an aligned text table preceded by *title*.

    The name column is left-aligned and all numeric columns right-aligned.
    An empty *rows* list renders as the title followed by a ``(none)`` line.
    """
    if not rows:
        return f"{title}\n (none)"

    header = ("Name", "Files", "Total", "Code", "Comment", "Blank")
    table = [header] + [
        (name, s.files, s.total, s.code, s.comment, s.blank) for name, s in rows
    ]
    # Each column is as wide as its widest cell, header included.
    widths = [max(len(str(cell)) for cell in column) for column in zip(*table)]

    def fmt_row(values: tuple[object, ...]) -> str:
        return " " + " ".join(
            str(value).ljust(widths[idx]) if idx == 0 else str(value).rjust(widths[idx])
            for idx, value in enumerate(values)
        )

    lines = [title, fmt_row(header), fmt_row(tuple("-" * width for width in widths))]
    lines.extend(fmt_row(row) for row in table[1:])
    return "\n".join(lines)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the command-line interface and return the process exit code.

    Args:
        argv: Argument list without the program name; ``None`` means
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="统计工程中的代码行数")
    parser.add_argument("root", nargs="?", default=".", help="工程根目录,默认当前目录")
    parser.add_argument("--include-managed", action="store_true", help="包含 managed_components")
    parser.add_argument("--include-build", action="store_true", help="包含 build/build.* 目录")
    parser.add_argument("--by-dir", action="store_true", help="额外输出按顶层目录分组的统计")
    args = parser.parse_args(argv)

    root = Path(args.root).resolve()
    if not root.is_dir():
        # os.walk silently yields nothing for a bad root; report it instead.
        parser.error(f"root is not a directory: {root}")

    all_stats = Stats()
    by_language: Dict[str, Stats] = {}
    by_dir: Dict[str, Stats] = {}

    for path, language in iter_source_files(
        root, include_managed=args.include_managed, include_build=args.include_build
    ):
        try:
            stat = count_file(path, language)
        except OSError as exc:
            # One unreadable file or dangling symlink must not abort the scan.
            print(f"warning: skipped {path}: {exc}", file=sys.stderr)
            continue
        all_stats.add(stat)
        by_language.setdefault(language, Stats()).add(stat)
        if args.by_dir:
            by_dir.setdefault(top_level_group(root, path), Stats()).add(stat)

    excluded_dirs = (
        DEFAULT_EXCLUDE_DIRS - {"managed_components"}
        if args.include_managed
        else DEFAULT_EXCLUDE_DIRS
    )
    print(f"Root: {root}")
    print("Exclude dirs: " + ", ".join(sorted(excluded_dirs)))
    print("Exclude build dirs: " + ("no" if args.include_build else "yes"))
    print()

    # Largest code count first; ties are broken alphabetically by name.
    language_rows = sorted(by_language.items(), key=lambda item: (-item[1].code, item[0]))
    print(format_table(language_rows, "By Language"))
    print()
    print(format_table([("TOTAL", all_stats)], "Summary"))

    if args.by_dir:
        print()
        dir_rows = sorted(by_dir.items(), key=lambda item: (-item[1].code, item[0]))
        print(format_table(dir_rows, "By Top-Level Directory"))

    return 0


if __name__ == "__main__":
    raise SystemExit(main())