# -*- coding: utf-8 -*-
"""Curate personal-homepage templates.

Deletes template folders that are not static sites and renames the remaining
top-level folders after the title of their entry HTML page.

Command-line flags (looked up in ``sys.argv``):

* ``--apply``: actually delete and rename; without it the run is a dry run.
* ``--write-rename-script``: during a dry run, also write the PowerShell
  rename script.
* ``--rename-only``: skip deletion and only write the PowerShell rename script.
"""
from __future__ import annotations

import html
import re
import shutil
import sys
import uuid
from collections import Counter
from pathlib import Path

BASE = Path(
    r"d:\SmyProjects\Frontend-Backend\InfoGenie\infogenie-frontend\public\toolbox\个人主页模板"
)

# A folder without any HTML cannot be shown as a static page; a folder with
# many PHP files is treated as needing a server, i.e. not purely static.
PHP_THRESHOLD = 5

_HTML_SUFFIXES = (".html", ".htm")

# Entry-page names in order of preference.
_INDEX_NAMES = ("index.html", "index.htm")

# Title sources in order of preference: <title>, then the first <h1>.
_TITLE_PATTERNS = tuple(
    re.compile(rf"<{tag}\b[^>]*>(.*?)</{tag}\s*>", re.I | re.DOTALL)
    for tag in ("title", "h1")
)

# Characters that are not allowed in Windows file names.
_FORBIDDEN_TABLE = {ord(c): None for c in '<>:"/\\|?*'}

# PowerShell accepts the typographic quotes as single-quote delimiters too, so
# every one of them has to be doubled inside a single-quoted literal.
_PS_QUOTE_CHARS = "'\u2018\u2019\u201a\u201b"


def count_html(root: Path) -> int:
    """Return the number of ``.html``/``.htm`` files anywhere below *root*."""
    return sum(
        1
        for p in root.rglob("*")
        if p.is_file() and p.suffix.lower() in _HTML_SUFFIXES
    )


def count_php(root: Path) -> int:
    """Return the number of ``.php`` files anywhere below *root*.

    The suffix is compared case-insensitively, like in :func:`count_html`.
    """
    return sum(
        1 for p in root.rglob("*") if p.is_file() and p.suffix.lower() == ".php"
    )


def extract_title_from_text(text: str) -> str | None:
    """Extract a folder-name-friendly title from HTML source.

    The ``<title>`` element is tried first, then the first ``<h1>``. Nested
    tags are removed, HTML entities are decoded, whitespace is collapsed and
    characters that Windows forbids in file names are dropped.

    Returns:
        The cleaned title, or ``None`` when no candidate yields a non-empty
        title shorter than 100 characters.
    """
    for pattern in _TITLE_PATTERNS:
        m = pattern.search(text)
        if not m:
            continue
        raw = re.sub(r"<[^>]+>", "", m.group(1))
        # Decode entities after stripping tags so "&lt;b&gt;" is not mistaken
        # for markup; any resulting "<" or ">" is removed just below.
        raw = html.unescape(raw)
        title = re.sub(r"\s+", " ", raw).strip()
        title = re.sub(r'[<>:"/\\|?*]', "", title)
        title = title.strip(" -_|")
        if title and len(title) < 100:
            return title
    return None


def read_html(path: Path) -> str:
    """Read *path* as text, guessing between UTF-8 and common Chinese encodings.

    The file is read once as bytes. ``utf-8-sig`` is tried first (it also
    decodes plain UTF-8 and drops a leading BOM), then ``gbk`` and
    ``gb18030``. If none of them decodes cleanly, the bytes are decoded as
    UTF-8 with undecodable bytes dropped.

    Raises:
        OSError: if the file cannot be read.
    """
    data = path.read_bytes()
    for enc in ("utf-8-sig", "gbk", "gb18030"):
        try:
            return data.decode(enc)
        except UnicodeDecodeError:
            continue
    return data.decode("utf-8", errors="ignore")


def find_entry_html(folder: Path) -> Path | None:
    """Return the most likely entry page below *folder*, or ``None``.

    Preference order: a file named ``index.html``, then ``index.htm``, then
    any other HTML file. Within each group the shallowest path wins and ties
    are broken alphabetically, so the result does not depend on directory
    traversal order.
    """
    candidates = [
        p
        for p in folder.rglob("*")
        if p.is_file() and p.suffix.lower() in _HTML_SUFFIXES
    ]
    if not candidates:
        return None

    def rank(p: Path) -> tuple[int, int, str]:
        rel = p.relative_to(folder)
        lowered = p.name.lower()
        if lowered in _INDEX_NAMES:
            priority = _INDEX_NAMES.index(lowered)
        else:
            priority = len(_INDEX_NAMES)
        return (priority, len(rel.parts), rel.as_posix().casefold())

    return min(candidates, key=rank)


def sanitize_folder_name(name: str) -> str:
    """Turn *name* into a valid Windows folder name.

    Forbidden characters are removed, surrounding spaces and dots are
    stripped, and the result is capped at 120 characters. A fallback name is
    returned when nothing is left.
    """
    name = name.translate(_FORBIDDEN_TABLE).strip(" .")
    if len(name) > 120:
        # Windows silently drops trailing dots and spaces, so strip them here
        # as well; otherwise the planned name would differ from the real one.
        name = re.sub(r"[\s.]+$", "", name[:120])
    return name or "未命名模板"


def ps_single_quote(s: str) -> str:
    """Return *s* as a PowerShell single-quoted string literal.

    Every single-quote character, including the typographic variants that
    PowerShell treats as quote delimiters, is escaped by doubling it.
    """
    escaped = "".join(ch * 2 if ch in _PS_QUOTE_CHARS else ch for ch in s)
    return f"'{escaped}'"


def _two_phase_plan(renames: list[tuple[str, str]]) -> list[tuple[str, str, str]]:
    """Return ``(old, temporary, new)`` triples for every effective rename.

    Pairs whose old and new names are equal are dropped. Temporary names
    carry a random tag so that leftovers of an earlier run cannot clash.
    """
    tag = uuid.uuid4().hex[:8]
    pairs = [(o, n) for o, n in renames if o != n]
    return [
        (old, f"__tmp_rename_{tag}_{i}__", new)
        for i, (old, new) in enumerate(pairs)
    ]


def write_rename_ps1(renames: list[tuple[str, str]], out_path: Path) -> None:
    """Write a PowerShell script that performs *renames* below ``BASE``.

    The script renames in two phases (old -> temporary, temporary -> new) so
    that chains such as A->B together with B->A do not collide. It is written
    as UTF-8 with a BOM.
    """
    plan = _two_phase_plan(renames)
    lines = [
        "# 由 tidy_profile_templates.py 生成:关闭占用该目录的 IDE/资源管理器窗口后执行。",
        "$ErrorActionPreference = 'Stop'",
        f"$base = {ps_single_quote(str(BASE))}",
        "",
        "# 阶段 1:改为临时名",
    ]
    lines.extend(
        f"Rename-Item -LiteralPath (Join-Path $base {ps_single_quote(old)}) "
        f"-NewName {ps_single_quote(mid)}"
        for old, mid, _new in plan
    )
    lines.extend(["", "# 阶段 2:改为最终名"])
    lines.extend(
        f"Rename-Item -LiteralPath (Join-Path $base {ps_single_quote(mid)}) "
        f"-NewName {ps_single_quote(new)}"
        for _old, mid, new in plan
    )
    out_path.write_text("\n".join(lines) + "\n", encoding="utf-8-sig")


def compute_renames(remaining: list[Path]) -> list[tuple[str, str]]:
    """Plan title-based renames for the folders in *remaining*.

    A folder is skipped when it has no entry page, the page cannot be read,
    no title can be extracted, or it already carries the target name. Name
    clashes are resolved by appending ``-2``, ``-3``, and so on. Names are
    compared case-insensitively because the target file system does not
    distinguish case.

    Returns:
        ``(old_name, new_name)`` pairs in the order of *remaining*.
    """
    # A counter instead of a set: on a case-sensitive file system two folders
    # may share one casefolded key, and both must keep the name reserved.
    used = Counter(c.name.casefold() for c in remaining)
    renames: list[tuple[str, str]] = []
    for folder in remaining:
        entry = find_entry_html(folder)
        if not entry:
            continue
        try:
            text = read_html(entry)
        except OSError:
            continue
        title = extract_title_from_text(text)
        if not title:
            continue
        new_name = sanitize_folder_name(title)
        if new_name == folder.name:
            continue
        # Release the folder's own name first so that a rename which only
        # changes letter case does not clash with itself.
        used[folder.name.casefold()] -= 1
        final = new_name
        i = 2
        while used[final.casefold()] > 0:
            final = f"{new_name}-{i}"
            i += 1
        used[final.casefold()] += 1
        renames.append((folder.name, final))
    return renames


def _roll_back(base: Path, old_name: str, mid: str, new_name: str) -> tuple[str, str]:
    """Move a staged folder back to its old name after a failed second phase.

    Returns the pair that still has to be renamed: ``(old_name, new_name)``
    when the rollback worked, ``(mid, new_name)`` when the folder had to stay
    under its temporary name.
    """
    origin = base / old_name
    if origin.exists():
        print(f"回滚失败,目录保留临时名(原名已被占用): {mid}")
        return (mid, new_name)
    try:
        (base / mid).rename(origin)
    except OSError as e:
        print(f"回滚失败,目录保留临时名: {mid} {e}")
        return (mid, new_name)
    return (old_name, new_name)


def apply_renames(base: Path, renames: list[tuple[str, str]]) -> list[tuple[str, str]]:
    """Rename folders below *base* in two phases and report what is left.

    Phase 1 moves every source folder to a temporary name; phase 2 moves it
    to its final name. When phase 2 cannot complete for a folder, the folder
    is moved back to its old name so that it is never left under a temporary
    name unless the rollback itself fails.

    Returns:
        ``(current_name, new_name)`` pairs that still have to be renamed.
    """
    pending: list[tuple[str, str]] = []
    staged: list[tuple[str, str, str]] = []
    for old_name, mid, new_name in _two_phase_plan(renames):
        src = base / old_name
        if not src.is_dir():
            continue
        try:
            src.rename(base / mid)
        except OSError as e:
            print(f"阶段1 重命名失败(可稍后运行脚本): {old_name} -> {mid} {e}")
            pending.append((old_name, new_name))
        else:
            staged.append((old_name, mid, new_name))

    for old_name, mid, new_name in staged:
        src = base / mid
        dst = base / new_name
        if dst.exists():
            print(f"跳过(目标已存在): {mid} -> {new_name}")
            pending.append(_roll_back(base, old_name, mid, new_name))
            continue
        try:
            src.rename(dst)
        except OSError as e:
            print(f"阶段2 重命名失败(可稍后运行脚本): {mid} -> {new_name} {e}")
            pending.append(_roll_back(base, old_name, mid, new_name))
        else:
            print(f"重命名: {old_name} -> {new_name}")
    return pending


def main() -> None:
    """Run the clean-up according to the command-line flags.

    Exits with status 1 when ``BASE`` is not a directory.
    """
    dry = "--apply" not in sys.argv
    write_script = "--write-rename-script" in sys.argv
    rename_only = "--rename-only" in sys.argv
    script_path = Path(__file__).resolve().parent / "profile_template_renames.ps1"

    if not BASE.is_dir():
        print(f"Missing base: {BASE}")
        sys.exit(1)

    if rename_only:
        remaining = sorted(
            (c for c in BASE.iterdir() if c.is_dir()), key=lambda x: x.name
        )
        renames = compute_renames(remaining)
        write_rename_ps1(renames, script_path)
        print(f"已写入重命名脚本: {script_path}")
        print("请在关闭占用该文件夹的程序后,在 PowerShell 中执行:")
        print(f" powershell -ExecutionPolicy Bypass -File \"{script_path}\"")
        return

    children = [c for c in BASE.iterdir() if c.is_dir()]
    to_delete: list[tuple[Path, str, int, int]] = []
    for child in children:
        h, p = count_html(child), count_php(child)
        if h == 0:
            to_delete.append((child, "no_html", h, p))
        elif p >= PHP_THRESHOLD:
            to_delete.append((child, "php_heavy", h, p))

    print("=== 将删除(非静态或无可展示 HTML)===")
    for path, reason, h, p in sorted(to_delete, key=lambda x: x[0].name):
        print(f" [{reason}] html={h} php={p} {path.name}")
    if dry:
        print("\n[DRY RUN] 加参数 --apply 执行删除与重命名\n")

    delete_set = {p for p, _, _, _ in to_delete}
    if not dry:
        for path, _, _, _ in to_delete:
            shutil.rmtree(path, ignore_errors=False)
            print(f"已删除: {path.name}")

    # In a dry run the doomed folders still exist on disk, so leave them out
    # explicitly; after a real run they are already gone.
    remaining = sorted(
        (
            c
            for c in BASE.iterdir()
            if c.is_dir() and (not dry or c not in delete_set)
        ),
        key=lambda x: x.name,
    )
    renames = compute_renames(remaining)

    print("=== 计划重命名(按页面 title)===")
    for old, new in renames:
        if old != new:
            print(f" {old}\n -> {new}")

    if dry:
        if write_script:
            write_rename_ps1(renames, script_path)
            print(f"\n已写入重命名脚本: {script_path}")
        return

    # Write the full script before touching anything so that it exists even
    # if the in-process rename is interrupted.
    write_rename_ps1(renames, script_path)
    print(f"\n已写入重命名脚本(若本机重命名失败可手动执行): {script_path}")

    pending = apply_renames(BASE, renames)
    if pending:
        # Keep the script runnable: list only what is still outstanding,
        # under the names the folders carry right now.
        write_rename_ps1(pending, script_path)
        print(f"\n仍有 {len(pending)} 项未完成,已更新重命名脚本: {script_path}")


if __name__ == "__main__":
    main()