update: 2026-03-28 20:59
This commit is contained in:
234
InfoGenie-frontend/scripts/tidy_profile_templates.py
Normal file
234
InfoGenie-frontend/scripts/tidy_profile_templates.py
Normal file
@@ -0,0 +1,234 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""筛选个人主页模板:删除非静态项,并按网页 title 重命名顶层文件夹。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
# Root folder whose immediate sub-folders are the individual templates.
# NOTE(review): hard-coded absolute Windows path, presumably the author's
# local checkout; confirm or adjust before running on another machine.
BASE = Path(
    r"d:\SmyProjects\Frontend-Backend\InfoGenie\infogenie-frontend\public\toolbox\个人主页模板"
)

# A template without any HTML cannot be served as a static page; a template
# with at least this many PHP files is treated as needing a server, i.e. it
# is not purely static.
PHP_THRESHOLD = 5
|
||||
|
||||
|
||||
def count_html(root: Path) -> int:
    """Return how many HTML files live anywhere below *root*.

    A regular file is counted when its extension, compared
    case-insensitively, is ``.html`` or ``.htm``.
    """
    html_suffixes = (".html", ".htm")
    return sum(
        1
        for candidate in root.rglob("*")
        if candidate.is_file() and candidate.suffix.lower() in html_suffixes
    )
|
||||
|
||||
|
||||
def count_php(root: Path) -> int:
    """Return how many PHP files live anywhere below *root*.

    The ``.php`` extension is compared case-insensitively, in the same way
    ``count_html`` treats HTML extensions, so ``INDEX.PHP`` is counted on
    case-sensitive file systems as well.
    """
    return sum(
        1
        for p in root.rglob("*")
        if p.is_file() and p.suffix.lower() == ".php"
    )
|
||||
|
||||
|
||||
def extract_title_from_text(text: str) -> str | None:
    """Extract a folder-name-friendly title from HTML source.

    The first ``<title>`` element is tried first, then the first ``<h1>``.
    For each candidate, nested tags are removed, HTML entities are decoded,
    characters that are illegal in Windows file names are dropped,
    whitespace is collapsed and surrounding separators are trimmed.

    Args:
        text: Raw HTML markup.

    Returns:
        The cleaned title, or ``None`` when neither element yields a
        non-empty title shorter than 100 characters.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    import html

    for pattern in (
        # ``\b`` stops ``<title`` from matching longer tag names such as
        # ``<titlebar>``; the lazy group allows nested markup inside.
        r"<title\b[^>]*>(.*?)</title\s*>",
        r"<h1\b[^>]*>(.*?)</h1\s*>",
    ):
        m = re.search(pattern, text, re.I | re.DOTALL)
        if not m:
            continue
        raw = re.sub(r"<[^>]+>", "", m.group(1))
        # Decode entities before filtering so that e.g. ``&quot;`` or
        # ``&lt;`` cannot smuggle a forbidden character into the name.
        raw = html.unescape(raw)
        raw = re.sub(r'[<>:"/\\|?*]', "", raw)
        # Collapse whitespace after removing characters, otherwise
        # "a | b" would turn into "a  b" with a double space.
        title = re.sub(r"\s+", " ", raw).strip()
        title = title.strip(" -_|")
        if title and len(title) < 100:
            return title
    return None
|
||||
|
||||
|
||||
def read_html(path: Path) -> str:
    """Read an HTML file, guessing among the encodings these templates use.

    ``utf-8-sig`` is tried first: it decodes plain UTF-8 and also strips a
    leading byte-order mark, which a plain ``utf-8`` decode would leave in
    the text as ``\\ufeff``. GBK and GB2312 follow. If none of them decodes
    cleanly, the file is read as UTF-8 with undecodable bytes dropped.

    Args:
        path: File to read.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened or read. Retrying with another
            encoding cannot fix that, so it is not swallowed.
    """
    for enc in ("utf-8-sig", "gbk", "gb2312"):
        try:
            return path.read_text(encoding=enc)
        except UnicodeDecodeError:
            continue
    return path.read_text(encoding="utf-8", errors="ignore")
|
||||
|
||||
|
||||
def find_entry_html(folder: Path) -> Path | None:
    """Pick the HTML file that best represents the template in *folder*.

    Preference order: a file named ``index.html``, then ``index.htm``, then
    any other ``.html`` / ``.htm`` file (names and extensions compared
    case-insensitively). Within each class the file closest to *folder*
    wins, and remaining ties are broken by relative path, so the result does
    not depend on directory enumeration order.

    Args:
        folder: Template root to search recursively.

    Returns:
        The chosen file, or ``None`` when the folder contains no HTML file.
    """
    name_rank = {"index.html": 0, "index.htm": 1}

    def sort_key(p: Path) -> tuple[int, int, str]:
        rel = p.relative_to(folder)
        return (name_rank.get(p.name.lower(), 2), len(rel.parts), rel.as_posix())

    pages = [
        p
        for p in folder.rglob("*")
        if p.is_file() and p.suffix.lower() in (".html", ".htm")
    ]
    return min(pages, key=sort_key, default=None)
|
||||
|
||||
|
||||
def sanitize_folder_name(name: str) -> str:
    """Turn *name* into a string usable as a Windows folder name.

    Characters that Windows forbids in file names (``<>:"/\\|?*``) are
    removed, leading and trailing spaces and dots are stripped, and the
    result is capped at 120 characters.

    Args:
        name: Proposed folder name, e.g. a page title.

    Returns:
        The cleaned name, or the placeholder ``"未命名模板"`` when nothing
        is left after cleaning.
    """
    name = name.translate({ord(c): None for c in '<>:"/\\|?*'})
    name = name.strip(" .")
    if len(name) > 120:
        # Truncation can expose a new trailing space or dot, which Windows
        # does not allow at the end of a folder name, so strip both again.
        name = name[:120].rstrip(" .")
    return name or "未命名模板"
|
||||
|
||||
|
||||
def ps_single_quote(s: str) -> str:
    """Return *s* as a PowerShell single-quoted string literal.

    Inside single quotes PowerShell performs no expansion; the only
    characters that need escaping are quote characters, which are escaped by
    doubling. Besides the ASCII apostrophe, the PowerShell tokenizer also
    accepts the typographic quotes U+2018, U+2019, U+201A and U+201B as
    single-quote delimiters, so those are doubled as well. Page titles such
    as "Tom’s Home" would otherwise terminate the literal early.
    """
    doubled = {ord(q): q * 2 for q in "'\u2018\u2019\u201a\u201b"}
    return "'" + s.translate(doubled) + "'"
|
||||
|
||||
|
||||
def write_rename_ps1(renames: list[tuple[str, str]], out_path: Path) -> None:
    """Write a PowerShell script that performs *renames* in two passes.

    Each folder is first renamed to a unique temporary name and only
    afterwards to its final name, so swaps such as A->B together with B->A
    never collide on a name that is still taken. Pairs whose old and new
    names are identical are omitted. The script is written to *out_path* as
    UTF-8 with a byte-order mark.
    """
    run_id = uuid.uuid4().hex[:8]
    effective = [(src, dst) for src, dst in renames if src != dst]
    staged = [
        (src, f"__tmp_rename_{run_id}_{idx}__", dst)
        for idx, (src, dst) in enumerate(effective)
    ]

    def rename_cmd(current: str, target: str) -> str:
        # One Rename-Item call, relative to $base, with both names quoted.
        return (
            f"Rename-Item -LiteralPath (Join-Path $base {ps_single_quote(current)}) "
            f"-NewName {ps_single_quote(target)}"
        )

    script = [
        "# 由 tidy_profile_templates.py 生成:关闭占用该目录的 IDE/资源管理器窗口后执行。",
        "$ErrorActionPreference = 'Stop'",
        f"$base = {ps_single_quote(str(BASE))}",
        "",
        "# 阶段 1:改为临时名",
    ]
    script += [rename_cmd(src, tmp) for src, tmp, _dst in staged]
    script += ["", "# 阶段 2:改为最终名"]
    script += [rename_cmd(tmp, dst) for _src, tmp, dst in staged]

    out_path.write_text("\n".join(script) + "\n", encoding="utf-8-sig")
|
||||
|
||||
|
||||
def compute_renames(remaining: list[Path]) -> list[tuple[str, str]]:
    """Plan renaming each template folder after the title of its entry page.

    Folders without an entry HTML file, with an unreadable entry file, or
    without a usable title are skipped. When the wanted name is already
    taken, ``-2``, ``-3``, ... is appended until a free name is found.

    Args:
        remaining: Template folders to consider, in processing order.

    Returns:
        ``(old_name, new_name)`` pairs for the folders that need renaming.
    """
    # Names are tracked case-folded: BASE is a Windows path, and Windows
    # treats "Home" and "home" as the same folder name.
    used: set[str] = {c.name.casefold() for c in remaining}
    renames: list[tuple[str, str]] = []
    for folder in remaining:
        entry = find_entry_html(folder)
        if not entry:
            continue
        try:
            text = read_html(entry)
        except OSError:
            continue
        title = extract_title_from_text(text)
        if not title:
            continue
        new_name = sanitize_folder_name(title)
        if new_name == folder.name:
            continue

        own_key = folder.name.casefold()
        final = new_name
        i = 2
        # The folder's own current name never counts as a collision;
        # otherwise a folder already called "Title-2" would be bumped to
        # "Title-3" on one run and back to "Title-2" on the next.
        while final.casefold() in used and final.casefold() != own_key:
            final = f"{new_name}-{i}"
            i += 1
        if final == folder.name:
            # Already carries its de-duplicated name; nothing to do.
            continue

        used.discard(own_key)
        used.add(final.casefold())
        renames.append((folder.name, final))
    return renames
|
||||
|
||||
|
||||
def _apply_renames(renames: list[tuple[str, str]]) -> None:
    """Rename folders under BASE in two passes, undoing failed final renames.

    Pass 1 moves every folder to a unique temporary name and pass 2 moves it
    to its final name, so swaps cannot collide. A folder whose pass-2 rename
    fails, or whose target already exists, is moved back to its original
    name instead of being left under the temporary name.
    """
    pairs = [(o, n) for o, n in renames if o != n]
    tag = uuid.uuid4().hex[:8]
    mids: list[tuple[str, str, str]] = [
        (old_name, f"__tmp_rename_{tag}_{i}__", new_name)
        for i, (old_name, new_name) in enumerate(pairs)
    ]

    for old_name, mid, _new_name in mids:
        src = BASE / old_name
        if not src.is_dir():
            continue
        try:
            src.rename(BASE / mid)
        except OSError as e:
            print(f"阶段1 重命名失败(可稍后运行脚本): {old_name} -> {mid} {e}")

    stranded: list[tuple[str, str]] = []
    for old_name, mid, new_name in mids:
        src = BASE / mid
        dst = BASE / new_name
        if not src.is_dir():
            # Pass 1 failed or the folder never existed; nothing to finish.
            continue
        if dst.exists():
            print(f"跳过(目标已存在): {mid} -> {new_name}")
            stranded.append((old_name, mid))
            continue
        try:
            src.rename(dst)
            print(f"重命名: {old_name} -> {new_name}")
        except OSError as e:
            print(f"阶段2 重命名失败(可稍后运行脚本): {mid} -> {new_name} {e}")
            stranded.append((old_name, mid))

    # Roll back after all pass-2 renames so the check below sees which
    # original names have been taken over by other folders in the meantime.
    for old_name, mid in stranded:
        src = BASE / mid
        dst = BASE / old_name
        if dst.exists():
            print(f"无法恢复原名(名称已被占用),目录保留为临时名: {mid}")
            continue
        try:
            src.rename(dst)
            print(f"已恢复原名: {mid} -> {old_name}")
        except OSError as e:
            print(f"恢复原名失败,目录保留为临时名: {mid} {e}")


def main() -> None:
    """Command-line entry point.

    Without flags this is a dry run: it lists the template folders that
    would be deleted (no HTML at all, or at least ``PHP_THRESHOLD`` PHP
    files) and the planned renames, and changes nothing.

    Flags read from ``sys.argv``:
        --apply                 delete and rename for real.
        --write-rename-script   in a dry run, also write the PowerShell
                                rename script.
        --rename-only           only write the rename script for the current
                                folders, then exit.

    Exits with status 1 when ``BASE`` is not a directory.
    """
    dry = "--apply" not in sys.argv
    write_script = "--write-rename-script" in sys.argv
    rename_only = "--rename-only" in sys.argv
    script_path = Path(__file__).resolve().parent / "profile_template_renames.ps1"

    if not BASE.is_dir():
        print(f"Missing base: {BASE}")
        sys.exit(1)

    if rename_only:
        remaining = sorted((c for c in BASE.iterdir() if c.is_dir()), key=lambda x: x.name)
        renames = compute_renames(remaining)
        write_rename_ps1(renames, script_path)
        print(f"已写入重命名脚本: {script_path}")
        print("请在关闭占用该文件夹的程序后,在 PowerShell 中执行:")
        print(f" powershell -ExecutionPolicy Bypass -File \"{script_path}\"")
        return

    children = [c for c in BASE.iterdir() if c.is_dir()]
    to_delete: list[tuple[Path, str, int, int]] = []
    for child in children:
        h, p = count_html(child), count_php(child)
        if h == 0:
            to_delete.append((child, "no_html", h, p))
        elif p >= PHP_THRESHOLD:
            to_delete.append((child, "php_heavy", h, p))

    print("=== 将删除(非静态或无可展示 HTML)===")
    for path, reason, h, p in sorted(to_delete, key=lambda x: x[0].name):
        print(f" [{reason}] html={h} php={p} {path.name}")

    if dry:
        print("\n[DRY RUN] 加参数 --apply 执行删除与重命名\n")

    delete_set = {p for p, _, _, _ in to_delete}

    if not dry:
        for path, _, _, _ in to_delete:
            shutil.rmtree(path, ignore_errors=False)
            print(f"已删除: {path.name}")

    # In a dry run the doomed folders still exist on disk, so exclude them
    # explicitly; after a real run they are already gone.
    remaining = sorted(
        (c for c in BASE.iterdir() if c.is_dir() and (not dry or c not in delete_set)),
        key=lambda x: x.name,
    )
    renames = compute_renames(remaining)

    print("=== 计划重命名(按页面 title)===")
    for old, new in renames:
        if old != new:
            print(f" {old}\n -> {new}")

    if dry:
        if write_script:
            write_rename_ps1(renames, script_path)
            print(f"\n已写入重命名脚本: {script_path}")
        return

    # Written before renaming so it can be run by hand if renames fail.
    write_rename_ps1(renames, script_path)
    print(f"\n已写入重命名脚本(若本机重命名失败可手动执行): {script_path}")

    _apply_renames(renames)
|
||||
|
||||
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user