il y a 1 mois · 8048dc25d6
--- a/src/api/miaoguo/literacyController.js
+++ b/src/api/miaoguo/literacyController.js
@@ -445,7 +445,7 @@ async function getAiData(word){
 
				 
			
 
				     let aiProvider="";
			
 
				     //aiProvider="doubao-seed-1-8-251228";
			
 
				-    aiProvider="doubao-1-5-lite-32k-250115";
			
 
				+    aiProvider="doubao-deepseek-v4-flash-260425";
			
 
				     
			
 
				     //生成结果
			
 
				     result = await aiController.generateArticle(content, aiProvider);
			
--- a/src/api/yjbdc/aiController.js
+++ b/src/api/yjbdc/aiController.js
@@ -497,6 +497,8 @@ class AIProviderFactory {
 
				                 return new VolcesAIProvider("deepseek-v3-1-250821");
			
 
				             case 'doubao-deepseek-v3-2-251201':
			
 
				                 return new VolcesAIProvider("deepseek-v3-2-251201");
			
 
				+            case 'doubao-deepseek-v4-flash-260425':
			
 
				+                return new VolcesAIProvider("deepseek-v4-flash-260425");
			
 
				 
			
 
				             case 'doubao-deepseek-r1-250528':
			
 
				                 return new VolcesAIProvider("deepseek-r1-250528");
			
--- a/src/api/yjbdc/yjbdcController.js
+++ b/src/api/yjbdc/yjbdcController.js
@@ -105,7 +105,7 @@ export async function GenerateArticle(ctx) {
 
				                 //'doubao-kimi-k2-250711': 100,
			
 
				                 //'doubao-deepseek-v3-2-251201': 100,
			
 
				                 //'doubao-seed-1-8-251228': 100,
			
 
				-                'doubao-1-5-lite-32k-250115':100,
			
 
				+                'doubao-deepseek-v4-flash-260425':100,
			
 
				                 //'ali-Moonshot-kimi-k2.5': 100,
			
 
				             });
			
 
				            
			
--- a/src/util/constant/index.js
+++ b/src/util/constant/index.js
@@ -27,12 +27,13 @@ export default {
 
				             {Name:"人生励志",ID:12,CSS:"",Eng:"Inspirational",English:"Inspirational",Content:"做一个有价值的人"}
			
 
				         ];        
			
 
				         result.AIVersion=[
			
 
				-            {Version:"db20",BuildSecond:45,Model:"doubao-1-5-lite-32k-250115",Content:"字节豆包2.0\n平均45秒生成",CSS:""},
			
 
				+            {Version:"zdp4f",BuildSecond:30,Model:"doubao-deepseek-v4-flash-260425",Content:"字节deepseek_v4flash\n平均30秒生成",CSS:""},
			
 
				             {Version:"1.0",BuildSecond:45,Model:"",Content:"词句丰富，结构简明\n平均30秒生成",CSS:"Selected"},
			
 
				             {Version:"1.5",BuildSecond:60,Model:"",Content:"深度表达，更多要素\n平均60秒生成",CSS:""},
			
 
				             {Version:"db15",BuildSecond:30,Model:"doubao-1-5-pro-32k-250115",Content:"字节豆包1.5\n平均30秒生成",CSS:""},
			
 
				             {Version:"db16",BuildSecond:60,Model:"doubao-seed-1-6-250615",Content:"字节豆包1.6\n平均60秒生成",CSS:""},
			
 
				             {Version:"db18",BuildSecond:30,Model:"doubao-seed-1-8-251228",Content:"字节豆包1.8\n平均30秒生成",CSS:""},
			
 
				+            {Version:"db20",BuildSecond:45,Model:"doubao-1-5-lite-32k-250115",Content:"字节豆包2.0\n平均45秒生成",CSS:""},
			
 
				             
			
 
				             {Version:"zdp3",BuildSecond:60,Model:"doubao-deepseek-v3-250324",Content:"字节deepseek_v3\n平均60秒生成",CSS:""},
			
 
				             {Version:"zdp31",BuildSecond:30,Model:"doubao-deepseek-v3-1-250821",Content:"字节deepseek_v31\n平均30秒生成",CSS:""},
			
--- a/秒过分数线数据导入/README.md
+++ b/秒过分数线数据导入/README.md
@@ -165,6 +165,21 @@ WHERE ScoreYear = '2025'
 
				 - 对“原名（现某某）”这种文本，匹配时应同时尝试原名、括号内现名、去括号名称。
			
 
				 - 图片清晰时可以 OCR/读图解决，但要把结果转成结构化行，再按学校表 ID 入库。
			
 
				 
			
 
				+2026 懿德中学问题复盘：
			
 
				+
			
 
				+- 触发点：复查浦东新区 `上海市浦东新区懿德中学` 时发现名额到校目标高中不对。
			
 
				+- 直接错误：PDF 原始行中最后两列应为 `上海市浦东复旦附中分校` 和 `上海中学东校`，旧脚本分别写成了 `复旦大学附属中学` 和 `上海市上海中学`。
			
 
				+- 根因：高中表头没有 6 位代码时，旧逻辑先做简称别名匹配，`上海市浦东复旦附中分校` 先命中 `复旦附中`，`上海中学东校` 先命中 `上海中学`，导致分校/东校被主校抢走。
			
 
				+- 同类影响：普陀 `华二普陀`、宝山 `华二宝山` / `上师附中宝山`、浦东 `上海中学东校` / `浦东复旦附中分校` / `华二临港奉贤分校`、松江 `松江二中` / `华二松江分校`、奉贤 `华二临港奉贤分校`。
			
 
				+- 修正原则：学校匹配顺序必须是“6 位代码优先，其次精确全称/简称/别名字段，最后才用简称兜底”；简称兜底还要按别名长度从长到短匹配，避免 `华二` 抢在 `华二普陀` 前面。
			
 
				+- 额外问题：青浦区名额到校 PDF 是长表跨页，高中段落在表格抽取中会丢失，不能只依赖 `pdfplumber.extract_tables()` 的表格状态续接。
			
 
				+- 第一次青浦修正仍有隐患：用 `extract_text()` 的自然文本顺序识别高中段落，会把视觉上同行的内容拆错。例如 PDF 表格中 `102056 上海交通大学附属中学 / 181021 上海市青浦区思源中学 / 1` 是同一行，但文本抽取顺序会先输出 `181021 上海市青浦区思源中学 1`，再输出 `102056 上海交通大学附属中学`，导致思源中学被错误归到上一段 `上海市上海中学`。
			
 
				+- 最终修正方式：青浦区改用 `pdfplumber.find_tables()` 的行坐标作为主依据，再用左侧高中代码文字的坐标判断“从哪一行开始切换高中”。这样可以处理页 1 同行高中代码、页 2 跨页延续、页 3/页 4 中途切换高中段落等情况。
			
 
				+- 青浦修正结果示例：`上海市上海中学` 只对应 `上海市青浦区凤溪中学`；`上海交通大学附属中学` 对应 `上海市青浦区思源中学` 和 `上海市青浦区实验中学`。
			
 
				+- 同类风险扫描：已扫描 16 个区 PDF，仅青浦存在“左侧高中段落 + 右侧三列表格 + 文本顺序错位”的版式；其他区未发现同类结构。
			
 
				+- 修正方式：备份受影响区旧数据到 `mps_score_school_quota_2026_bad_targets_backup.json`，再重建普陀、宝山、浦东、松江、青浦、奉贤 6 个区数据；后续又单独备份青浦旧数据到 `mps_score_school_quota_2026_qingpu_reparse_backup.json`，并重建青浦区数据。
			
 
				+- 后续要求：遇到“分校、校区、东校、宝山、普陀、松江、临港奉贤”等表头，必须人工抽样检查目标高中 ID；遇到长表跨页或左右分栏版式，不能相信纯文本抽取顺序，必须结合表格行坐标或人工抽样；导入后必须做重复业务 key 检查，即 `ScoreYear + ScoreType + DistrictID + SchoolOfGraduation + SchoolTarget` 不应重复。
			
 
				+
			
 
				 ## 当前脚本说明
			
 
				 
			
 
				 脚本分为三类：主流程脚本、公共解析/补录脚本、2026 一次性补充脚本。后续年度工作时，主流程和公共脚本可以复制改年份；一次性补充脚本主要用于追溯 2026 的特殊处理，不建议直接运行到新年份。
			
@@ -184,6 +199,13 @@ WHERE ScoreYear = '2025'
 
				 - 如果某区已存在数据，会跳过并报告。
			
 
				 - 对图片或解析失败区，使用 `import_mps_score_quota_manual_2026.py` 做手工/OCR 补充。
			
 
				 
			
 
				+名额到区官方总表审核：
			
 
				+
			
 
				+- `audit_mps_score_quota_2026.py`：读取官方《2026全市高中名额到区招生计划统计表》PDF，并与数据库 `2026 名额到区` 按高中汇总结果比对。
			
 
				+- 审核口径：以 PDF 中高中 6 位招生代码为准，对应 `MPS_School.SchoolNumber`，再比较官方计划数与数据库 `SUM(PlanNum)`。
			
 
				+- 当前审核结果：官方 77 所、计划数 7171；数据库 77 所、计划数 7171；逐校差异 0。
			
 
				+- 经验：只要官方发布全市/全区统计表，就应作为最终验算口径。它不能证明每一条初中分配都正确，但能快速发现高中目标映射错误、漏导、重复导入、计划数错位等系统性问题。
			
 
				+
			
 
				 名额到校：
			
 
				 
			
 
				 - `research_mps_score_school_quota_2026.py`
			
@@ -216,6 +238,15 @@ WHERE ScoreYear = '2025'
 
				 
			
 
				 - `__pycache__/` 和 `*.pyc` 是 Python 运行缓存，不属于业务数据或脚本，已在主仓库 `.gitignore` 中忽略。
			
 
				 
			
 
				+名额到校区内合计审核：
			
 
				+
			
 
				+- `audit_mps_score_school_quota_totals_2026.py`：读取各区名额到校 PDF 中明确存在的“合计”行，与数据库 `2026 名额到校` 按区/高中汇总结果比对。
			
 
				+- 当前可自动审核区：长宁区、宝山区、金山区、松江区、崇明区。
			
 
				+- 审核结果：上述 5 个区 PDF 合计与数据库逐项一致，差异 0。
			
 
				+- 注意：有些 PDF 的合计行包含水印或序号类数字，例如金山、松江合计行中不参与总计的数字，脚本中已显式忽略；没有明确合计行的区不纳入此脚本自动审核。
			
 
				+- 经验：名额到校审核要优先找 PDF 自带的“合计/总计”行。能自动审核的区，应至少核对区总计和高中列合计；没有合计行的区，也要尽量通过官方后续统计表、人工抽样、重复 key 检查来补充验证。
			
 
				+- 边界：合计审核只能证明“高中列汇总”和“区总量”正确，不能完全证明每个初中分配行都正确；因此它应与学校匹配日志、问题清单、重复 key 检查一起使用。
			
 
				+
			
 
				 ## 2026 已完成结果
			
 
				 
			
 
				 计划/自主招生：
			
@@ -236,10 +267,15 @@ WHERE ScoreYear = '2025'
 
				 
			
 
				 - `ScoreYear = 2026`
			
 
				 - `ScoreType = 名额到校`
			
 
				-- 已导入 3892 行
			
 
				-- 计划数合计 12833
			
 
				+- 已导入 3893 行
			
 
				+- 计划数合计 12887
			
 
				 - 问题清单 `mps_score_school_quota_2026_problems.json` 已清空
			
 
				 
			
 
				+
			
 
				+修正记录：
			
 
				+
			
 
				+- 2026-06-01 修正名额到校部分高中目标误匹配问题。原因是分校/校区/东校表头先命中了主校简称别名；同时修正青浦长表跨页高中段落解析。修正前旧数据已备份到 `mps_score_school_quota_2026_bad_targets_backup.json`。
			
 
				+
			
 
				 2026 名额到校最终分区汇总：
			
 
				 
			
 
				 | DistrictID | 区 | 行数 | 计划数 |
			
@@ -252,12 +288,12 @@ WHERE ScoreYear = '2025'
 
				 | 6 | 虹口区 | 80 | 488 |
			
 
				 | 7 | 杨浦区 | 144 | 707 |
			
 
				 | 8 | 闵行区 | 460 | 1290 |
			
 
				-| 9 | 宝山区 | 348 | 1076 |
			
 
				+| 9 | 宝山区 | 343 | 1076 |
			
 
				 | 10 | 嘉定区 | 130 | 612 |
			
 
				-| 11 | 浦东新区 | 1259 | 2082 |
			
 
				+| 11 | 浦东新区 | 1260 | 2095 |
			
 
				 | 12 | 金山区 | 56 | 355 |
			
 
				 | 13 | 松江区 | 190 | 779 |
			
 
				-| 14 | 青浦区 | 93 | 725 |
			
 
				+| 14 | 青浦区 | 98 | 766 |
			
 
				 | 15 | 奉贤区 | 131 | 345 |
			
 
				 | 16 | 崇明区 | 50 | 223 |
			
 
				 
			
--- a/秒过分数线数据导入/audit_mps_score_quota_2026.py
+++ b/秒过分数线数据导入/audit_mps_score_quota_2026.py
@@ -0,0 +1,160 @@
 
				+import json
			
 
				+import re
			
 
				+import sys
			
 
				+
			
 
				+import pdfplumber
			
 
				+
			
 
				+sys.path.insert(0, "/private/tmp/codex_mysql_driver")
			
 
				+import pymysql  # noqa: E402
			
 
				+
			
 
				+
			
 
				import os  # noqa: E402  (kept next to the settings that read the environment)

				# Connection settings passed verbatim to pymysql.connect(**DB_CONFIG).
				#
				# SECURITY: the database password used to be committed here in plain text.
				# It is now read from the MPS_DB_PASSWORD environment variable; the other
				# connection fields keep their previous values as defaults and can be
				# overridden the same way. The previously committed password must be treated
				# as leaked and rotated on the server.
				DB_CONFIG = {
				    "host": os.environ.get("MPS_DB_HOST", "589ae8e08493d.sh.cdb.myqcloud.com"),
				    "port": int(os.environ.get("MPS_DB_PORT", "8124")),
				    "user": os.environ.get("MPS_DB_USER", "cdb_outerroot"),
				    # No fallback secret on purpose: when the variable is unset the password
				    # is empty, so the connection is expected to be rejected rather than
				    # silently succeed with a committed credential.
				    "password": os.environ.get("MPS_DB_PASSWORD", ""),
				    "database": os.environ.get("MPS_DB_NAME", "kylx365_db"),
				    "charset": "utf8mb4",
				    "connect_timeout": 10,
				    "read_timeout": 20,
				}

				# Audit scope: the MPS_Score rows selected by load_db_summary().
				YEAR = "2026"
				SCORE_TYPE = "名额到区"
				# NOTE(review): this file name spells "扫生" where the README spells "招生";
				# confirm which spelling matches the file on disk before changing it.
				OFFICIAL_PDF = (
				    "/Volumes/程杰外接SD盘/上海中考招生计划/2026/计划/"
				    "2026全市高中名额到区扫生计划统计表.pdf"
				)
			
 
				+
			
 
				+
			
 
				def clean_text(value):
				    """Collapse a PDF table cell into a compact, whitespace-free string.

				    Falsy input (None, "", 0) becomes "". After all whitespace is removed, a
				    leading run of the characters 院/试/考/育/教/市/海/上 is stripped, but only
				    when what follows is a 6-digit code or one of 上海/复旦/华东/同济
				    (presumably stray watermark characters fused onto the cell - TODO confirm).
				    """
				    raw = "" if not value else str(value)
				    compact = re.sub(r"\s+", "", raw.replace("\n", ""))
				    noise_prefix = r"^[院试考育教市海上]+(?=\d{6}|上海|复旦|华东|同济)"
				    return re.sub(noise_prefix, "", compact)
			
 
				+
			
 
				+
			
 
				def clean_code(value):
				    """Return the first run of six consecutive digits in ``value``, or None.

				    ``value`` is converted with ``str(value or "")``, so None and other falsy
				    inputs yield None.
				    """
				    hit = re.search(r"\d{6}", str(value or ""))
				    if hit is None:
				        return None
				    return hit.group(0)
			
 
				+
			
 
				+
			
 
				def clean_num(value):
				    """Return the last run of digits in ``value`` as an int, or None if there is none.

				    Signs are not part of the pattern, so "-5" yields 5. Falsy input
				    (including the integer 0) is treated as an empty string and yields None.
				    """
				    digit_runs = re.findall(r"\d+", str(value or ""))
				    if not digit_runs:
				        return None
				    return int(digit_runs[-1])
			
 
				+
			
 
				+
			
 
				def parse_official_pdf(path):
				    """Read the official statistics PDF into per-school plan totals.

				    Every table on every page is scanned; the first row of each table is
				    skipped and rows with fewer than 9 cells are ignored. Column 1 supplies the
				    6-digit school code, column 2 the school name and the last column the plan
				    number. Rows without a code or a plan number are dropped.

				    Returns:
				        dict mapping school code -> {"SchoolNumber", "pdf_names", "official_plan"},
				        where "pdf_names" lists every name seen for that code and
				        "official_plan" is the sum of its plan numbers.
				    """
				    with pdfplumber.open(path) as pdf:
				        data_rows = [
				            raw
				            for page in pdf.pages
				            for table in page.extract_tables()
				            for raw in table[1:]
				        ]

				    official = {}
				    for raw in data_rows:
				        if len(raw) < 9:
				            continue
				        code = clean_code(raw[1])
				        plan = clean_num(raw[-1])
				        if not code or plan is None:
				            continue
				        if code not in official:
				            official[code] = {"SchoolNumber": code, "pdf_names": [], "official_plan": 0}
				        entry = official[code]
				        entry["pdf_names"].append(clean_text(raw[2]))
				        entry["official_plan"] += plan
				    return official
			
 
				+
			
 
				+
			
 
				def load_db_summary(cursor):
				    """Summarise the database side of the audit, keyed by school code.

				    Args:
				        cursor: DB cursor whose rows are dicts (main() passes a pymysql DictCursor).

				    Returns:
				        A tuple ``(school_by_code, db_by_code, unmatched)``:
				        - school_by_code: ``str(SchoolNumber)`` -> MPS_School row, for high
				          schools that have a SchoolNumber.
				        - db_by_code: ``str(SchoolNumber)`` -> dict with "SchoolNumber",
				          "SchoolTarget", "db_name" and the summed "db_plan".
				        - unmatched: MPS_Score summary rows whose SchoolTarget does not map to a
				          high school with a SchoolNumber.
				    """
				    cursor.execute(
				        """
				        SELECT ID, SchoolNumber, SchoolFullName
				        FROM MPS_School
				        WHERE SchoolType1 = '高中'
				        """
				    )
				    schools = cursor.fetchall()
				    school_by_id = {int(row["ID"]): row for row in schools}
				    school_by_code = {str(row["SchoolNumber"]): row for row in schools if row["SchoolNumber"]}

				    cursor.execute(
				        """
				        SELECT SchoolTarget, SchoolFullName, SUM(PlanNum) AS total
				        FROM MPS_Score
				        WHERE ScoreYear = %s AND ScoreType = %s
				        GROUP BY SchoolTarget, SchoolFullName
				        ORDER BY CAST(SchoolTarget AS UNSIGNED), SchoolTarget
				        """,
				        (YEAR, SCORE_TYPE),
				    )

				    db_by_code = {}
				    unmatched = []
				    for row in cursor.fetchall():
				        # A NULL or non-numeric SchoolTarget must be reported as unmatched, not
				        # abort the whole audit with a TypeError/ValueError.
				        try:
				            target_id = int(row["SchoolTarget"])
				        except (TypeError, ValueError):
				            target_id = None
				        school = school_by_id.get(target_id)
				        if not school or not school.get("SchoolNumber"):
				            unmatched.append(row)
				            continue
				        code = str(school["SchoolNumber"])
				        # Several summary rows can share one target (the query also groups by
				        # SchoolFullName), so plans are accumulated per school code.
				        item = db_by_code.setdefault(
				            code,
				            {
				                "SchoolNumber": code,
				                "SchoolTarget": school["ID"],
				                "db_name": row["SchoolFullName"],
				                "db_plan": 0,
				            },
				        )
				        item["db_plan"] += int(row["total"] or 0)

				    return school_by_code, db_by_code, unmatched
			
 
				+
			
 
				+
			
 
				def compare(official, school_by_code, db_by_code):
				    """List every school code where the official PDF and the database disagree.

				    Codes are visited in sorted order over the union of both sides. A code is
				    reported when it is missing on either side or when the plan totals differ.

				    Returns:
				        list of dicts with "SchoolNumber", "SchoolTarget", "school_name",
				        "pdf_name", "official_plan", "db_plan", "delta_db_minus_official" and
				        "status" ("db_only", "official_only" or "plan_mismatch").
				    """
				    mismatches = []
				    for code in sorted({*official, *db_by_code}):
				        from_pdf = official.get(code)
				        from_db = db_by_code.get(code)
				        pdf_view = from_pdf or {}
				        db_view = from_db or {}

				        # Prefer the MPS_School row; fall back to what the score rows recorded.
				        school = school_by_code.get(code)
				        if school:
				            target_id = school["ID"]
				            school_name = school["SchoolFullName"]
				        else:
				            target_id = db_view.get("SchoolTarget")
				            school_name = db_view.get("db_name")

				        # dict.fromkeys de-duplicates the PDF names while keeping first-seen order.
				        pdf_name = " / ".join(dict.fromkeys(pdf_view.get("pdf_names", [])))
				        official_plan = pdf_view.get("official_plan", 0)
				        db_plan = db_view.get("db_plan", 0)
				        delta = db_plan - official_plan

				        if from_pdf and from_db and not delta:
				            continue

				        if not from_pdf:
				            status = "db_only"
				        elif not from_db:
				            status = "official_only"
				        else:
				            status = "plan_mismatch"

				        mismatches.append(
				            {
				                "SchoolNumber": code,
				                "SchoolTarget": target_id,
				                "school_name": school_name,
				                "pdf_name": pdf_name,
				                "official_plan": official_plan,
				                "db_plan": db_plan,
				                "delta_db_minus_official": delta,
				                "status": status,
				            }
				        )
				    return mismatches
			
 
				+
			
 
				+
			
 
				def main():
				    """Run the audit and print a JSON report to stdout.

				    The report carries school counts and plan totals for both sides, the number
				    of differences, the database rows that could not be mapped to a school
				    code, and the per-school differences themselves.
				    """
				    official = parse_official_pdf(OFFICIAL_PDF)

				    connection = pymysql.connect(**DB_CONFIG)
				    try:
				        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
				            school_by_code, db_by_code, unmatched = load_db_summary(cursor)
				    finally:
				        connection.close()

				    diffs = compare(official, school_by_code, db_by_code)
				    official_total = sum(item["official_plan"] for item in official.values())
				    db_total = sum(item["db_plan"] for item in db_by_code.values())

				    report = {
				        "official_schools": len(official),
				        "official_total": official_total,
				        "db_schools": len(db_by_code),
				        "db_total": db_total,
				        "diff_count": len(diffs),
				        "unmatched_db_rows": unmatched,
				        "diffs": diffs,
				    }
				    # default=str keeps non-JSON values from the database rows printable.
				    print(json.dumps(report, ensure_ascii=False, default=str, indent=2))
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/秒过分数线数据导入/audit_mps_score_school_quota_totals_2026.py
+++ b/秒过分数线数据导入/audit_mps_score_school_quota_totals_2026.py
@@ -0,0 +1,212 @@
 
				+import json
			
 
				+import os
			
 
				+import re
			
 
				+import sys
			
 
				+
			
 
				+import pdfplumber
			
 
				+
			
 
				+sys.path.insert(0, "/private/tmp/codex_mysql_driver")
			
 
				+import pymysql  # noqa: E402
			
 
				+
			
 
				+import research_mps_score_school_quota_2026 as parser  # noqa: E402
			
 
				+
			
 
				+
			
 
				YEAR = "2026"
				SCORE_TYPE = "名额到校"


				def _school_totals(*codes):
				    """Build one "school_total" footer-column spec per school code, in order."""
				    return [{"kind": "school_total", "code": code} for code in codes]


				# Districts whose PDFs contain explicit footer totals, keyed by DistrictID.
				# Each list describes the numbers of the footer row from left to right; the
				# columns are configured by hand because some PDFs do not repeat headers on
				# the final page. "ignore" entries skip a footer number that is not a total.
				AUDIT_CONFIG = {
				    3: _school_totals(
				        "052002", "053004", "052001", "042032",
				        "102057", "102056", "152003", "152006",
				    ),
				    9: _school_totals(
				        "132001", "133001", "132002", "133003", "132003",
				        "042032", "152003", "102057", "102056",
				    ),
				    12: [
				        {"kind": "ignore", "label": "ignored_pdf_noise"},
				        *_school_totals("162000", "163002"),
				        {"kind": "district_total", "label": "合计"},
				    ],
				    13: [
				        {"kind": "ignore", "label": "ignored_pdf_noise"},
				        *_school_totals("172001", "173001", "174003", "172002", "172004"),
				        {"kind": "district_total", "label": "合计"},
				    ],
				    16: _school_totals(
				        "512000", "512001", "042032", "152003", "102057", "102056",
				    ),
				}
			
 
				+
			
 
				+
			
 
				def clean_num(value):
				    """Return the last (optionally negative) integer found in a table cell.

				    The cell is first normalised with ``parser.clean_text``; None is returned
				    when the cleaned text contains no digits.
				    """
				    matches = re.findall(r"-?\d+", parser.clean_text(value))
				    if matches:
				        return int(matches[-1])
				    return None
			
 
				+
			
 
				+
			
 
				def find_footer_numbers(path):
				    """Return the numbers of the last "合计" (total) row found in the PDF.

				    Every row of every extracted table is inspected. A row qualifies when any
				    cell contains "合计" after cleaning and the row yields at least two
				    numbers. When several rows qualify, the last one in page/table/row order
				    wins. An empty list is returned when none does.
				    """
				    last_footer = []
				    with pdfplumber.open(path) as pdf:
				        for page in pdf.pages:
				            for table in page.extract_tables():
				                for row in table or ():
				                    is_total_row = any("合计" in parser.clean_text(cell) for cell in row)
				                    if not is_total_row:
				                        continue
				                    values = [num for num in map(clean_num, row) if num is not None]
				                    if len(values) >= 2:
				                        last_footer = values
				    return last_footer
			
 
				+
			
 
				+
			
 
				def load_db(cursor):
				    """Load high-school lookups and per-district plan totals from the database.

				    Args:
				        cursor: DB cursor whose rows are dicts (main() passes a pymysql DictCursor).

				    Returns:
				        A tuple ``(school_by_code, by_district, by_school)``:
				        - school_by_code: ``str(SchoolNumber)`` -> MPS_School row for high schools.
				        - by_district: DistrictID -> summed plan number for YEAR / SCORE_TYPE.
				        - by_school: ``(DistrictID, SchoolTarget)`` -> {"name", "plan"}.
				    """
				    cursor.execute(
				        """
				        SELECT ID, SchoolNumber, SchoolFullName
				        FROM MPS_School
				        WHERE SchoolType1 = '高中'
				        """
				    )
				    school_by_code = {str(row["SchoolNumber"]): row for row in cursor.fetchall() if row["SchoolNumber"]}

				    cursor.execute(
				        """
				        SELECT DistrictID, SchoolTarget, SchoolFullName, SUM(PlanNum) AS total
				        FROM MPS_Score
				        WHERE ScoreYear = %s AND ScoreType = %s
				        GROUP BY DistrictID, SchoolTarget, SchoolFullName
				        """,
				        (YEAR, SCORE_TYPE),
				    )

				    by_district = {}
				    by_school = {}
				    for row in cursor.fetchall():
				        district_id = int(row["DistrictID"])
				        school_target = int(row["SchoolTarget"])
				        plan = int(row["total"] or 0)
				        by_district[district_id] = by_district.get(district_id, 0) + plan
				        # The query also groups by SchoolFullName, so one (district, target)
				        # pair can come back as several rows. Accumulate instead of
				        # overwriting, otherwise all but the last row's plan is lost.
				        entry = by_school.setdefault(
				            (district_id, school_target),
				            {"name": row["SchoolFullName"], "plan": 0},
				        )
				        entry["plan"] += plan
				    return school_by_code, by_district, by_school
			
 
				+
			
 
				+
			
 
				def main():
				    """Compare each configured district's PDF footer totals with the database.

				    For every district in AUDIT_CONFIG the footer numbers are paired
				    positionally with the configured column specs. A count mismatch, or a
				    school code missing from MPS_School, is recorded under "problems". All
				    other columns are compared with the database sums. The full result is
				    printed as JSON.
				    """
				    conn = pymysql.connect(**parser.DB_CONFIG)
				    try:
				        with conn.cursor(pymysql.cursors.DictCursor) as cursor:
				            school_by_code, db_by_district, db_by_school = load_db(cursor)
				    finally:
				        conn.close()

				    audits, diffs, problems = [], [], {}

				    for district_id, config in AUDIT_CONFIG.items():
				        district_name = parser.DISTRICTS[district_id]
				        pdf_path = os.path.join(parser.BASE_DIR, f"2026名额到校{district_name}.pdf")
				        numbers = find_footer_numbers(pdf_path)
				        if len(numbers) != len(config):
				            # Positional pairing would be meaningless, so skip this district.
				            problems[str(district_id)] = {
				                "district": district_name,
				                "problem": "footer_number_count_mismatch",
				                "numbers": numbers,
				                "expected_columns": len(config),
				            }
				            continue

				        listed_targets = []
				        official_items = []
				        for spec, official_plan in zip(config, numbers):
				            kind = spec["kind"]
				            if kind == "ignore":
				                continue
				            if kind != "school_total":
				                official_items.append(
				                    {"kind": kind, "official_plan": official_plan, "label": spec["label"]}
				                )
				                continue
				            school = school_by_code.get(spec["code"])
				            if not school:
				                bucket = problems.setdefault(
				                    str(district_id), {"district": district_name, "problems": []}
				                )
				                bucket.setdefault("problems", []).append(
				                    {"problem": "school_code_not_found", "code": spec["code"]}
				                )
				                continue
				            target_id = int(school["ID"])
				            listed_targets.append(target_id)
				            official_items.append(
				                {
				                    "kind": kind,
				                    "official_plan": official_plan,
				                    "SchoolNumber": spec["code"],
				                    "SchoolTarget": target_id,
				                    "label": school["SchoolFullName"],
				                }
				            )

				        # Second pass: "delegated_total" needs the complete list of listed targets.
				        district_rows = []
				        for item in official_items:
				            kind = item["kind"]
				            if kind == "district_total":
				                db_plan = db_by_district.get(district_id, 0)
				            elif kind == "delegated_total":
				                listed_sum = sum(
				                    db_by_school.get((district_id, target), {}).get("plan", 0)
				                    for target in listed_targets
				                )
				                db_plan = db_by_district.get(district_id, 0) - listed_sum
				            else:
				                db_plan = db_by_school.get((district_id, item["SchoolTarget"]), {}).get("plan", 0)

				            row = dict(
				                item,
				                db_plan=db_plan,
				                delta_db_minus_official=db_plan - item["official_plan"],
				            )
				            district_rows.append(row)
				            if row["delta_db_minus_official"]:
				                diffs.append({"DistrictID": district_id, "district": district_name, **row})

				        audits.append({"DistrictID": district_id, "district": district_name, "rows": district_rows})

				    report = {
				        "audited_districts": len(AUDIT_CONFIG),
				        "audited_district_ids": sorted(AUDIT_CONFIG),
				        "diff_count": len(diffs),
				        "diffs": diffs,
				        "problems": problems,
				        "audits": audits,
				    }
				    print(json.dumps(report, ensure_ascii=False, default=str, indent=2))
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/秒过分数线数据导入/fix_mps_score_school_quota_2026_bad_targets.py
+++ b/秒过分数线数据导入/fix_mps_score_school_quota_2026_bad_targets.py
@@ -0,0 +1,146 @@
 
				+import json
			
 
				+import os
			
 
				+import sys
			
 
				+from collections import defaultdict
			
 
				+from datetime import datetime
			
 
				+
			
 
				+sys.path.insert(0, "/private/tmp/codex_mysql_driver")
			
 
				+import pymysql
			
 
				+
			
 
				+import research_mps_score_school_quota_2026 as parser
			
 
				+from import_mps_score_school_quota_2026 import INSERT_COLUMNS, build_record, load_previous_plan_nums
			
 
				+
			
 
				+
			
 
				# DistrictIDs whose 2026 "名额到校" rows are deleted and rebuilt by this script.
				# Names for 9-15 follow the README's per-district summary table.
				AFFECTED_DISTRICTS = [
				    5,   # presumably 普陀区 (listed first among the rebuilt districts) - TODO confirm
				    9,   # 宝山区
				    11,  # 浦东新区
				    13,  # 松江区
				    14,  # 青浦区
				    15,  # 奉贤区
				]
				# JSON dump of the pre-fix rows; the path is relative to the working directory.
				BACKUP_FILE = "mps_score_school_quota_2026_bad_targets_backup.json"
			
 
				+
			
 
				+
			
 
				def aggregate_rows(rows):
				    """Merge parsed rows that share the same (junior ID, high-school ID) pair.

				    Args:
				        rows: iterable of ``(junior, high, plan_num, junior_method, high_method)``
				            where ``junior`` and ``high`` are mappings with an "ID" entry.

				    Returns:
				        One tuple per pair, in first-seen order: the first junior/high objects
				        seen for the pair, the summed plan number, and the distinct match
				        methods of each side sorted and joined with "+".
				    """
				    buckets = {}
				    for junior, high, plan_num, junior_method, high_method in rows:
				        pair = (int(junior["ID"]), int(high["ID"]))
				        bucket = buckets.get(pair)
				        if bucket is None:
				            bucket = buckets[pair] = {
				                "junior": junior,
				                "high": high,
				                "plan": 0,
				                "junior_methods": set(),
				                "high_methods": set(),
				            }
				        bucket["plan"] += int(plan_num)
				        bucket["junior_methods"].add(junior_method)
				        bucket["high_methods"].add(high_method)

				    merged = []
				    for bucket in buckets.values():
				        merged.append(
				            (
				                bucket["junior"],
				                bucket["high"],
				                bucket["plan"],
				                "+".join(sorted(bucket["junior_methods"])),
				                "+".join(sorted(bucket["high_methods"])),
				            )
				        )
				    return merged
			
 
				+
			
 
				+
			
 
				def load_existing(cursor):
				    """Fetch the current 2026 "名额到校" rows of every affected district.

				    The district IDs are passed as query parameters, one placeholder per entry
				    of AFFECTED_DISTRICTS. Returns whatever ``cursor.fetchall()`` yields.
				    """
				    slots = ", ".join("%s" for _ in AFFECTED_DISTRICTS)
				    query = f"""
				        SELECT *
				        FROM MPS_Score
				        WHERE ScoreYear = '2026'
				          AND ScoreType = '名额到校'
				          AND DistrictID IN ({slots})
				        ORDER BY DistrictID, SchoolOfGraduation, SchoolTarget, ID
				        """
				    cursor.execute(query, tuple(AFFECTED_DISTRICTS))
				    return cursor.fetchall()
			
 
				+
			
 
				+
			
 
				def collect_new_records(cursor):
				    """Re-parse each affected district's PDF and build the replacement records.

				    Returns:
				        ``(records_by_district, problems_by_district)``. The first maps
				        DistrictID -> list of records produced by ``build_record``. The second
				        maps ``str(DistrictID)`` -> a summary of parser problems (at most the
				        first 100, as ``repr`` strings) and only contains districts that had any.
				    """
				    high_by_code, high_by_name, _ = parser.load_schools(cursor, "高中")
				    junior_by_code, junior_by_name, _ = parser.load_schools(cursor, "初中")
				    previous = load_previous_plan_nums(cursor)

				    records_by_district = {}
				    problems_by_district = {}
				    for district_id in AFFECTED_DISTRICTS:
				        district_name = parser.DISTRICTS[district_id]
				        pdf_path = os.path.join(parser.BASE_DIR, f"2026名额到校{district_name}.pdf")
				        parsed_rows, problems = parser.parse_tables(
				            pdf_path, district_id, high_by_code, high_by_name, junior_by_code, junior_by_name
				        )

				        # Collapse duplicate (junior, high) pairs before building DB records.
				        district_records = []
				        for merged_row in aggregate_rows(parsed_rows):
				            district_records.append(build_record(district_id, merged_row, previous))
				        records_by_district[district_id] = district_records

				        if not problems:
				            continue
				        problems_by_district[str(district_id)] = {
				            "district": district_name,
				            "file": pdf_path,
				            "problem_count": len(problems),
				            "problems": [repr(item) for item in problems[:100]],
				        }
				    return records_by_district, problems_by_district
			
 
				+
			
 
				+
			
 
				def insert_records(cursor, records):
				    """Bulk-insert ``records`` into MPS_Score and return how many were sent.

				    Each record must provide every column named in INSERT_COLUMNS; other keys
				    are ignored. Nothing is executed for an empty list and 0 is returned.
				    """
				    if not records:
				        return 0
				    column_list = ", ".join(INSERT_COLUMNS)
				    value_slots = ", ".join("%s" for _ in INSERT_COLUMNS)
				    statement = f"INSERT INTO MPS_Score ({column_list}) VALUES ({value_slots})"
				    params = []
				    for record in records:
				        params.append([record[column] for column in INSERT_COLUMNS])
				    cursor.executemany(statement, params)
				    return len(records)
			
 
				+
			
 
				+
			
 
				def main():
				    """Back up, delete and rebuild the affected districts' 2026 "名额到校" rows.

				    Steps, all on one connection:
				      1. load the existing rows and re-parse the PDFs; abort if the parser
				         reports any problem;
				      2. write the existing rows to a JSON backup file;
				      3. delete the existing rows, insert the rebuilt ones and commit.

				    Any exception rolls the transaction back and is re-raised.

				    Raises:
				        RuntimeError: when re-parsing reports problems (message is the JSON dump).
				    """
				    conn = pymysql.connect(**parser.DB_CONFIG)
				    try:
				        with conn.cursor(pymysql.cursors.DictCursor) as cursor:
				            existing = load_existing(cursor)
				            records_by_district, problems = collect_new_records(cursor)
				            if problems:
				                raise RuntimeError(json.dumps(problems, ensure_ascii=False, indent=2))

				            # Never overwrite an earlier backup: on a re-run "existing" already
				            # holds the rebuilt rows, and replacing the first backup would
				            # destroy the only copy of the pre-fix data.
				            backup_path = BACKUP_FILE
				            if os.path.exists(backup_path):
				                stem, ext = os.path.splitext(BACKUP_FILE)
				                backup_path = f"{stem}.{datetime.now():%Y%m%dT%H%M%S}{ext}"

				            backup = {
				                "created_at": datetime.now().isoformat(timespec="seconds"),
				                "affected_districts": AFFECTED_DISTRICTS,
				                "row_count": len(existing),
				                "rows": existing,
				            }
				            # Mode "x" fails instead of truncating if the file appeared meanwhile.
				            with open(backup_path, "x", encoding="utf-8") as handle:
				                json.dump(backup, handle, ensure_ascii=False, indent=2, default=str)
				                handle.write("\n")

				            print("backup", backup_path, "rows", len(existing))
				            for district_id in AFFECTED_DISTRICTS:
				                rows = records_by_district[district_id]
				                print(
				                    "ready",
				                    district_id,
				                    parser.DISTRICTS[district_id],
				                    "rows",
				                    len(rows),
				                    "plan",
				                    sum(row["PlanNum"] for row in rows),
				                )

				            placeholders = ", ".join(["%s"] * len(AFFECTED_DISTRICTS))
				            cursor.execute(
				                f"""
				                DELETE FROM MPS_Score
				                WHERE ScoreYear = '2026'
				                  AND ScoreType = '名额到校'
				                  AND DistrictID IN ({placeholders})
				                """,
				                tuple(AFFECTED_DISTRICTS),
				            )
				            deleted = cursor.rowcount

				            all_records = [
				                row
				                for district_id in AFFECTED_DISTRICTS
				                for row in records_by_district[district_id]
				            ]
				            inserted = insert_records(cursor, all_records)
				            # Delete and insert are committed together.
				            conn.commit()
				            print("deleted", deleted)
				            print("inserted", inserted)
				    except Exception:
				        conn.rollback()
				        raise
				    finally:
				        conn.close()
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/秒过分数线数据导入/mps_score_school_quota_2026_bad_targets_backup.json
+++ b/秒过分数线数据导入/mps_score_school_quota_2026_bad_targets_backup.json
--- a/秒过分数线数据导入/mps_score_school_quota_2026_qingpu_reparse_backup.json
+++ b/秒过分数线数据导入/mps_score_school_quota_2026_qingpu_reparse_backup.json
--- a/秒过分数线数据导入/research_mps_score_school_quota_2026.py
+++ b/秒过分数线数据导入/research_mps_score_school_quota_2026.py
@@ -157,10 +157,6 @@ def match_school(code, name, by_code, by_name, district_id=None):
 
				     if code and code in by_code:
			
 
				         return by_code[code], "code"
			
 
				     cleaned = clean_text(name)
			
 
				-    if by_code is not None:
			
 
				-        for alias, alias_code in HIGH_ALIAS_CODES.items():
			
 
				-            if alias in cleaned and alias_code in by_code:
			
 
				-                return by_code[alias_code], f"alias:{alias}"
			
 
				     candidates = []
			
 
				     for variant in name_variants(name):
			
 
				         if variant in by_name:
			
@@ -182,6 +178,10 @@ def match_school(code, name, by_code, by_name, district_id=None):
 
				         return candidates[0], "name"
			
 
				     if candidates:
			
 
				         return None, f"ambiguous:{[row['SchoolFullName'] for row in candidates[:4]]}"
			
 
				+    if by_code is not None:
			
 
				+        for alias, alias_code in sorted(HIGH_ALIAS_CODES.items(), key=lambda item: len(item[0]), reverse=True):
			
 
				+            if alias in cleaned and alias_code in by_code:
			
 
				+                return by_code[alias_code], f"alias:{alias}"
			
 
				     if district_id is not None:
			
 
				         fuzzy_candidates = []
			
 
				         for variant in name_variants(name):
			
@@ -209,6 +209,72 @@ def match_school(code, name, by_code, by_name, district_id=None):
 
				     return None, "not_found"
			
 
				 
			
 
				 
			
 
				+def parse_qingpu_text(path, high_by_code, junior_by_code):
			
 
				+    rows = []
			
 
				+    problems = []
			
 
				+    current_high = None
			
 
				+
			
 
				+    with pdfplumber.open(path) as pdf:
			
 
				+        for page in pdf.pages:
			
 
				+            high_markers = []
			
 
				+            for word in page.extract_words(x_tolerance=3, y_tolerance=3):
			
 
				+                if word.get("x0", 999) > 230:
			
 
				+                    continue
			
 
				+                code = clean_code(word.get("text"))
			
 
				+                if code in high_by_code:
			
 
				+                    high_markers.append((float(word["top"]), code, high_by_code[code]))
			
 
				+            high_markers.sort(key=lambda item: item[0])
			
 
				+
			
 
				+            for table in page.find_tables():
			
 
				+                extracted = table.extract()
			
 
				+                table_top = float(table.bbox[1])
			
 
				+                preceding = [item for item in high_markers if item[0] < table_top]
			
 
				+                if preceding:
			
 
				+                    current_high = preceding[-1][2]
			
 
				+
			
 
				+                for index, raw in enumerate(extracted):
			
 
				+                    if not raw:
			
 
				+                        continue
			
 
				+                    row_bbox = table.rows[index].bbox
			
 
				+                    row_top, row_bottom = float(row_bbox[1]), float(row_bbox[3])
			
 
				+                    markers_in_row = [
			
 
				+                        item for item in high_markers if row_top - 3 <= item[0] <= row_bottom + 3
			
 
				+                    ]
			
 
				+                    if markers_in_row:
			
 
				+                        current_high = markers_in_row[-1][2]
			
 
				+
			
 
				+                    if len(raw) >= 5 and any("招生学校代码" in clean_text(cell) for cell in raw):
			
 
				+                        continue
			
 
				+                    if len(raw) >= 5:
			
 
				+                        high_code = clean_code(raw[0])
			
 
				+                        if high_code in high_by_code:
			
 
				+                            current_high = high_by_code[high_code]
			
 
				+                        junior_code = clean_code(raw[2])
			
 
				+                        junior_name = clean_text(raw[3])
			
 
				+                        plan_num = clean_num(raw[4])
			
 
				+                    elif len(raw) >= 3:
			
 
				+                        junior_code = clean_code(raw[0])
			
 
				+                        junior_name = clean_text(raw[1])
			
 
				+                        plan_num = clean_num(raw[2])
			
 
				+                    else:
			
 
				+                        continue
			
 
				+
			
 
				+                    if not current_high:
			
 
				+                        problems.append((raw, "high", "not_found"))
			
 
				+                        continue
			
 
				+                    if not junior_code and not junior_name:
			
 
				+                        continue
			
 
				+                    junior = junior_by_code.get(junior_code)
			
 
				+                    if not junior:
			
 
				+                        problems.append((raw, "code", "junior_not_found"))
			
 
				+                        continue
			
 
				+                    if plan_num is None or plan_num == 0:
			
 
				+                        continue
			
 
				+                    rows.append((junior, current_high, plan_num, "code", "code"))
			
 
				+
			
 
				+    return rows, problems
			
 
				+
			
 
				+
			
 
				 def extract_codes_from_header(header_rows, col_index):
			
 
				     for row in header_rows:
			
 
				         if col_index < len(row):
			
@@ -346,6 +412,9 @@ def parse_matrix_table(table, district_id, high_by_code, high_by_name, junior_by
 
				 
			
 
				 
			
 
				 def parse_tables(path, district_id, high_by_code, high_by_name, junior_by_code, junior_by_name):
			
 
				+    if district_id == 14:
			
 
				+        return parse_qingpu_text(path, high_by_code, junior_by_code)
			
 
				+
			
 
				     all_rows = []
			
 
				     all_problems = []
			
 
				     long_state = {}