"""Fetch a Luogu problem page and export its statement, samples and solutions.

Running the module as a script downloads problem ``P1145`` into a directory
named after the problem id.
"""

import json
import os
import re

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from requests.exceptions import RequestException

# Load variables from the .env file into the process environment.
load_dotenv()

# Request headers and cookies used for every page fetch below.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) '
        'Gecko/20100101 Firefox/115.0'
    )
}

# Cookie values are read from environment variables; a variable that is not
# set yields None here.
cookies = {
    "__client_id": os.getenv("CLIENT_ID"),
    "_uid": os.getenv("UID"),
    "cf_clearance": os.getenv("CF_CLEARANCE"),
    "C3VK": os.getenv("C3VK")
}

# Not referenced in this file; kept because other modules may import it.
BASE_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Mobile Safari/537.36'
    ),
    'Origin': 'https://www.luogu.com.cn',
    'Content-Type': 'application/json',
    'X-Requested-With': 'XMLHttpRequest'
}

# Characters that are path separators or are rejected in file names on common
# file systems (Windows reserved characters plus ASCII control characters).
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


class Problem:
    """A single Luogu problem, downloaded and parsed at construction time.

    Attributes set by ``__init__``: ``problem_id``, ``problem_url``, ``html``
    (raw page text), ``json`` (the parsed ``data.problem`` object), ``title``,
    ``background``, ``description``, ``formatI``, ``formatO``, ``hint`` and
    ``samples``.
    """

    def __init__(self, problem_id):
        """Download and parse the problem page for *problem_id*.

        Raises:
            ConnectionError: the page could not be fetched.
            RuntimeError: the embedded JSON could not be located or parsed.
        """
        self.problem_id = problem_id
        self.problem_url = f"https://luogu.com.cn/problem/{problem_id}"
        self.html = None
        self.json = None
        self.title = ""
        self.background = ""
        self.description = ""
        self.formatI = ""
        self.formatO = ""
        self.hint = ""
        self.samples = []

        try:
            resp = requests.get(self.problem_url, headers=headers, cookies=cookies, timeout=10)
            resp.raise_for_status()
            self.html = resp.text
        except RequestException as e:
            raise ConnectionError(f"无法访问题目页面:{self.problem_url},错误:{e}") from e

        try:
            self.json = self._extract_json_from_html(self.html)['data']['problem']
        except (KeyError, TypeError, ValueError) as e:
            # TypeError covers a payload whose 'data' level is not a mapping.
            raise RuntimeError(f"解析题目数据失败:{e}") from e

        # Fall back to an empty mapping so a payload without 'contenu' yields
        # empty sections instead of a bare KeyError escaping the constructor.
        contenu = self.json.get('contenu') or {}
        self.title = self.json.get('title', '')
        self.background = contenu.get('background', '')
        self.description = contenu.get('description', '')
        self.formatI = contenu.get('formatI', '')
        self.formatO = contenu.get('formatO', '')
        self.hint = contenu.get('hint', '')
        self.samples = self.json.get('samples', [])

    @staticmethod
    def _safe_filename(name):
        """Return *name* with path separators and reserved characters replaced by ``_``."""
        cleaned = _UNSAFE_FILENAME_CHARS.sub("_", str(name)).strip(" .")
        return cleaned or "unknown"

    def _extract_json_from_html(self, html_content):
        """Return the decoded JSON found in the page's ``lentille-context`` script tag.

        Raises:
            ValueError: the script tag is missing or empty, or its content is
                not valid JSON.
            RuntimeError: the HTML itself could not be parsed.
        """
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            script_tag = soup.find('script', {'id': 'lentille-context', 'type': 'application/json'})
        except Exception as e:
            raise RuntimeError(f"HTML 解析失败:{e}") from e

        # Checked outside the try above so this ValueError reaches callers
        # unchanged instead of being rewrapped as an HTML parse failure.
        if script_tag is None or script_tag.string is None:
            raise ValueError("无法找到指定的JSON脚本标签")

        try:
            return json.loads(script_tag.string)
        except json.JSONDecodeError as e:
            raise ValueError(f"JSON 解析失败:{e}") from e

    def gen_problem_md(self):
        """Write the problem statement to ``<problem_id>.md`` in the current directory.

        Empty sections are omitted.

        Raises:
            RuntimeError: the file could not be written.
        """
        sections = (
            ("【题目背景】", self.background),
            ("【题目描述】", self.description),
            ("【输入格式】", self.formatI),
            ("【输出格式】", self.formatO),
            ("【提示】", self.hint),
        )
        parts = [f"# {self.title}"]
        parts.extend(f"\n\n## {heading}\n{body}" for heading, body in sections if body)
        md = "".join(parts)

        try:
            with open(f"{self.problem_id}.md", "w", encoding="utf8") as f:
                f.write(md)
        except IOError as e:
            raise RuntimeError(f"写入题面Markdown文件失败:{e}") from e

    def gen_samples(self):
        """Write each sample to ``samples/sample_<n>.in`` and ``samples/sample_<n>.out``.

        Each entry of ``self.samples`` is indexed as ``[input, output]``;
        numbering starts at 1.

        Raises:
            RuntimeError: a file could not be written or a sample has fewer
                than two elements.
        """
        try:
            os.makedirs("samples", exist_ok=True)
            for cnt, sample in enumerate(self.samples, start=1):
                with open(f"samples/sample_{cnt}.in", "w", encoding="utf8") as f:
                    f.write(sample[0])
                with open(f"samples/sample_{cnt}.out", "w", encoding="utf8") as f:
                    f.write(sample[1])
        except (IOError, IndexError) as e:
            raise RuntimeError(f"写入样例文件失败:{e}") from e

    def gen_solutions(self):
        """Download the problem's solutions and write one Markdown file per solution.

        Files are written to ``solutions/<title>_<lid>.md``; characters that
        are not safe in a file name are replaced with ``_``.

        Raises:
            ConnectionError: the solution page could not be fetched.
            RuntimeError: the page data could not be parsed or a file could
                not be written.
        """
        solution_url = f"https://luogu.com.cn/problem/solution/{self.problem_id}"
        try:
            resp = requests.get(solution_url, headers=headers, cookies=cookies, timeout=10)
            resp.raise_for_status()
        except RequestException as e:
            raise ConnectionError(f"无法访问题解页面:{solution_url},错误:{e}") from e

        try:
            solutions_data = self._extract_json_from_html(resp.text).get("data") or {}
            # `or` fallbacks tolerate keys that are present but null.
            solutions = (solutions_data.get("solutions") or {}).get("result") or []
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            raise RuntimeError(f"解析题解数据失败:{e}") from e

        try:
            os.makedirs("solutions", exist_ok=True)
            for solution in solutions:
                author = solution.get('author') or {}
                title = solution.get('title', '未命名题解')
                lid = solution.get('lid', 'unknown')
                content = solution.get('content', '')

                markdown_content = (
                    f"# {title}\n"
                    f"> By ![{author.get('name', '')}]({author.get('avatar', '')}?"
                    "x-oss-process=image/resize,m_fixed,h_30,w_30,image/circle,r_100/format,png)"
                    f"[{author.get('name', '')}](https://luogu.com.cn/user/{author.get('uid', '')})"
                    "\n\n"
                    f"{content}"
                )

                # title and lid come from the remote page; sanitise them so a
                # '/' or similar cannot break the path or escape 'solutions/'.
                filename = f"solutions/{self._safe_filename(f'{title}_{lid}')}.md"
                with open(filename, "w", encoding="utf8") as f:
                    f.write(markdown_content)
        except IOError as e:
            raise RuntimeError(f"写入题解文件失败:{e}") from e

    def gen_all(self):
        """Create a directory named after the problem id and generate everything in it.

        The working directory is switched to that directory while the
        ``gen_*`` methods run and restored afterwards, so repeated calls do
        not nest output directories.

        Raises:
            RuntimeError: any generation step failed.
        """
        original_cwd = os.getcwd()
        try:
            os.makedirs(self.problem_id, exist_ok=True)
            os.chdir(self.problem_id)
            self.gen_problem_md()
            self.gen_samples()
            self.gen_solutions()
        except Exception as e:
            raise RuntimeError(f"生成全部内容失败:{e}") from e
        finally:
            os.chdir(original_cwd)


if __name__ == "__main__":
    try:
        p = Problem("P1145")
        p.gen_all()
    except Exception as e:
        print(f"[ERROR] 发生错误:{e}")