168 lines
6.1 KiB
Python
168 lines
6.1 KiB
Python
![]() |
import os
|
|||
|
import json
|
|||
|
import requests
|
|||
|
from bs4 import BeautifulSoup
|
|||
|
from dotenv import load_dotenv
|
|||
|
from requests.exceptions import RequestException
|
|||
|
|
|||
|
# Load the .env file
load_dotenv()
|
|||
|
|
|||
|
# 请求头与 Cookie
|
|||
|
headers = {
|
|||
|
'User-Agent': (
|
|||
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) '
|
|||
|
'Gecko/20100101 Firefox/115.0'
|
|||
|
)
|
|||
|
}
|
|||
|
|
|||
|
cookies = {
|
|||
|
"__client_id": os.getenv("CLIENT_ID"),
|
|||
|
"_uid": os.getenv("UID"),
|
|||
|
"cf_clearance": os.getenv("CF_CLEARANCE"),
|
|||
|
"C3VK": os.getenv("C3VK")
|
|||
|
}
|
|||
|
|
|||
|
BASE_HEADERS = {
|
|||
|
'User-Agent': (
|
|||
|
'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
|
|||
|
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Mobile Safari/537.36'
|
|||
|
),
|
|||
|
'Origin': 'https://www.luogu.com.cn',
|
|||
|
'Content-Type': 'application/json',
|
|||
|
'X-Requested-With': 'XMLHttpRequest'
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
class Problem:
    """A Luogu problem loaded from its problem page.

    Creating an instance performs an HTTP GET of the problem page, extracts
    the JSON document embedded in the page and copies the statement fields
    onto the instance. The ``gen_*`` methods write the statement, the sample
    data and the solutions to files relative to the current working
    directory; ``gen_all`` does so inside a directory named after the
    problem id.
    """

    # Characters replaced in generated file names: path separators plus the
    # characters that are not allowed in Windows file names.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, problem_id):
        """Fetch and parse the problem identified by ``problem_id``.

        Args:
            problem_id: Identifier used in the problem URL, e.g. ``"P1145"``.

        Raises:
            ConnectionError: The problem page could not be retrieved.
            RuntimeError: The embedded problem data is missing or malformed.
        """
        self.problem_id = problem_id
        self.problem_url = f"https://luogu.com.cn/problem/{problem_id}"
        self.html = None
        self.json = None
        self.title = ""
        self.background = ""
        self.description = ""
        self.formatI = ""
        self.formatO = ""
        self.hint = ""
        self.samples = []

        try:
            resp = requests.get(self.problem_url, headers=headers, cookies=cookies, timeout=10)
            resp.raise_for_status()
            self.html = resp.text
        except RequestException as e:
            raise ConnectionError(f"无法访问题目页面:{self.problem_url},错误:{e}") from e

        try:
            self.json = self._extract_json_from_html(self.html)['data']['problem']
        except (KeyError, TypeError, ValueError) as e:
            # TypeError covers a payload whose 'data' entry is not a mapping.
            raise RuntimeError(f"解析题目数据失败:{e}") from e
        if not isinstance(self.json, dict):
            raise RuntimeError("解析题目数据失败:题目数据格式不正确")

        # A missing or null 'contenu' entry yields empty statement fields,
        # matching the per-field '' defaults below, instead of a raw KeyError.
        content = self.json.get('contenu') or {}
        self.title = self.json.get('title', '')
        self.background = content.get('background', '')
        self.description = content.get('description', '')
        self.formatI = content.get('formatI', '')
        self.formatO = content.get('formatO', '')
        self.hint = content.get('hint', '')
        self.samples = self.json.get('samples', [])

    @staticmethod
    def _safe_filename(name):
        """Return ``name`` with path separators and control characters replaced by ``_``."""
        unsafe = Problem._UNSAFE_FILENAME_CHARS
        cleaned = "".join(
            "_" if ch in unsafe or ord(ch) < 32 else ch
            for ch in str(name)
        ).strip()
        return cleaned or "_"

    def _extract_json_from_html(self, html_content):
        """Return the decoded JSON embedded in the page's context script tag.

        Looks for ``<script id="lentille-context" type="application/json">``.

        Args:
            html_content: HTML text of a page.

        Returns:
            The object produced by ``json.loads`` on the tag's text.

        Raises:
            ValueError: The tag is absent, empty, or does not hold valid JSON.
            RuntimeError: The HTML itself could not be parsed.
        """
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            script_tag = soup.find('script', {'id': 'lentille-context', 'type': 'application/json'})
        except Exception as e:
            raise RuntimeError(f"HTML 解析失败:{e}") from e

        # Raised outside the try block so that it reaches the caller as the
        # ValueError it is meant to be rather than being re-wrapped.
        if script_tag is None:
            raise ValueError("无法找到指定的JSON脚本标签")

        payload = script_tag.string
        if payload is None:
            raise ValueError("JSON 解析失败:脚本标签内容为空")
        try:
            return json.loads(payload)
        except json.JSONDecodeError as e:
            raise ValueError(f"JSON 解析失败:{e}") from e

    def gen_problem_md(self):
        """Write the statement to ``<problem_id>.md`` in the current directory.

        Empty sections are skipped.

        Raises:
            RuntimeError: The file could not be written.
        """
        sections = (
            ("【题目背景】", self.background),
            ("【题目描述】", self.description),
            ("【输入格式】", self.formatI),
            ("【输出格式】", self.formatO),
            ("【提示】", self.hint),
        )
        parts = [f"# {self.title}"]
        parts.extend(f"\n\n## {heading}\n{body}" for heading, body in sections if body)

        try:
            with open(f"{self.problem_id}.md", "w", encoding="utf8") as f:
                f.write("".join(parts))
        except OSError as e:  # IOError is an alias of OSError
            raise RuntimeError(f"写入题面Markdown文件失败:{e}") from e

    def gen_samples(self):
        """Write each sample to ``samples/sample_<n>.in`` and ``.out`` (n from 1).

        Each entry of ``self.samples`` is indexed as ``[input, output]``.

        Raises:
            RuntimeError: A file could not be written or a sample is malformed.
        """
        try:
            os.makedirs("samples", exist_ok=True)
            for index, sample in enumerate(self.samples, start=1):
                with open(f"samples/sample_{index}.in", "w", encoding="utf8") as f:
                    f.write(sample[0])
                with open(f"samples/sample_{index}.out", "w", encoding="utf8") as f:
                    f.write(sample[1])
        except (OSError, IndexError, TypeError) as e:
            raise RuntimeError(f"写入样例文件失败:{e}") from e

    def gen_solutions(self):
        """Download the solution page and write one Markdown file per solution.

        Files are written to ``solutions/<title>_<lid>.md``; characters that
        are unsafe in file names are replaced with ``_``.

        Raises:
            ConnectionError: The solution page could not be retrieved.
            RuntimeError: The solution data could not be parsed or written.
        """
        solution_url = f"https://luogu.com.cn/problem/solution/{self.problem_id}"
        try:
            resp = requests.get(solution_url, headers=headers, cookies=cookies, timeout=10)
            resp.raise_for_status()
        except RequestException as e:
            raise ConnectionError(f"无法访问题解页面:{solution_url},错误:{e}") from e

        try:
            page_data = self._extract_json_from_html(resp.text)
            solutions_data = page_data.get("data") or {}
            # `or` guards against explicit nulls, which .get() defaults do not cover.
            solutions = (solutions_data.get("solutions") or {}).get("result") or []
        except (KeyError, ValueError, AttributeError) as e:
            raise RuntimeError(f"解析题解数据失败:{e}") from e

        try:
            os.makedirs("solutions", exist_ok=True)
            for solution in solutions:
                author = solution.get('author') or {}
                title = solution.get('title') or '未命名题解'
                lid = solution.get('lid', 'unknown')
                content = solution.get('content', '')

                # NOTE(review): the avatar expression in the "> By" line was
                # lost in the copy of the file under review (only `}?`
                # survived). `author.get('avatar', '')` is a reconstruction -
                # confirm the key against the original source.
                markdown_content = (
                    f"# {title}\n"
                    f"> By ![]({author.get('avatar', '')}?"
                    "x-oss-process=image/resize,m_fixed,h_30,w_30,image/circle,r_100/format,png)"
                    f"[{author.get('name', '')}](https://luogu.com.cn/user/{author.get('uid', '')})"
                    "\n\n"
                    f"{content}"
                )

                # Titles are free text and may contain '/', ':' and similar.
                filename = f"solutions/{self._safe_filename(f'{title}_{lid}')}.md"
                with open(filename, "w", encoding="utf8") as f:
                    f.write(markdown_content)
        except OSError as e:  # IOError is an alias of OSError
            raise RuntimeError(f"写入题解文件失败:{e}") from e

    def gen_all(self):
        """Generate statement, samples and solutions inside ``./<problem_id>/``.

        The working directory is switched to the problem directory while the
        files are generated and restored afterwards, whether or not an error
        occurred.

        Raises:
            RuntimeError: Any generation step failed.
        """
        try:
            previous_cwd = os.getcwd()
        except OSError as e:
            raise RuntimeError(f"生成全部内容失败:{e}") from e

        try:
            os.makedirs(self.problem_id, exist_ok=True)
            os.chdir(self.problem_id)
            self.gen_problem_md()
            self.gen_samples()
            self.gen_solutions()
        except Exception as e:
            raise RuntimeError(f"生成全部内容失败:{e}") from e
        finally:
            # Undo the chdir so repeated calls do not nest directories.
            os.chdir(previous_cwd)
|
|||
|
|
|||
|
|
|||
|
if __name__ == "__main__":
    # Script entry point: download one problem (statement, samples, solutions).
    import sys  # local import: only needed when run as a script

    # Problem id comes from the first command-line argument; without one the
    # previously hard-coded "P1145" is used.
    target_id = sys.argv[1] if len(sys.argv) > 1 else "P1145"
    try:
        Problem(target_id).gen_all()
    except Exception as e:
        # Report on stderr and exit non-zero so callers can detect the failure.
        print(f"[ERROR] 发生错误:{e}", file=sys.stderr)
        sys.exit(1)
|