luogu-cli/models.py
2025-05-01 14:33:06 +08:00

168 lines
6.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import json
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from requests.exceptions import RequestException
# Load the .env file. This call must stay ahead of the os.getenv() lookups
# below that populate the `cookies` dict, since those run at import time.
load_dotenv()
# Request headers and cookies.

# User-Agent string (desktop Firefox) used for the page requests in this module.
_DESKTOP_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) '
    'Gecko/20100101 Firefox/115.0'
)
headers = {'User-Agent': _DESKTOP_USER_AGENT}

# (cookie name, environment variable) pairs. A cookie whose environment
# variable is unset is still present in `cookies`, with the value None.
_COOKIE_ENV_VARS = (
    ("__client_id", "CLIENT_ID"),
    ("_uid", "UID"),
    ("cf_clearance", "CF_CLEARANCE"),
    ("C3VK", "C3VK"),
)
cookies = {
    cookie_name: os.getenv(env_name)
    for cookie_name, env_name in _COOKIE_ENV_VARS
}

# Header set for JSON/XHR-style requests (mobile Chrome User-Agent).
# NOTE(review): not referenced in the visible part of this file; presumably
# imported by other modules -- confirm before changing.
_MOBILE_USER_AGENT = (
    'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
    'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Mobile Safari/537.36'
)
BASE_HEADERS = {
    'User-Agent': _MOBILE_USER_AGENT,
    'Origin': 'https://www.luogu.com.cn',
    'Content-Type': 'application/json',
    'X-Requested-With': 'XMLHttpRequest',
}
class Problem:
    """A problem page fetched from luogu.com.cn, with exporters to local files.

    Constructing an instance performs an HTTP GET of the problem page (using
    the module-level ``headers`` and ``cookies``) and parses the JSON embedded
    in the page's ``<script id="lentille-context">`` tag.

    Attributes:
        problem_id: Identifier passed to the constructor (e.g. ``"P1145"``).
        problem_url: URL the problem page was fetched from.
        html: Raw HTML text of the problem page.
        json: The ``data.problem`` object extracted from the page JSON.
        title, background, description, formatI, formatO, hint: Statement
            fields; an empty string when the page does not provide one.
        samples: Sequence of samples; each item is indexed as
            ``sample[0]`` (input) and ``sample[1]`` (output).
    """

    # Characters replaced by "_" when remote-supplied text becomes a file name
    # (path separators plus characters that Windows rejects in file names).
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, problem_id):
        """Fetch and parse the problem page for *problem_id*.

        Raises:
            ConnectionError: The page could not be fetched (network error,
                timeout after 10 seconds, or an HTTP error status).
            RuntimeError: The page was fetched but the embedded problem data
                could not be located or parsed.
        """
        self.problem_id = problem_id
        self.problem_url = f"https://luogu.com.cn/problem/{problem_id}"
        self.html = None
        self.json = None
        self.title = ""
        self.background = ""
        self.description = ""
        self.formatI = ""
        self.formatO = ""
        self.hint = ""
        self.samples = []

        try:
            resp = requests.get(self.problem_url, headers=headers, cookies=cookies, timeout=10)
            resp.raise_for_status()
            self.html = resp.text
        except RequestException as e:
            raise ConnectionError(f"无法访问题目页面:{self.problem_url},错误:{e}") from e

        # All field extraction lives inside one try block so that a missing or
        # null key is reported as a parse failure rather than escaping as a
        # raw KeyError/AttributeError/TypeError.
        try:
            self.json = self._extract_json_from_html(self.html)['data']['problem']
            contenu = self.json['contenu']
            self.title = self.json.get('title') or ''
            self.background = contenu.get('background') or ''
            self.description = contenu.get('description') or ''
            self.formatI = contenu.get('formatI') or ''
            self.formatO = contenu.get('formatO') or ''
            self.hint = contenu.get('hint') or ''
            self.samples = self.json.get('samples') or []
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            raise RuntimeError(f"解析题目数据失败:{e}") from e

    @staticmethod
    def _safe_filename(name):
        """Return *name* with characters that are unsafe in a file name replaced.

        Path separators, the characters in ``_UNSAFE_FILENAME_CHARS`` and
        control characters become ``"_"``; surrounding spaces and dots are
        stripped. An empty result is returned as ``"_"``.
        """
        cleaned = "".join(
            "_" if ch in Problem._UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
            for ch in str(name)
        ).strip(" .")
        return cleaned or "_"

    def _extract_json_from_html(self, html_content):
        """Return the decoded JSON from the page's ``lentille-context`` script tag.

        Raises:
            ValueError: The script tag is missing, or its content is not
                valid JSON.
            RuntimeError: The HTML parser itself failed.
        """
        # Only the parser calls are guarded by the broad handler, so the
        # ValueErrors raised below are not re-wrapped under a misleading
        # "HTML parse failed" message.
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            script_tag = soup.find('script', {'id': 'lentille-context', 'type': 'application/json'})
        except Exception as e:
            raise RuntimeError(f"HTML 解析失败:{e}") from e

        if script_tag is None:
            raise ValueError("无法找到指定的JSON脚本标签")

        try:
            return json.loads(script_tag.string)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError: the tag exists but has no single string child (.string is None).
            raise ValueError(f"JSON 解析失败:{e}") from e

    def gen_problem_md(self):
        """Write the statement to ``<problem_id>.md`` in the current directory.

        The file starts with the title as a level-1 heading, followed by one
        level-2 section per non-empty statement field.

        Raises:
            RuntimeError: The file could not be written.
        """
        sections = (
            ("【题目背景】", self.background),
            ("【题目描述】", self.description),
            ("【输入格式】", self.formatI),
            ("【输出格式】", self.formatO),
            ("【提示】", self.hint),
        )
        parts = [f"# {self.title}"]
        parts.extend(f"\n\n## {heading}\n{body}" for heading, body in sections if body)
        try:
            with open(f"{self.problem_id}.md", "w", encoding="utf8") as f:
                f.write("".join(parts))
        except OSError as e:
            raise RuntimeError(f"写入题面Markdown文件失败:{e}") from e

    def gen_samples(self):
        """Write each sample to ``samples/sample_<n>.in`` and ``.out`` (n from 1).

        The ``samples`` directory is created under the current directory.

        Raises:
            RuntimeError: A file could not be written or a sample is malformed.
        """
        try:
            os.makedirs("samples", exist_ok=True)
            for index, sample in enumerate(self.samples, start=1):
                # Read both halves before opening any file so a malformed
                # sample does not leave a lone .in file behind.
                sample_in, sample_out = sample[0], sample[1]
                with open(f"samples/sample_{index}.in", "w", encoding="utf8") as f:
                    f.write(sample_in)
                with open(f"samples/sample_{index}.out", "w", encoding="utf8") as f:
                    f.write(sample_out)
        except (OSError, IndexError, TypeError) as e:
            raise RuntimeError(f"写入样例文件失败:{e}") from e

    def gen_solutions(self):
        """Fetch the problem's solution page and write one Markdown file per solution.

        Files are written to ``solutions/<title>_<lid>.md`` under the current
        directory; the title and lid come from the remote page and are
        sanitised before being used as a file name.

        Raises:
            ConnectionError: The solution page could not be fetched.
            RuntimeError: The page data could not be parsed, or a file could
                not be written.
        """
        solution_url = f"https://luogu.com.cn/problem/solution/{self.problem_id}"
        try:
            resp = requests.get(solution_url, headers=headers, cookies=cookies, timeout=10)
            resp.raise_for_status()
        except RequestException as e:
            raise ConnectionError(f"无法访问题解页面:{solution_url},错误:{e}") from e

        try:
            page = self._extract_json_from_html(resp.text)
            # `or` guards treat JSON nulls the same as missing keys.
            solutions_data = page.get("data") or {}
            solutions = (solutions_data.get("solutions") or {}).get("result") or []
        except (KeyError, ValueError, AttributeError) as e:
            raise RuntimeError(f"解析题解数据失败:{e}") from e

        try:
            os.makedirs("solutions", exist_ok=True)
            for solution in solutions:
                author = solution.get('author') or {}
                title = solution.get('title') or '未命名题解'
                lid = solution.get('lid', 'unknown')
                content = solution.get('content') or ''
                markdown_content = (
                    f"# {title}\n"
                    f"> By ![{author.get('name', '')}]({author.get('avatar', '')}?"
                    "x-oss-process=image/resize,m_fixed,h_30,w_30,image/circle,r_100/format,png)"
                    f"[{author.get('name', '')}](https://luogu.com.cn/user/{author.get('uid', '')})"
                    "\n\n"
                    f"{content}"
                )
                # The title is untrusted remote text: a "/" in it would
                # otherwise point outside solutions/ or make open() fail.
                stem = self._safe_filename(f"{title}_{lid}")
                filename = f"solutions/{stem}.md"
                with open(filename, "w", encoding="utf8") as f:
                    f.write(markdown_content)
        except OSError as e:
            raise RuntimeError(f"写入题解文件失败:{e}") from e

    def gen_all(self):
        """Export statement, samples and solutions into a ``<problem_id>/`` directory.

        The directory is created under the current working directory. The
        working directory is switched into it for the export and restored
        afterwards, whether or not the export succeeds.

        Raises:
            RuntimeError: Any step of the export failed.
        """
        original_cwd = None
        try:
            original_cwd = os.getcwd()
            os.makedirs(self.problem_id, exist_ok=True)
            os.chdir(self.problem_id)
            self.gen_problem_md()
            self.gen_samples()
            self.gen_solutions()
        except Exception as e:
            raise RuntimeError(f"生成全部内容失败:{e}") from e
        finally:
            # Without this, a second gen_all() call in the same process would
            # create its directory nested inside the first one.
            if original_cwd is not None:
                os.chdir(original_cwd)
if __name__ == "__main__":
    # Manual run: fetch problem P1145 and export everything via gen_all().
    try:
        p = Problem("P1145")
        p.gen_all()
    except Exception as e:
        # SystemExit with a string argument prints the message to stderr and
        # exits with status 1, so shell callers can detect the failure
        # (printing and returning normally would exit with status 0).
        raise SystemExit(f"[ERROR] 发生错误:{e}")