在 hexo 创建独立书&影页面

最近想在博客上创建几个独立的页面,来展示我的书单,影单和旅行记录

由于是独立页面,页面样式不是博客主题的样式,需要绕过 hexo 渲染的流程。hexo 提供了 skip_render 选项,可以让 hexo 跳过对特定文件的渲染,直接将其复制到 public 目录下

下面是我在 _config.yml 中添加的配置:

1
2
skip_render:
- 'project/**' # 跳过 source/project 目录下所有文件的渲染

创建了几个独立的页面,分别是书单和影单,地址如下:

目前信息还是手动维护,后期需要添加的功能有:

- 书&影信息的自动化维护:从豆瓣获取书籍信息,包括封面,评分,简介等
- 旅行页面:需要重新设计,目前只是一个空页面

旅行页面

看这篇笔记:MapLibre GL 集成笔记

书&影页面

以影单页面为例,书单页面的实现方式类似:

  1. 在豆瓣中标记看过的电影,通过脚本导出 csv 文件,包含电影标题,标记日期和链接等信息
  2. 使用 Python + playwright 爬取电影的封面,并保存到本地

导出 csv 文件

  1. 获取你的豆瓣 ID : 打开豆瓣个人主页,浏览器顶部 URL 中的数字部分就是你的 ID,例如 https://www.douban.com/people/123456 中的 123456 就是 ID
  2. 打开浏览器控制台,输入 允许粘贴 后回车
  3. 粘贴下面的脚本并回车,注意需要将脚本中的 你的豆瓣ID 替换为你的实际 ID
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
(async () => {
  // Crawl the logged-in user's "watched" list on Douban page by page and
  // trigger a CSV download with title / marked date / link per movie.
  let start = 0;
  const movies = [];
  while (true) {
    // The grid view paginates 15 items per request.
    const res = await fetch(`/people/你的豆瓣ID/collect?start=${start}&sort=time&rating=all&filter=all&mode=grid`);
    const text = await res.text();
    const doc = new DOMParser().parseFromString(text, 'text/html');
    const items = [...doc.querySelectorAll('.item')].map(el => ({
      title: el.querySelector('.title')?.textContent.trim(),
      // Rating is encoded in a class name like "rating4-t"; 0 when unrated.
      rating: (el.querySelector('[class^="rating"]')?.className.match(/rating(\d)-t/)?.[1] || 0),
      date: el.querySelector('.date')?.textContent.trim(),
      link: el.querySelector('a')?.href
    }));
    if (items.length === 0) break; // past the last page
    movies.push(...items);
    start += 15;
  }
  console.log('共获取到', movies.length, '部影片');
  console.log(movies);
  // 可导出为 CSV
  // Escape embedded double quotes (RFC 4180) so a title containing `"`
  // does not corrupt the CSV row; also normalize missing fields to "".
  const quote = (v) => `"${String(v ?? '').replace(/"/g, '""')}"`;
  const csv = '标题,标记日期,链接\n'
    + movies.map(m => [m.title, m.date, m.link].map(quote).join(',')).join('\n');
  const blob = new Blob([csv], { type: 'text/csv' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'douban-movies.csv';
  a.click();
  // Release the blob URL once the download has been handed to the browser.
  URL.revokeObjectURL(url);
})();

爬取封面

我是将 csv 文件转换成了 json 文件,保存在 movies.json 中,格式如下:

1
2
3
4
5
6
7
8
9
[
{
"title": "电影标题",
"date": "标记日期",
"link": "豆瓣链接",
"poster": "封面链接"
},
...
]

然后使用 Python + Playwright 去爬取封面图片,在执行代码之前,需要创建下面几个文件:

  1. movies.json : 包含电影信息的 json 文件
  2. dbCookies.json : 保存豆瓣的登录信息
  3. movieImg 目录 : 用于保存爬取到的封面图片,图片名称就是影片名称
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import json
import random
import re
import sys
import time
from pathlib import Path

from playwright.sync_api import sync_playwright

# Output directory for downloaded poster images (one .jpg per movie title).
SAVE_DIR = Path(__file__).parent / "movieImg"
# Playwright storageState file that persists the Douban login cookies.
STATE_PATH = Path(__file__).parent / "dbCookies.json"

# Extra HTTP headers so page requests resemble a normal desktop Chrome
# browser arriving from movie.douban.com.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Referer": "https://movie.douban.com/",
}

# JavaScript injected into every page before site scripts run (via
# BrowserContext.add_init_script); it masks common automation fingerprints
# (navigator.webdriver, empty plugins/languages, missing window.chrome,
# permissions.query quirks). The string body is executed in the page at
# runtime, so it is kept verbatim, including its Chinese comments.
STEALTH_SCRIPT = """
// 隐藏 webdriver 特征
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });

// 补全 plugins 数组
Object.defineProperty(navigator, 'plugins', {
get: () => [
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer' },
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai' },
{ name: 'Native Client', filename: 'internal-nacl-plugin' },
],
});

// 补全 languages
Object.defineProperty(navigator, 'languages', { get: () => ['zh-CN', 'zh'] });

// 伪造 chrome 对象
window.chrome = window.chrome || {};
window.chrome.runtime = {};

// 覆盖 permissions
const originalQuery = window.navigator.permissions.query;
window.navigator.permissions.query = (params) =>
params.name === 'notifications'
? Promise.resolve({ state: 'denied' })
: originalQuery(params);
"""


def create_context(browser, storage_state=None):
    """Build a browser context hardened against bot detection.

    The stealth init script is registered so it runs in every new page
    before any site script, hiding the usual automation fingerprints.
    An optional ``storage_state`` restores a previously saved login.
    """
    ctx = browser.new_context(
        storage_state=storage_state,
        viewport={"width": 1920, "height": 1080},
    )
    ctx.add_init_script(STEALTH_SCRIPT)
    return ctx


def has_valid_login(path):
    """Return True if the storageState file at *path* holds a Douban login.

    A logged-in session is identified by a ``dbcl2`` cookie on a
    ``douban.com`` domain. Missing, unreadable, malformed, or wrongly
    shaped files all count as "not logged in" instead of raising.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    # OSError covers FileNotFoundError as well as PermissionError /
    # IsADirectoryError, which the previous version let propagate.
    except (OSError, json.JSONDecodeError):
        return False

    if not isinstance(data, dict):
        # e.g. a top-level JSON array instead of a storageState object
        return False

    return any(
        "douban.com" in c.get("domain", "") and c.get("name") == "dbcl2"
        for c in data.get("cookies", [])
        if isinstance(c, dict)
    )


def login_and_save_state():
    """Open the Douban login page and persist the session after manual login.

    Launches a headed Chrome window, waits for the user to finish logging
    in, then writes the Playwright storageState to STATE_PATH so later
    runs can reuse the cookies.
    """
    print("请在打开的浏览器中手动登录豆瓣...")
    with sync_playwright() as pw:
        chrome = pw.chromium.launch(
            channel="chrome",
            headless=False,
            args=["--disable-blink-features=AutomationControlled"],
        )
        ctx = create_context(chrome)
        tab = ctx.new_page()

        try:
            tab.goto("https://accounts.douban.com/passport/login", timeout=30000)
            tab.wait_for_load_state("networkidle")
            # Block until the user confirms the login is complete.
            print("登录完成后,按 Enter 键继续...")
            input()
            ctx.storage_state(path=str(STATE_PATH))
            print(f"✅ storageState 已保存至: {STATE_PATH}")
        except Exception as e:
            print(f"❌ 登录流程出错: {e}")
        finally:
            chrome.close()


def _safe_filename(title):
    """Replace characters that are illegal in Windows/Unix filenames."""
    return re.sub(r'[\\/:*?"<>|]', '_', title)


def _download_poster(page, context, url, save_path):
    """Open a Douban movie page and save its main poster image to save_path.

    Raises on navigation failure, a missing cover element, or a missing
    image URL, letting the caller decide how to report the error.
    """
    page.goto(url, timeout=15000)
    cover_selector = "#mainpic img"
    page.wait_for_selector(cover_selector, timeout=10000)

    cover_img = page.query_selector(cover_selector)
    # wait_for_selector can succeed and the node still detach before query;
    # also guard against an <img> with no src attribute.
    img_src = cover_img.get_attribute("src") if cover_img else None
    if not img_src:
        raise RuntimeError("未找到封面图片 URL")
    print(f"找到图片 URL: {img_src}")

    # Douban's image CDN rejects requests without a movie.douban.com referer.
    img_data = context.request.get(
        img_src, headers={"Referer": "https://movie.douban.com/"}
    ).body()
    with open(save_path, "wb") as f:
        f.write(img_data)


def main():
    """Download the poster image for every movie in movies.json into SAVE_DIR."""
    SAVE_DIR.mkdir(parents=True, exist_ok=True)

    movies_path = Path(__file__).parent / "movies.json"
    with open(movies_path, "r", encoding="utf-8") as f:
        movies = json.load(f)

    # Only entries whose poster is still a remote URL need downloading;
    # tolerate entries with no "poster" key at all.
    targets = [
        (m["title"], m["poster"])
        for m in movies
        if m.get("poster", "").startswith("http")
    ]
    print(f"共找到 {len(targets)} 个 HTTP poster 待下载")

    with sync_playwright() as p:
        browser = p.chromium.launch(
            channel="chrome",
            headless=False,
            args=["--disable-blink-features=AutomationControlled"],
        )

        if STATE_PATH.exists() and has_valid_login(STATE_PATH):
            context = create_context(browser, storage_state=str(STATE_PATH))
            print(f"✅ 已加载登录态: {STATE_PATH}")
        else:
            context = create_context(browser)
            print("⚠️ 未检测到有效登录态,以未登录状态运行(请先执行 --login)")

        page = context.new_page()
        page.set_extra_http_headers(HEADERS)

        for title, url in targets:
            save_path = str(SAVE_DIR / f"{_safe_filename(title)}.jpg")
            # Skip posters already fetched so re-runs only do the missing ones.
            if Path(save_path).exists():
                print(f"已存在,跳过: {save_path}")
                continue
            print(f"\n正在处理: {url}")

            try:
                _download_poster(page, context, url, save_path)
                print(f"封面已保存至: {save_path}")
            except Exception as e:
                print(f"下载失败 [{url}]: {e}")

            # Random pause between pages to avoid tripping rate limiting.
            delay = random.uniform(1, 3)
            print(f"等待 {delay:.1f} 秒...")
            time.sleep(delay)

        browser.close()


# CLI entry point: run once with --login to capture the Douban session,
# then run without arguments to download the posters.
if __name__ == "__main__":
    if "--login" in sys.argv:
        login_and_save_state()
    else:
        main()

效果如下

执行结果

最后将图片复制到博客的 source/project/movies/images 目录下,在影单页面就可以看到这些图片了