"""
Douyin Cookie Fetcher (T1)
Purpose: open a browser, let the user log in by scanning the QR code, then automatically collect the cookies and save them to config_douyin.yml
"""
import asyncio
import io
import json
import os
import sys
from pathlib import Path

import yaml
# Force console output to UTF-8 so non-ASCII text is not garbled on Windows.
def ensure_utf8_stream(stream):
    """Switch a text stream's encoding to UTF-8 in place, when it supports that.

    Args:
        stream: Typically ``sys.stdout`` or ``sys.stderr``. ``None`` and
            stream objects without a ``reconfigure`` method are left untouched.

    ``reconfigure()`` keeps the existing stream object and its buffering
    settings. Wrapping ``stream.buffer`` in a brand-new ``io.TextIOWrapper``
    instead requires a ``.buffer`` attribute and defaults to
    ``line_buffering=False``, so prompts would not be flushed on newline.
    """
    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding='utf-8')


if sys.platform.startswith('win'):
    ensure_utf8_stream(sys.stdout)
    ensure_utf8_stream(sys.stderr)
# =================================================================
# 1. Environment configuration and path initialisation
# =================================================================
# Absolute path of this script, the directory that contains it, and the
# config file that lives next to the script.
current_file_path = os.path.abspath(__file__)
current_dir = os.path.dirname(current_file_path)
config_path = os.path.join(current_dir, "config_douyin.yml")
# =================================================================
# 2. Cookie filtering and update logic
# =================================================================

# Names of the core cookies we care about.
# The set was widened further to include every cookie that may be used
# for validation.
REQUIRED_KEYS = {
    "msToken", "ttwid", "odin_tt", "passport_csrf_token", "sid_guard",
    "sessionid", "sid_tt", "uid_tt", "uid_tt_ss", "sid_ucp_v1",
    "ssid_ucp_v1", "n_sdk_version", "s_v_web_id", "webid",
    "__ac_nonce", "__ac_signature", "fp", "device_web_cpu_core",
    "device_web_memory_size", "architecture", "hevc_supported"
}
def update_config_yaml(new_cookies: dict[str, str]) -> None:
    """Merge freshly captured cookies into config_douyin.yml.

    The file at ``config_path`` is loaded if it exists. Its ``cookies``
    mapping is updated with the entries of *new_cookies* whose names appear
    in ``REQUIRED_KEYS``, its ``cookie`` entry is replaced by all captured
    cookies joined as ``name=value; name=value``, and the result is written
    back to the same path.

    Args:
        new_cookies: Mapping of cookie name to cookie value.

    Read and write failures are reported on stdout rather than raised.
    """
    config_data = {}

    # 1. Read the existing configuration so unrelated keys are preserved.
    if os.path.exists(config_path):
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config_data = yaml.safe_load(f) or {}
        except Exception as e:
            print(f"[ Warning ]: Failed to read existing config: {e}")

    # A YAML document whose top level is a list or a scalar cannot take the
    # keys assigned below; start over, as is done when reading fails.
    if not isinstance(config_data, dict):
        print("[ Warning ]: Existing config is not a mapping; starting from an empty config.")
        config_data = {}

    # 2. Update the `cookies` section. A missing key, a `cookies:` entry left
    #    empty (loaded as None) or any other non-mapping value is replaced by
    #    a fresh dict so that `.update()` below cannot fail.
    cookies_section = config_data.get("cookies")
    if not isinstance(cookies_section, dict):
        cookies_section = {}
        config_data["cookies"] = cookies_section

    # Keep only the cookie names we need.
    filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS}
    cookies_section.update(filtered_cookies)

    # Also refresh the global cookie string; it carries every captured cookie
    # (not just the filtered ones) for completeness.
    cookie_str = "; ".join(f"{k}={v}" for k, v in new_cookies.items())
    config_data["cookie"] = cookie_str

    # 3. Write the file back. Serialise first: opening with 'w' truncates the
    #    file, so a failure while dumping must happen before the open().
    try:
        serialized = yaml.dump(config_data, allow_unicode=True, default_flow_style=False, sort_keys=False)
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f"\n[ Success ]: Cookie updated to: {config_path}")
        print(f"[ Info ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.")
    except Exception as e:
        print(f"[ Error ]: Failed to write config: {e}")
# =================================================================
# 3. Playwright automation logic
# =================================================================
async def _wait_for_login(page, attempts=120):
    """Poll *page* for a logged-in marker, sleeping one second between polls.

    Returns True as soon as one of the two selectors matches, and False after
    *attempts* polls without a match (roughly two minutes by default).
    """
    for _ in range(attempts):
        try:
            # Elements expected only after login: `.dy-avatar` (the avatar,
            # per the original author's note) and an element whose text is
            # "发布视频" (the "publish video" button).
            if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'):
                return True
        except Exception:
            # Best-effort probe: a failed query just means "try again".
            # `except Exception` (not a bare `except:`) lets
            # asyncio.CancelledError and KeyboardInterrupt propagate, so
            # Ctrl+C still stops the wait.
            pass
        await asyncio.sleep(1)
    return False


async def _capture_ms_token(page, cookie_dict):
    """Look for an msToken and store it in *cookie_dict* (mutated in place).

    Sources are tried in order: the page's localStorage, the cookies already
    captured, then ``window._ROUTER_DATA``. Any error is printed and ignored.
    """
    try:
        # 1. Try localStorage first.
        ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''")

        # 2. Otherwise fall back to the msToken cookie, if one was captured.
        if not ms_token:
            ms_token = cookie_dict.get('msToken', '')

        # 3. Otherwise look in the page's global router data.
        if not ms_token:
            ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''")

        if ms_token:
            cookie_dict['msToken'] = ms_token
            print(f"[ Info ]: Captured msToken: {ms_token[:20]}...")
        else:
            print("[ Warning ]: msToken not found in common locations.")
    except Exception as e:
        print(f"[ Debug ]: Error capturing msToken: {e}")


async def main():
    """Open Douyin in a visible Chromium window, wait for login, save cookies.

    Steps:
      1. Import Playwright lazily; if it is missing, print install
         instructions and return.
      2. Launch Chromium (not headless) and navigate to https://www.douyin.com/.
      3. Poll for a logged-in marker; if none is seen in time, wait for the
         user to press ENTER in the terminal.
      4. Read the browser context's cookies, try to add an msToken, and close
         the browser.
      5. Pass the cookies to update_config_yaml(), or report that none were
         captured.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        print("[ Error ]: Playwright not installed. Run: pip install playwright && playwright install chromium")
        return

    print("\n" + "="*60)
    print(" Douyin Login & Cookie Fetcher")
    print("="*60)
    print("[ Instructions ]:")
    print("1. Browser will open and navigate to Douyin.")
    print("2. Please login via QR code scanning.")
    print("3. After login, return here and press [ENTER] to save cookies.")
    print("="*60 + "\n")

    async with async_playwright() as p:
        # Launch a visible browser so the user can scan the QR code.
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context(
            viewport={'width': 1280, 'height': 800},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
        )
        page = await context.new_page()

        # Navigate to Douyin. 'domcontentloaded' plus a 90 s timeout improves
        # the chance of success on slow networks.
        print("[ Navigating ]: https://www.douyin.com/ ...")
        try:
            await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000)
        except Exception as e:
            # A failed navigation is not fatal; the user may still be able to
            # use the page that did load.
            print(f"[ Warning ]: Initial navigation timed out or failed: {e}")
            print("[ Info ]: Will continue anyway, please check if the page is visible.")

        # Automatic login detection, with manual confirmation as a fallback.
        print("\n" + "-"*40)
        print("[ Waiting ]: Please complete the QR login in the browser.")
        print("[ Auto-Detect ]: The script will automatically proceed if login is detected.")
        print("[ Manual ]: If auto-detect fails, type 'go' and press [ENTER] here.")
        print("-"*40 + "\n")

        if await _wait_for_login(page):
            print("[ Success ]: Login detected automatically!")
        else:
            # Auto-detection found nothing; wait for manual confirmation.
            print("[ Timeout ]: Auto-detection timed out. Please ensure you are logged in.")
            print("[ Action ]: Type anything and press [ENTER] to force capture cookies:")
            # readline() blocks, so it runs in a worker thread.
            await asyncio.to_thread(sys.stdin.readline)

        # Collect every cookie held by the browser context.
        all_cookies = await context.cookies()
        cookie_dict = {c['name']: c['value'] for c in all_cookies}

        # Try to capture the dynamic msToken parameter as well.
        await _capture_ms_token(page, cookie_dict)

        # Close the browser.
        await context.close()
        await browser.close()

    # Update the config file.
    if cookie_dict:
        update_config_yaml(cookie_dict)
    else:
        print("[ Failed ]: No cookies captured. Did you login successfully?")
if __name__ == "__main__":
    # Run the interactive login flow only when executed as a script.
    asyncio.run(main())