177 lines
7.5 KiB
Python
177 lines
7.5 KiB
Python
|
|
"""
Douyin Cookie Fetcher (T1)

Purpose: open a browser, let the user log in by scanning a QR code, then
automatically capture the cookies and save them to config_douyin.yml.
"""
|
|||
|
|
|
|||
|
|
import sys
|
|||
|
|
import os
|
|||
|
|
import json
|
|||
|
|
import yaml
|
|||
|
|
import asyncio
|
|||
|
|
import io
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
# Force the console output encoding to UTF-8 so that non-ASCII text is not
# garbled on Windows consoles.
def ensure_utf8_stream(stream):
    """Return *stream* switched to UTF-8 text output.

    Preference order:
    1. If the stream offers ``reconfigure`` (as ``io.TextIOWrapper`` does), the
       encoding is changed in place. This keeps the stream's existing
       buffering settings, so interactive prompts are not held back.
    2. Otherwise, if the stream exposes a binary ``buffer``, that buffer is
       wrapped in a new line-buffered UTF-8 ``io.TextIOWrapper``.
    3. Otherwise (including ``None``) the stream is returned unchanged.

    Args:
        stream: A text stream such as ``sys.stdout`` / ``sys.stderr``, or None.

    Returns:
        The reconfigured stream, a new wrapper around its buffer, or the
        original object when nothing can be done.
    """
    reconfigure = getattr(stream, "reconfigure", None)
    if callable(reconfigure):
        reconfigure(encoding="utf-8")
        return stream

    buffer = getattr(stream, "buffer", None)
    if buffer is None:
        return stream

    # line_buffering=True so each printed line reaches the console right away;
    # this script prints instructions and then waits for the user.
    return io.TextIOWrapper(buffer, encoding="utf-8", line_buffering=True)


if sys.platform.startswith('win'):
    sys.stdout = ensure_utf8_stream(sys.stdout)
    sys.stderr = ensure_utf8_stream(sys.stderr)
|
|||
|
|
|
|||
|
|
# =================================================================
# 1. Environment configuration and path initialisation
# =================================================================
# Absolute path of this script; the config file is looked up next to it.
current_file_path = os.path.abspath(__file__)
current_dir = os.path.split(current_file_path)[0]
config_path = os.path.join(
    current_dir,
    "config_douyin.yml",
)
|
|||
|
|
|
|||
|
|
# =================================================================
# 2. Cookie filtering and update logic
# =================================================================

# Names of the core cookies we care about.
# The set was widened to include every cookie that might be used for
# validation.
REQUIRED_KEYS = {
    "msToken",
    "ttwid",
    "odin_tt",
    "passport_csrf_token",
    "sid_guard",
    "sessionid",
    "sid_tt",
    "uid_tt",
    "uid_tt_ss",
    "sid_ucp_v1",
    "ssid_ucp_v1",
    "n_sdk_version",
    "s_v_web_id",
    "webid",
    "__ac_nonce",
    "__ac_signature",
    "fp",
    "device_web_cpu_core",
    "device_web_memory_size",
    "architecture",
    "hevc_supported",
}
|
|||
|
|
|
|||
|
|
def update_config_yaml(new_cookies):
    """Merge freshly captured cookies into config_douyin.yml.

    The existing YAML file at ``config_path`` (if any) is loaded, then:

    * ``cookies``: a mapping that is updated with the entries of
      *new_cookies* whose name is in ``REQUIRED_KEYS``; entries already in
      the file and not overwritten are kept.
    * ``cookie``: replaced by a single ``"name=value; name=value"`` string
      built from *all* entries of *new_cookies*.

    Every other top-level key of the existing config is written back
    untouched. Read and write failures are reported on stdout and do not
    raise.

    Args:
        new_cookies: Mapping of cookie name to cookie value.
    """
    config_data = {}

    # 1. Load the existing configuration (best effort).
    if os.path.exists(config_path):
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config_data = yaml.safe_load(f) or {}
        except Exception as e:
            print(f"[ Warning ]: Failed to read existing config: {e}")

    # A YAML document whose root is a list or scalar cannot hold our keys.
    if not isinstance(config_data, dict):
        print("[ Warning ]: Existing config root is not a mapping; starting from an empty config.")
        config_data = {}

    # 2. Update the "cookies" mapping. A missing, null or non-mapping value
    #    (e.g. "cookies:" left empty in the file) is replaced by a fresh dict
    #    instead of crashing on .update().
    cookies_section = config_data.get("cookies")
    if not isinstance(cookies_section, dict):
        cookies_section = {}
        config_data["cookies"] = cookies_section

    # Keep only the cookie names we care about.
    filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS}
    cookies_section.update(filtered_cookies)

    # Also refresh the global cookie string; it contains every captured
    # cookie so that nothing is lost.
    config_data["cookie"] = "; ".join(f"{k}={v}" for k, v in new_cookies.items())

    # 3. Write the file back. Serialize first so that a serialization error
    #    cannot leave a truncated config file behind.
    try:
        serialized = yaml.dump(
            config_data,
            allow_unicode=True,
            default_flow_style=False,
            sort_keys=False,
        )
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f"\n[ Success ]: Cookie updated to: {config_path}")
        print(f"[ Info ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.")
    except Exception as e:
        print(f"[ Error ]: Failed to write config: {e}")
|
|||
|
|
|
|||
|
|
# =================================================================
# 3. Playwright automation logic
# =================================================================
|
|||
|
|
|
|||
|
|
async def _wait_for_login(page, max_polls=120):
    """Poll *page* about once per second for a logged-in marker.

    Two selectors are probed: ``.dy-avatar`` and ``text=发布视频`` (per the
    original author's note, the avatar and the "publish video" button).

    Args:
        page: Playwright page to inspect.
        max_polls: Maximum number of probes; one second is slept after each
            unsuccessful probe.

    Returns:
        True as soon as either selector matches, False once the polls are
        used up.
    """
    for _ in range(max_polls):
        try:
            if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'):
                return True
        except Exception:
            # Best-effort probe: a failed query is simply retried on the next
            # tick. Only Exception is caught so that task cancellation and
            # Ctrl+C are not swallowed.
            pass
        await asyncio.sleep(1)
    return False


async def _find_ms_token(page, cookie_dict):
    """Look up msToken in localStorage, then cookies, then a page global.

    Args:
        page: Playwright page to evaluate scripts in.
        cookie_dict: Mapping of captured cookie name to value.

    Returns:
        The first non-empty value found, or a falsy value if none exists.
    """
    # 1. localStorage
    ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''")

    # 2. the captured cookies
    if not ms_token:
        ms_token = cookie_dict.get('msToken', '')

    # 3. the page's global router data
    if not ms_token:
        ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''")

    return ms_token


async def main():
    """Open Douyin in a visible browser, wait for login and save the cookies.

    Steps:
    1. Import Playwright lazily; print an install hint and return if missing.
    2. Launch a non-headless Chromium, open https://www.douyin.com/.
    3. Wait up to ~120 seconds for a logged-in marker; if none shows up, wait
       for the user to press ENTER on stdin.
    4. Collect all cookies of the browser context, add msToken if it can be
       found, close the browser and pass the cookies to
       ``update_config_yaml``.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        print("[ Error ]: Playwright not installed. Run: pip install playwright && playwright install chromium")
        return

    print("\n" + "="*60)
    print(" Douyin Login & Cookie Fetcher")
    print("="*60)
    print("[ Instructions ]:")
    print("1. Browser will open and navigate to Douyin.")
    print("2. Please login via QR code scanning.")
    print("3. After login, return here and press [ENTER] to save cookies.")
    print("="*60 + "\n")

    async with async_playwright() as p:
        # Launch the browser.
        browser = await p.chromium.launch(headless=False)
        context = None
        try:
            context = await browser.new_context(
                viewport={'width': 1280, 'height': 800},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
            )
            page = await context.new_page()

            # Navigate to Douyin. 'domcontentloaded' plus a 90 s timeout is
            # used to improve the success rate on slow networks.
            print("[ Navigating ]: https://www.douyin.com/ ...")
            try:
                await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000)
            except Exception as e:
                # Navigation problems are not fatal: the user may still be
                # able to log in in the opened window.
                print(f"[ Warning ]: Initial navigation timed out or failed: {e}")
                print("[ Info ]: Will continue anyway, please check if the page is visible.")

            # Automatic login detection with manual ENTER as a fallback.
            print("\n" + "-"*40)
            print("[ Waiting ]: Please complete the QR login in the browser.")
            print("[ Auto-Detect ]: The script will automatically proceed if login is detected.")
            print("[ Manual ]: If auto-detect fails, type 'go' and press [ENTER] here.")
            print("-"*40 + "\n")

            if await _wait_for_login(page):
                print("[ Success ]: Login detected automatically!")
            else:
                # Auto-detection did not succeed; fall back to manual input.
                print("[ Timeout ]: Auto-detection timed out. Please ensure you are logged in.")
                print("[ Action ]: Type anything and press [ENTER] to force capture cookies:")
                # Read stdin in a worker thread so the event loop stays free.
                await asyncio.to_thread(sys.stdin.readline)

            # Collect every cookie of the context; for duplicate names the
            # last one returned wins.
            all_cookies = await context.cookies()
            cookie_dict = {c['name']: c['value'] for c in all_cookies}

            # Try to capture the dynamic msToken parameter (best effort).
            try:
                ms_token = await _find_ms_token(page, cookie_dict)
                if ms_token:
                    cookie_dict['msToken'] = ms_token
                    print(f"[ Info ]: Captured msToken: {ms_token[:20]}...")
                else:
                    print("[ Warning ]: msToken not found in common locations.")
            except Exception as e:
                print(f"[ Debug ]: Error capturing msToken: {e}")
        finally:
            # Close the browser even when something above raised, so that no
            # browser window is left behind.
            if context is not None:
                await context.close()
            await browser.close()

        # Update the configuration file.
        if cookie_dict:
            update_config_yaml(cookie_dict)
        else:
            print("[ Failed ]: No cookies captured. Did you login successfully?")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Script entry point: run the async workflow on a fresh event loop.
    asyncio.run(main())
|