Files
aiData/DouYin/Test/T1_GetCookie.py

177 lines
7.5 KiB
Python
Raw Normal View History

2026-02-27 14:57:13 +08:00
"""
Douyin Cookie Fetcher (T1)
Purpose: open a browser, let the user log in by scanning the QR code, then automatically capture the cookies and save them to config_douyin.yml
"""
import sys
import os
import json
import yaml
import asyncio
import io
from pathlib import Path
# Force UTF-8 console output so non-ASCII text is not garbled on Windows.
def _utf8_stream(stream):
    """Return *stream* switched to UTF-8 output, or unchanged if that is impossible.

    Prefers ``reconfigure`` (Python 3.7+), which changes the encoding in place
    and keeps the stream's existing line-buffering setting. Re-wrapping
    ``stream.buffer`` in a fresh ``TextIOWrapper`` defaults to
    ``line_buffering=False``, which can hold interactive prompts back while
    the script is blocked waiting on stdin.

    Streams without ``reconfigure`` fall back to a line-buffered wrapper
    around ``stream.buffer``; objects offering neither (for example ``None``)
    are returned untouched instead of raising ``AttributeError``.
    """
    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding='utf-8')
        return stream
    raw_buffer = getattr(stream, "buffer", None)
    if raw_buffer is not None:
        return io.TextIOWrapper(raw_buffer, encoding='utf-8', line_buffering=True)
    return stream


if sys.platform.startswith('win'):
    sys.stdout = _utf8_stream(sys.stdout)
    sys.stderr = _utf8_stream(sys.stderr)
# =================================================================
# 1. Environment setup and path initialisation
# =================================================================
# The config file is located next to this script, independent of the
# caller's current working directory.
current_file_path = os.path.abspath(__file__)
current_dir = os.path.split(current_file_path)[0]
config_path = os.path.join(current_dir, "config_douyin.yml")
# =================================================================
# 2. Cookie filtering and update logic
# =================================================================
# Names of the core cookies this tool cares about. The list was widened
# to include every cookie that might take part in validation.
REQUIRED_KEYS = {
    "msToken",
    "ttwid",
    "odin_tt",
    "passport_csrf_token",
    "sid_guard",
    "sessionid",
    "sid_tt",
    "uid_tt",
    "uid_tt_ss",
    "sid_ucp_v1",
    "ssid_ucp_v1",
    "n_sdk_version",
    "s_v_web_id",
    "webid",
    "__ac_nonce",
    "__ac_signature",
    "fp",
    "device_web_cpu_core",
    "device_web_memory_size",
    "architecture",
    "hevc_supported",
}
def update_config_yaml(new_cookies: dict[str, str]) -> None:
    """Merge freshly captured cookies into ``config_douyin.yml``.

    Two fields of the config are written:

    * ``cookies`` - a mapping updated with the entries of *new_cookies*
      whose names are in ``REQUIRED_KEYS`` (other existing entries are kept).
    * ``cookie``  - a single ``"name=value; name=value"`` string built from
      *all* of *new_cookies*, replacing any previous value.

    Every other key already present in the config file is preserved.
    Read and write failures are reported on stdout rather than raised.

    Args:
        new_cookies: Mapping of cookie name to cookie value.
    """
    # 1. Load the existing configuration, if there is one.
    config_data = {}
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config_data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        # First run: there is nothing to merge with yet.
        pass
    except (OSError, UnicodeError, yaml.YAMLError) as e:
        print(f"[ Warning ]: Failed to read existing config: {e}")

    # A YAML document whose root is a list or scalar cannot hold our keys.
    if not isinstance(config_data, dict):
        print("[ Warning ]: Existing config is not a YAML mapping; starting from an empty config.")
        config_data = {}

    # 2. Update the structured `cookies` field.
    # `cookies:` with no value loads as None (and could be any other
    # non-mapping), which would make `.update()` fail - normalise it first.
    if not isinstance(config_data.get("cookies"), dict):
        config_data["cookies"] = {}

    # Keep only the cookie names we care about.
    filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS}
    config_data["cookies"].update(filtered_cookies)

    # Also refresh the flat cookie string; it carries every captured cookie
    # for completeness.
    cookie_str = "; ".join(f"{k}={v}" for k, v in new_cookies.items())
    config_data["cookie"] = cookie_str

    # 3. Write the file back.
    # Serialise before opening the file: opening with 'w' truncates it, so a
    # serialisation error after that point would leave an empty config.
    try:
        serialized = yaml.dump(
            config_data,
            allow_unicode=True,
            default_flow_style=False,
            sort_keys=False,
        )
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
    except (OSError, UnicodeError, yaml.YAMLError) as e:
        print(f"[ Error ]: Failed to write config: {e}")
    else:
        print(f"\n[ Success ]: Cookie updated to: {config_path}")
        print(f"[ Info ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.")
# =================================================================
# 3. Playwright automation logic
# =================================================================
async def _wait_for_login(page, timeout_s=120):
    """Poll *page* once per second until a logged-in marker element appears.

    Returns True as soon as either marker is found, or False after
    *timeout_s* unsuccessful polls.
    """
    for _ in range(timeout_s):
        try:
            # Elements that should only exist after login: '.dy-avatar'
            # (the avatar, per the original author's note) or any element
            # containing the text "发布视频" (publish video).
            if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'):
                return True
        except Exception:
            # Best effort: a lookup may fail transiently (presumably while
            # the page is navigating), so just retry on the next tick.
            # `Exception` rather than a bare `except` lets task cancellation
            # and Ctrl+C propagate instead of being swallowed.
            pass
        await asyncio.sleep(1)
    return False


async def _resolve_ms_token(page, cookie_dict):
    """Return the msToken from localStorage, the cookies or page globals ('' if absent)."""
    # 1. Try localStorage first.
    ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''")
    # 2. Fall back to the msToken cookie.
    if not ms_token:
        ms_token = cookie_dict.get('msToken', '')
    # 3. Finally look in the page's global router data.
    if not ms_token:
        ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''")
    return ms_token


async def main():
    """Open Douyin in a visible browser, wait for login, then persist the cookies.

    Flow:
      1. Launch a non-headless Chromium via Playwright and open douyin.com.
      2. Poll for up to 120 seconds for a logged-in marker element; if none
         shows up, wait for the user to press ENTER on stdin.
      3. Collect all cookies of the browser context and try to resolve
         ``msToken`` from localStorage / cookies / page globals.
      4. Close the browser and hand the cookies to ``update_config_yaml``.

    Returns early with an error message when Playwright is not installed.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        print("[ Error ]: Playwright not installed. Run: pip install playwright && playwright install chromium")
        return

    print("\n" + "="*60)
    print(" Douyin Login & Cookie Fetcher")
    print("="*60)
    print("[ Instructions ]:")
    print("1. Browser will open and navigate to Douyin.")
    print("2. Please login via QR code scanning.")
    print("3. After login, return here and press [ENTER] to save cookies.")
    print("="*60 + "\n")

    async with async_playwright() as p:
        # Launch the browser.
        browser = await p.chromium.launch(headless=False)
        try:
            context = await browser.new_context(
                viewport={'width': 1280, 'height': 800},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
            )
            page = await context.new_page()

            # Navigate to Douyin. 'domcontentloaded' plus a 90 s timeout
            # improves the chance of success on slow networks.
            print("[ Navigating ]: https://www.douyin.com/ ...")
            try:
                await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000)
            except Exception as e:
                print(f"[ Warning ]: Initial navigation timed out or failed: {e}")
                print("[ Info ]: Will continue anyway, please check if the page is visible.")

            # =============================================================
            # Automatic login detection, with manual ENTER as a fallback
            # =============================================================
            print("\n" + "-"*40)
            print("[ Waiting ]: Please complete the QR login in the browser.")
            print("[ Auto-Detect ]: The script will automatically proceed if login is detected.")
            print("[ Manual ]: If auto-detect fails, type 'go' and press [ENTER] here.")
            print("-"*40 + "\n")

            if await _wait_for_login(page):
                print("[ Success ]: Login detected automatically!")
            else:
                # Auto-detection did not succeed; fall back to manual input.
                print("[ Timeout ]: Auto-detection timed out. Please ensure you are logged in.")
                print("[ Action ]: Type anything and press [ENTER] to force capture cookies:")
                # Read stdin in a worker thread so the event loop is not blocked.
                await asyncio.to_thread(sys.stdin.readline)

            # Collect every cookie of the context.
            all_cookies = await context.cookies()
            cookie_dict = {c['name']: c['value'] for c in all_cookies}

            # Try to capture the dynamic msToken parameter (best effort).
            try:
                ms_token = await _resolve_ms_token(page, cookie_dict)
                if ms_token:
                    cookie_dict['msToken'] = ms_token
                    print(f"[ Info ]: Captured msToken: {ms_token[:20]}...")
                else:
                    print("[ Warning ]: msToken not found in common locations.")
            except Exception as e:
                print(f"[ Debug ]: Error capturing msToken: {e}")

            await context.close()
        finally:
            # Close the browser even when a step above raised.
            await browser.close()

        # Update the configuration file.
        if cookie_dict:
            update_config_yaml(cookie_dict)
        else:
            print("[ Failed ]: No cookies captured. Did you login successfully?")
# Script entry point: only runs when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # asyncio.run drives the main() coroutine to completion.
    asyncio.run(main())