Files
aiData/DouYin/Test/T1_GetCookie.py
HuangHai f665e38bc0 'commit'
2026-02-27 14:57:13 +08:00

177 lines
7.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Douyin Cookie Fetcher (T1)
用途: 模拟打开浏览器,用户扫码登录后自动获取 Cookie 并保存到 config_douyin.yml
"""
import sys
import os
import json
import yaml
import asyncio
import io
from pathlib import Path
# Force UTF-8 console output on Windows, where the default console code page
# would otherwise garble the non-ASCII text this script prints.
if sys.platform.startswith('win'):
    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name, None)
        if hasattr(_stream, 'reconfigure'):
            # Re-encode in place. Unlike wrapping ``.buffer`` in a brand-new
            # TextIOWrapper, this keeps the stream's existing line buffering,
            # so prompts are flushed before the script blocks waiting for
            # the user.
            _stream.reconfigure(encoding='utf-8')
        elif hasattr(_stream, 'buffer'):
            # Stream object without reconfigure(): wrap its byte buffer and
            # flush on every newline so interactive output is not delayed.
            setattr(
                sys,
                _stream_name,
                io.TextIOWrapper(_stream.buffer, encoding='utf-8', line_buffering=True),
            )
        # Anything else (e.g. a stream that is None) is left untouched.
    del _stream_name, _stream
# =================================================================
# 1. 环境配置与路径初始化
# =================================================================
# Anchor every path to the location of this script (not the current working
# directory), so the config file is found wherever the script is launched from.
current_file_path = os.path.abspath(__file__)
current_dir, _script_name = os.path.split(current_file_path)
config_path = os.path.join(current_dir, "config_douyin.yml")
# =================================================================
# 2. Cookie 过滤与更新逻辑
# =================================================================
# Names of the cookies that are copied into the ``cookies`` mapping of the
# config file. The list is deliberately broad so that any cookie that may take
# part in request validation is kept.
REQUIRED_KEYS = {
    "msToken",
    "ttwid",
    "odin_tt",
    "passport_csrf_token",
    "sid_guard",
    "sessionid",
    "sid_tt",
    "uid_tt",
    "uid_tt_ss",
    "sid_ucp_v1",
    "ssid_ucp_v1",
    "n_sdk_version",
    "s_v_web_id",
    "webid",
    "__ac_nonce",
    "__ac_signature",
    "fp",
    "device_web_cpu_core",
    "device_web_memory_size",
    "architecture",
    "hevc_supported",
}
def _load_existing_config(path):
    """Return the mapping stored in the YAML file at *path*, or ``{}``.

    A missing or empty file yields an empty dict. If the file cannot be read
    or parsed, or its top level is not a mapping, a warning is printed, a
    ``<path>.bak`` copy is made (best effort) and an empty dict is returned,
    so the caller can still save its data without the previous file content
    being lost when the file is rewritten.
    """
    import shutil

    if not os.path.exists(path):
        return {}

    try:
        with open(path, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: whatever goes wrong while reading, saving the
        # freshly captured cookies must still be attempted.
        print(f"[ Warning ]: Failed to read existing config: {e}")
    else:
        if loaded is None:
            # Empty file: nothing to preserve.
            return {}
        if isinstance(loaded, dict):
            return loaded
        print(f"[ Warning ]: Existing config is not a mapping (got {type(loaded).__name__}); it will be replaced.")

    # The file is about to be overwritten with content that does not include
    # its previous data, so keep a copy first.
    backup_path = path + ".bak"
    try:
        shutil.copyfile(path, backup_path)
        print(f"[ Info ]: Previous config backed up to: {backup_path}")
    except OSError as e:
        print(f"[ Warning ]: Could not back up previous config: {e}")
    return {}


def update_config_yaml(new_cookies):
    """Merge freshly captured cookies into the YAML file at ``config_path``.

    Two entries of the config are updated; all other entries are preserved:

    * ``cookies`` - a mapping, updated with the subset of *new_cookies*
      whose names are listed in ``REQUIRED_KEYS``.
    * ``cookie``  - a single ``"name=value; name=value"`` string built from
      *all* of *new_cookies*.

    Args:
        new_cookies: mapping of cookie name to cookie value.

    Returns:
        None. Success and failure are reported on stdout; a failure to write
        the file is caught and printed, not raised.
    """
    # 1. Load the existing configuration.
    config_data = _load_existing_config(config_path)

    # 2. Update the ``cookies`` mapping. An existing ``cookies:`` entry that
    #    is empty (None) or otherwise not a mapping is replaced rather than
    #    crashing on ``.update``.
    cookies_section = config_data.get("cookies")
    if not isinstance(cookies_section, dict):
        cookies_section = {}
        config_data["cookies"] = cookies_section

    # Keep only the cookie names we care about.
    filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS}
    cookies_section.update(filtered_cookies)

    # The flat cookie string carries every captured cookie for completeness.
    config_data["cookie"] = "; ".join(f"{k}={v}" for k, v in new_cookies.items())

    # 3. Write the file back. Serialize before opening the file for writing,
    #    so a serialization error cannot leave a truncated config behind.
    try:
        serialized = yaml.dump(config_data, allow_unicode=True, default_flow_style=False, sort_keys=False)
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f"\n[ Success ]: Cookie updated to: {config_path}")
        print(f"[ Info ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.")
    except Exception as e:
        print(f"[ Error ]: Failed to write config: {e}")
# =================================================================
# 3. Playwright 自动化逻辑
# =================================================================
async def _wait_for_login(page, timeout_seconds=120):
    """Poll *page* once per second for an element that marks a logged-in session.

    Args:
        page: the Playwright page to inspect.
        timeout_seconds: maximum number of one-second polls.

    Returns:
        True as soon as one of the markers is found, False if none appeared
        within *timeout_seconds* polls.
    """
    for _ in range(timeout_seconds):
        try:
            # `.dy-avatar` is presumably the user avatar; the text selector
            # matches the "publish video" button label.
            if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'):
                return True
        except Exception:
            # A failed lookup just means "not detected yet"; try again on the
            # next tick. Only Exception is caught so that task cancellation
            # and Ctrl-C still propagate.
            pass
        await asyncio.sleep(1)
    return False


async def main():
    """Open Douyin in a visible browser, wait for login, then save the cookies.

    Steps:
        1. Launch a headed Chromium through Playwright and open douyin.com.
        2. Wait until a logged-in marker is detected (up to 120 seconds);
           if none is found, wait for the user to press ENTER in the console.
        3. Collect every cookie of the browser context, look for an
           ``msToken`` in a few additional places, and hand the result to
           ``update_config_yaml``.

    Returns:
        None. Returns early with a message when Playwright is not installed.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        print("[ Error ]: Playwright not installed. Run: pip install playwright && playwright install chromium")
        return

    print("\n" + "="*60)
    print(" Douyin Login & Cookie Fetcher")
    print("="*60)
    print("[ Instructions ]:")
    print("1. Browser will open and navigate to Douyin.")
    print("2. Please login via QR code scanning.")
    print("3. After login, return here and press [ENTER] to save cookies.")
    print("="*60 + "\n")

    async with async_playwright() as p:
        # Launch a visible browser so the user can scan the QR code.
        browser = await p.chromium.launch(headless=False)
        context = None
        cookie_dict = {}
        try:
            context = await browser.new_context(
                viewport={'width': 1280, 'height': 800},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
            )
            page = await context.new_page()

            # Navigate to Douyin. Waiting only for 'domcontentloaded' with a
            # 90 s timeout makes the load more likely to succeed on a slow
            # network.
            print("[ Navigating ]: https://www.douyin.com/ ...")
            try:
                await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000)
            except Exception as e:
                print(f"[ Warning ]: Initial navigation timed out or failed: {e}")
                print("[ Info ]: Will continue anyway, please check if the page is visible.")

            # =============================================================
            # Automatic login detection, with a manual ENTER as fallback
            # =============================================================
            print("\n" + "-"*40)
            print("[ Waiting ]: Please complete the QR login in the browser.")
            print("[ Auto-Detect ]: The script will automatically proceed if login is detected.")
            print("[ Manual ]: If auto-detect fails, type 'go' and press [ENTER] here.")
            print("-"*40 + "\n")

            if await _wait_for_login(page, timeout_seconds=120):
                print("[ Success ]: Login detected automatically!")
            else:
                # Auto-detection did not succeed: let the user confirm by hand.
                print("[ Timeout ]: Auto-detection timed out. Please ensure you are logged in.")
                print("[ Action ]: Type anything and press [ENTER] to force capture cookies:")
                # Read stdin in a worker thread so the event loop is not blocked.
                await asyncio.to_thread(sys.stdin.readline)

            # Collect every cookie of the browser context.
            all_cookies = await context.cookies()
            cookie_dict = {c['name']: c['value'] for c in all_cookies}

            # Try to capture the dynamic msToken parameter, if the page has one.
            try:
                # 1. localStorage first.
                ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''")
                # 2. Otherwise fall back to the msToken cookie.
                if not ms_token:
                    ms_token = cookie_dict.get('msToken', '')
                # 3. Finally look at a page-level global.
                if not ms_token:
                    ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''")
                if ms_token:
                    cookie_dict['msToken'] = ms_token
                    print(f"[ Info ]: Captured msToken: {ms_token[:20]}...")
                else:
                    print("[ Warning ]: msToken not found in common locations.")
            except Exception as e:
                print(f"[ Debug ]: Error capturing msToken: {e}")
        finally:
            # Always shut the browser down, even when a step above raised.
            if context is not None:
                await context.close()
            await browser.close()

        # Persist the result to the config file.
        if cookie_dict:
            update_config_yaml(cookie_dict)
        else:
            print("[ Failed ]: No cookies captured. Did you login successfully?")
# Script entry point: when the file is executed directly (not imported), run
# the async login/capture workflow to completion on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())