""" Douyin Cookie Fetcher (T1) 用途: 模拟打开浏览器,用户扫码登录后自动获取 Cookie 并保存到 config_douyin.yml """ import sys import os import json import yaml import asyncio import io from pathlib import Path # 强制设置控制台输出编码为 UTF-8,解决 Windows 环境下的乱码问题 if sys.platform.startswith('win'): sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') # ================================================================= # 1. 环境配置与路径初始化 # ================================================================= current_file_path = os.path.abspath(__file__) current_dir = os.path.dirname(current_file_path) config_path = os.path.join(current_dir, "config_douyin.yml") # ================================================================= # 2. Cookie 过滤与更新逻辑 # ================================================================= # 我们关注的核心 Cookie 键名 # 进一步扩大范围以包含所有可能的校验 Cookie REQUIRED_KEYS = { "msToken", "ttwid", "odin_tt", "passport_csrf_token", "sid_guard", "sessionid", "sid_tt", "uid_tt", "uid_tt_ss", "sid_ucp_v1", "ssid_ucp_v1", "n_sdk_version", "s_v_web_id", "webid", "__ac_nonce", "__ac_signature", "fp", "device_web_cpu_core", "device_web_memory_size", "architecture", "hevc_supported" } def update_config_yaml(new_cookies): """将获取到的 Cookie 更新到 config_douyin.yml""" config_data = {} # 1. 读取现有配置 if os.path.exists(config_path): try: with open(config_path, 'r', encoding='utf-8') as f: config_data = yaml.safe_load(f) or {} except Exception as e: print(f"[ Warning ]: Failed to read existing config: {e}") # 2. 更新 cookies 字段 if "cookies" not in config_data: config_data["cookies"] = {} # 过滤出我们需要的键 filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS} config_data["cookies"].update(filtered_cookies) # 同时更新全局 cookie 字符串 - 包含所有捕获到的 cookie 以确保完整性 cookie_str = "; ".join(f"{k}={v}" for k, v in new_cookies.items()) config_data["cookie"] = cookie_str # 3. 写回文件 try: with open(config_path, 'w', encoding='utf-8') as f: yaml.dump(config_data, f, allow_unicode=True, default_flow_style=False, sort_keys=False) print(f"\n[ Success ]: Cookie updated to: {config_path}") print(f"[ Info ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.") except Exception as e: print(f"[ Error ]: Failed to write config: {e}") # ================================================================= # 3. Playwright 自动化逻辑 # ================================================================= async def main(): try: from playwright.async_api import async_playwright except ImportError: print("[ Error ]: Playwright not installed. Run: pip install playwright && playwright install chromium") return print("\n" + "="*60) print(" Douyin Login & Cookie Fetcher") print("="*60) print("[ Instructions ]:") print("1. Browser will open and navigate to Douyin.") print("2. Please login via QR code scanning.") print("3. After login, return here and press [ENTER] to save cookies.") print("="*60 + "\n") async with async_playwright() as p: # 启动浏览器 browser = await p.chromium.launch(headless=False) context = await browser.new_context( viewport={'width': 1280, 'height': 800}, user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36' ) page = await context.new_page() # 跳转到抖音 # 优化: 使用 'domcontentloaded' 提高在弱网环境下的加载成功率,并增加超时到 90s print("[ Navigating ]: https://www.douyin.com/ ...") try: await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000) except Exception as e: print(f"[ Warning ]: Initial navigation timed out or failed: {e}") print("[ Info ]: Will continue anyway, please check if the page is visible.") # ================================================================= # 自动检测登录 + 手动回车双重保险 # ================================================================= print("\n" + "-"*40) print("[ Waiting ]: Please complete the QR login in the browser.") print("[ Auto-Detect ]: The script will automatically proceed if login is detected.") print("[ Manual ]: If auto-detect fails, type 'go' and press [ENTER] here.") print("-"*40 + "\n") # 尝试自动检测登录成功的元素(比如头像或“发布视频”按钮) login_detected = False for _ in range(120): # 最多等待 120 秒 try: # 检查是否存在登录后的特有元素 # .dy-avatar 是头像,.upload-video-text 是发布视频按钮 if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'): print("[ Success ]: Login detected automatically!") login_detected = True break except: pass await asyncio.sleep(1) if not login_detected: # 如果自动检测没成功,再尝试手动输入 print("[ Timeout ]: Auto-detection timed out. Please ensure you are logged in.") print("[ Action ]: Type anything and press [ENTER] to force capture cookies:") await asyncio.to_thread(sys.stdin.readline) # 获取所有 Cookie all_cookies = await context.cookies() cookie_dict = {c['name']: c['value'] for c in all_cookies} # 尝试捕获动态参数 msToken (如果存在于页面中) try: # 1. 尝试从 localStorage 获取 ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''") # 2. 如果没获取到,尝试从所有 Cookie 中找最新的 msToken if not ms_token: ms_token = cookie_dict.get('msToken', '') # 3. 尝试从页面全局变量中寻找 if not ms_token: ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''") if ms_token: cookie_dict['msToken'] = ms_token print(f"[ Info ]: Captured msToken: {ms_token[:20]}...") else: print("[ Warning ]: msToken not found in common locations.") except Exception as e: print(f"[ Debug ]: Error capturing msToken: {e}") # 关闭浏览器 await context.close() await browser.close() # 更新配置文件 if cookie_dict: update_config_yaml(cookie_dict) else: print("[ Failed ]: No cookies captured. Did you login successfully?") if __name__ == "__main__": asyncio.run(main())