'commit'

2026-02-27 14:57:13 +08:00
parent 3a0e2c3ec8
commit f665e38bc0
34 changed files with 2396 additions and 2895 deletions
--- a/DouYin/Test/T1_GetCookie.py
+++ b/DouYin/Test/T1_GetCookie.py
@@ -0,0 +1,176 @@
+"""
+Douyin Cookie Fetcher (T1)
+用途: 模拟打开浏览器，用户扫码登录后自动获取 Cookie 并保存到 config_douyin.yml
+"""
+
+import sys
+import os
+import json
+import yaml
+import asyncio
+import io
+from pathlib import Path
+
+# Force UTF-8 console output so non-ASCII text is not garbled on Windows.
+# reconfigure() changes the encoding of the existing streams in place and
+# leaves their buffering mode untouched; wrapping sys.stdout.buffer in a new
+# TextIOWrapper would drop line buffering and could delay the interactive
+# prompts this script prints before waiting for the user.
+if sys.platform.startswith('win'):
+    for _stream in (sys.stdout, sys.stderr):
+        # A stream replaced by a non-TextIOWrapper object has no reconfigure().
+        if hasattr(_stream, 'reconfigure'):
+            _stream.reconfigure(encoding='utf-8')
+
+# =================================================================
+# 1. Environment setup and path initialisation
+# =================================================================
+# The config file is expected to sit next to this script.
+current_file_path = os.path.abspath(__file__)
+current_dir = os.path.dirname(current_file_path)
+config_path = os.path.join(current_dir, "config_douyin.yml")
+
+# =================================================================
+# 2. Cookie filtering and config update logic
+# =================================================================
+
+# Cookie names that are copied into the per-key "cookies" section of the
+# config. The list is deliberately broad so that every cookie that might be
+# used for request validation is kept.
+REQUIRED_KEYS = {
+    "__ac_nonce",
+    "__ac_signature",
+    "architecture",
+    "device_web_cpu_core",
+    "device_web_memory_size",
+    "fp",
+    "hevc_supported",
+    "msToken",
+    "n_sdk_version",
+    "odin_tt",
+    "passport_csrf_token",
+    "s_v_web_id",
+    "sessionid",
+    "sid_guard",
+    "sid_tt",
+    "sid_ucp_v1",
+    "ssid_ucp_v1",
+    "ttwid",
+    "uid_tt",
+    "uid_tt_ss",
+    "webid",
+}
+
+def update_config_yaml(new_cookies):
+    """Merge freshly captured cookies into config_douyin.yml.
+
+    Other settings already present in the file are kept. Two entries are
+    written: ``cookies`` (a mapping restricted to ``REQUIRED_KEYS``, merged
+    into any existing mapping) and ``cookie`` (a single ``name=value; ...``
+    string built from every captured cookie). Read and write failures are
+    reported on stdout instead of being raised.
+
+    Args:
+        new_cookies: Mapping of cookie name to cookie value.
+    """
+    config_data = {}
+
+    # 1. Load the existing configuration, if there is one.
+    if os.path.exists(config_path):
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                config_data = yaml.safe_load(f) or {}
+        except Exception as e:
+            print(f"[  Warning  ]: Failed to read existing config: {e}")
+
+    # A document whose top level is a list or scalar cannot hold the keys
+    # written below; start from an empty mapping rather than crashing.
+    if not isinstance(config_data, dict):
+        print("[  Warning  ]: Failed to read existing config: top-level YAML value is not a mapping")
+        config_data = {}
+
+    # 2. Update the "cookies" mapping. A bare `cookies:` key loads as None,
+    # so test the value's type rather than only the key's presence.
+    if not isinstance(config_data.get("cookies"), dict):
+        config_data["cookies"] = {}
+
+    # Keep only the cookie names listed in REQUIRED_KEYS.
+    filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS}
+    config_data["cookies"].update(filtered_cookies)
+
+    # Also refresh the global cookie string; it carries every captured cookie
+    # so that nothing is lost to the filter above.
+    cookie_str = "; ".join(f"{k}={v}" for k, v in new_cookies.items())
+    config_data["cookie"] = cookie_str
+
+    # 3. Write the file back.
+    try:
+        with open(config_path, 'w', encoding='utf-8') as f:
+            yaml.dump(config_data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
+        print(f"\n[  Success  ]: Cookie updated to: {config_path}")
+        print(f"[  Info  ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.")
+    except Exception as e:
+        print(f"[  Error  ]: Failed to write config: {e}")
+
+# =================================================================
+# 3. Playwright 自动化逻辑
+# =================================================================
+
+async def main():
+    """Open Douyin in a visible browser, wait for login, and save the cookies.
+
+    The page is polled once per second, up to 120 times, for elements the
+    script treats as proof of a completed login. If none shows up, the script
+    waits for the user to press ENTER in the console. All cookies of the
+    browser context, plus an msToken looked up in localStorage, the cookie jar
+    and ``window._ROUTER_DATA``, are then handed to ``update_config_yaml``.
+    """
+    try:
+        from playwright.async_api import async_playwright
+    except ImportError:
+        print("[  Error  ]: Playwright not installed. Run: pip install playwright && playwright install chromium")
+        return
+
+    print("\n" + "="*60)
+    print("         Douyin Login & Cookie Fetcher")
+    print("="*60)
+    print("[  Instructions  ]:")
+    print("1. Browser will open and navigate to Douyin.")
+    print("2. Please login via QR code scanning.")
+    print("3. After login, return here and press [ENTER] to save cookies.")
+    print("="*60 + "\n")
+
+    async with async_playwright() as p:
+        # Launch a headed browser so the user can scan the QR code.
+        browser = await p.chromium.launch(headless=False)
+        context = await browser.new_context(
+            viewport={'width': 1280, 'height': 800},
+            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
+        )
+        page = await context.new_page()
+
+        # Navigate to Douyin. Waiting only for 'domcontentloaded' with a 90 s
+        # timeout makes the load more likely to succeed on slow networks.
+        print("[  Navigating  ]: https://www.douyin.com/ ...")
+        try:
+            await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000)
+        except Exception as e:
+            print(f"[  Warning  ]: Initial navigation timed out or failed: {e}")
+            print("[  Info  ]: Will continue anyway, please check if the page is visible.")
+
+        # =================================================================
+        # Automatic login detection, with manual ENTER as a fallback
+        # =================================================================
+        print("\n" + "-"*40)
+        print("[  Waiting  ]: Please complete the QR login in the browser.")
+        print("[  Auto-Detect  ]: The script will automatically proceed if login is detected.")
+        print("[  Manual  ]: If auto-detect fails, type 'go' and press [ENTER] here.")
+        print("-"*40 + "\n")
+
+        # Poll for elements that are expected to exist only after login.
+        login_detected = False
+        for _ in range(120):  # wait at most about 120 seconds
+            try:
+                # '.dy-avatar' is presumably the user avatar; the text selector
+                # matches the "发布视频" (publish video) entry.
+                if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'):
+                    print("[  Success  ]: Login detected automatically!")
+                    login_detected = True
+                    break
+            except Exception:
+                # Best effort: the page may be mid-navigation. Only Exception
+                # is caught so that task cancellation and Ctrl+C still
+                # interrupt the wait instead of being swallowed.
+                pass
+            await asyncio.sleep(1)
+
+        if not login_detected:
+            # Auto-detection failed; fall back to a manual confirmation.
+            print("[  Timeout  ]: Auto-detection timed out. Please ensure you are logged in.")
+            print("[  Action  ]: Type anything and press [ENTER] to force capture cookies:")
+            # Read stdin in a worker thread so the event loop is not blocked.
+            await asyncio.to_thread(sys.stdin.readline)
+
+        # Collect every cookie of the browser context.
+        all_cookies = await context.cookies()
+        cookie_dict = {c['name']: c['value'] for c in all_cookies}
+
+        # Try to capture the dynamic msToken parameter if the page exposes it.
+        try:
+            # 1. localStorage takes priority.
+            ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''")
+
+            # 2. Otherwise fall back to the msToken cookie.
+            if not ms_token:
+                ms_token = cookie_dict.get('msToken', '')
+
+            # 3. Finally look in the page's global router data.
+            if not ms_token:
+                ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''")
+
+            if ms_token:
+                cookie_dict['msToken'] = ms_token
+                print(f"[  Info  ]: Captured msToken: {ms_token[:20]}...")
+            else:
+                print("[  Warning  ]: msToken not found in common locations.")
+        except Exception as e:
+            print(f"[  Debug  ]: Error capturing msToken: {e}")
+
+        # Close the browser.
+        await context.close()
+        await browser.close()
+
+        # Persist the result to the config file.
+        if cookie_dict:
+            update_config_yaml(cookie_dict)
+        else:
+            print("[  Failed  ]: No cookies captured. Did you login successfully?")
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/DouYin/Test/T2_BigV.py
+++ b/DouYin/Test/T2_BigV.py
@@ -0,0 +1,203 @@
+"""
+Douyin User Profile & Video Crawler (T2)
+访问注释中的页面，获取页面中的内容信息（用户信息及最近作品）
+
+URL: https://www.douyin.com/user/MS4wLjABAAAA2P7MeZl0VUsDmCzKbZeLlVGVTDRAuXmvr_zcC6XNqd-6R4n9ssCguSgA-gnBfjUO
+"""
+
+import sys
+import os
+import re
+import json
+import yaml
+import time
+import io
+from datetime import datetime
+
+# Force UTF-8 console output so non-ASCII text is not garbled on Windows.
+# reconfigure() changes the encoding of the existing streams in place and
+# leaves their buffering mode untouched; wrapping sys.stdout.buffer in a new
+# TextIOWrapper would drop line buffering, so progress messages could show up
+# late.
+if sys.platform.startswith('win'):
+    for _stream in (sys.stdout, sys.stderr):
+        # A stream replaced by a non-TextIOWrapper object has no reconfigure().
+        if hasattr(_stream, 'reconfigure'):
+            _stream.reconfigure(encoding='utf-8')
+
+# =================================================================
+# 1. Environment setup and path initialisation
+# =================================================================
+# current_dir is the directory of this script; douyin_root is its parent.
+current_file_path = os.path.abspath(__file__)
+current_dir = os.path.dirname(current_file_path)
+douyin_root = os.path.dirname(current_dir)
+
+# Add the DouYin root directory to sys.path so the `apiproxy` package below
+# can be imported. This must happen before the import is attempted.
+if douyin_root not in sys.path:
+    sys.path.append(douyin_root)
+
+# Import the core crawler modules; without them the script cannot do
+# anything, so exit with status 1 on failure.
+try:
+    from apiproxy.douyin.douyin import Douyin
+    from apiproxy.douyin import douyin_headers
+except ImportError as e:
+    print(f"[  Error  ]: Failed to import core modules. Error: {e}")
+    sys.exit(1)
+
+# =================================================================
+# 2. 工具函数定义
+# =================================================================
+
+def extract_url_from_file(file_path):
+    """Return the first Douyin user-profile URL found in a file.
+
+    Args:
+        file_path: Path of the UTF-8 text file to scan.
+
+    Returns:
+        The first matching URL, or None when the file contains no such URL or
+        cannot be read (the error is printed in that case).
+    """
+    try:
+        with open(file_path, 'r', encoding='utf-8') as source:
+            text = source.read()
+            # Only the first occurrence is wanted, so stop at the first hit.
+            first_hit = re.search(r'https?://(?:www\.)?douyin\.com/user/[a-zA-Z0-9\-_]+', text)
+            if first_hit is None:
+                return None
+            return first_hit.group(0)
+    except Exception as e:
+        print(f"[  Error  ]: Failed to read script file: {e}")
+        return None
+
+def load_config():
+    """Load config_douyin.yml from the Test directory, else the DouYin root.
+
+    Returns:
+        dict: The parsed configuration. An empty dict is returned when no
+        config file exists, the file cannot be read or parsed, or its content
+        is not a top-level mapping (an empty file included).
+    """
+    # The copy in the current Test directory takes priority ...
+    test_config_path = os.path.join(current_dir, "config_douyin.yml")
+    # ... the one in the DouYin root directory is the fallback.
+    root_config_path = os.path.join(douyin_root, "config_douyin.yml")
+
+    config_path = test_config_path if os.path.exists(test_config_path) else root_config_path
+
+    if not os.path.exists(config_path):
+        print("[  Warning  ]: config_douyin.yml not found.")
+        return {}
+
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            print(f"[  Info  ]: Loading config from: {os.path.abspath(config_path)}")
+            loaded = yaml.safe_load(f)
+    except Exception as e:
+        print(f"[  Warning  ]: Failed to parse config: {e}")
+        return {}
+
+    # safe_load yields None for an empty document and may yield a list or
+    # scalar; callers use dict methods on the result, so normalise it here.
+    if isinstance(loaded, dict):
+        return loaded
+    if loaded is not None:
+        print("[  Warning  ]: Failed to parse config: top-level YAML value is not a mapping")
+    return {}
+
+def setup_cookies(config):
+    """Populate ``douyin_headers["Cookie"]`` from the loaded configuration.
+
+    The raw ``cookie`` string is preferred. If it lacks an ``msToken=`` entry
+    and the ``cookies`` mapping has one, that token is prepended. When no raw
+    string is configured, the ``cookies`` mapping is joined into a header
+    instead. A warning is printed if no Cookie header ends up being set.
+
+    Args:
+        config: Parsed configuration mapping; ``None`` is treated as empty.
+    """
+    config = config or {}
+
+    # A YAML key written without a value loads as None, and the values may be
+    # of an unexpected type; normalise both settings before using str/dict
+    # methods on them.
+    cookie_str = str(config.get("cookie") or "")
+    cookies_dict = config.get("cookies")
+    if not isinstance(cookies_dict, dict):
+        cookies_dict = {}
+
+    if cookie_str:
+        # Make sure msToken is part of the raw cookie when the mapping has it.
+        if cookies_dict.get("msToken") and "msToken=" not in cookie_str:
+            cookie_str = f"msToken={cookies_dict['msToken']}; " + cookie_str
+        douyin_headers["Cookie"] = cookie_str
+        print(f"[  Info  ]: Using raw cookie string (Length: {len(cookie_str)})")
+    elif cookies_dict:
+        cookie_str = "; ".join(f"{k}={v}" for k, v in cookies_dict.items())
+        douyin_headers["Cookie"] = cookie_str
+        print(f"[  Info  ]: Using combined cookies from dict (Count: {len(cookies_dict)})")
+
+    if not douyin_headers.get("Cookie"):
+        print("[  Warning  ]: No valid Cookie detected. Most API calls will fail.")
+        print("             Please run T1_GetCookie.py first.")
+
+# =================================================================
+# 3. 核心爬取逻辑
+# =================================================================
+
+def crawl_user_profile(url):
+    """Fetch a user's profile and recent works, print them, and save to JSON.
+
+    The combined result is written to ``user_data_<first 8 characters of
+    sec_uid>.json`` in this script's directory. Failures are reported on
+    stdout; the function returns early, without raising, when the URL cannot
+    be resolved or the profile cannot be fetched. A failure while fetching the
+    works list is reported and the profile is still saved.
+
+    Args:
+        url: Douyin user profile URL.
+    """
+    dy = Douyin()
+    # Give the API retry loop more time before it gives up.
+    dy.timeout = 30
+
+    print(f"[  Start  ]: Target URL: {url}")
+
+    # Step 1: resolve the URL to a sec_uid, then fetch the profile.
+    print("[  Step 1/2  ]: Fetching blogger profile...")
+    try:
+        key_type, sec_uid = dy.getKey(url)
+        if key_type != "user" or not sec_uid:
+            print(f"[  Failed  ]: Invalid URL or failed to parse sec_uid. Type: {key_type}")
+            return
+
+        print(f"            (sec_uid: {sec_uid[:15]}...)")
+
+        user_detail = dy.getUserDetailInfo(sec_uid)
+
+        if not user_detail or user_detail.get("status_code") != 0:
+            print(f"[  Failed  ]: Failed to fetch profile. Status: {user_detail.get('status_code') if user_detail else 'None'}")
+            if user_detail:
+                print(f"            Msg: {user_detail.get('status_msg', 'Unknown error')}")
+            print("            Hint: Please check if Cookie is expired or network is restricted.")
+            return
+    except Exception as e:
+        print(f"[  Error  ]: An unexpected error occurred: {e}")
+        return
+
+    # "user" may be missing or null in the response; fall back to an empty
+    # mapping so the lookups below do not fail.
+    user_data = user_detail.get("user") or {}
+    nickname = user_data.get('nickname', 'Unknown')
+
+    # Print the profile summary.
+    print("\n" + "="*60)
+    print(f"Blogger: {nickname}")
+    print("-" * 60)
+    print(f"ID:       {user_data.get('unique_id') or user_data.get('short_id', 'Unknown')}")
+    print(f"Bio:      {user_data.get('signature', 'N/A')}")
+    print(f"Followers:{user_data.get('m_follower_count') or user_data.get('follower_count', 0)}")
+    print(f"Likes:    {user_data.get('total_favorited', 0)}")
+    print(f"Following:{user_data.get('following_count', 0)}")
+    print("="*60 + "\n")
+
+    # Step 2: fetch the most recent works. Guarded like step 1 so that an API
+    # failure here does not discard the profile that was already fetched.
+    print("[  Step 2/2  ]: Fetching recent works...")
+    try:
+        aweme_list = dy.getUserInfo(sec_uid, count=10)
+    except Exception as e:
+        print(f"[  Error  ]: Failed to fetch recent works: {e}")
+        aweme_list = None
+
+    result_data = {
+        "user_info": user_data,
+        "recent_videos": aweme_list or [],
+        "crawl_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    }
+
+    if aweme_list:
+        print(f"Successfully fetched {len(aweme_list)} videos:\n")
+        for i, aweme in enumerate(aweme_list, 1):
+            ctime = aweme.get('create_time')
+            if isinstance(ctime, (int, float)):
+                ctime_str = datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M')
+            else:
+                ctime_str = str(ctime)
+
+            # A null or empty description is shown as 'No Title'; long ones
+            # are cut to 47 characters plus an ellipsis.
+            desc = aweme.get('desc') or 'No Title'
+            desc = (desc[:47] + "...") if len(desc) > 50 else desc
+
+            stats = aweme.get('statistics') or {}
+            print(f"{i:02d}. [{ctime_str}] {desc}")
+            print(f"    ❤️ {stats.get('digg_count', 0):<8} 💬 {stats.get('comment_count', 0):<8} ⭐ {stats.get('collect_count', 0)}")
+            print(f"    🔗 https://www.douyin.com/video/{aweme.get('aweme_id')}\n")
+    else:
+        print("[  Info  ]: No public videos found. Account might be private or API limited.")
+
+    # Step 3: save everything next to this script.
+    output_file = os.path.join(current_dir, f"user_data_{sec_uid[:8]}.json")
+    try:
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(result_data, f, ensure_ascii=False, indent=4)
+        print(f"[  Success  ]: Data saved to: {os.path.basename(output_file)}")
+    except Exception as e:
+        print(f"[  Warning  ]: Failed to save data: {e}")
+
+# =================================================================
+# 4. 主入口
+# =================================================================
+
+if __name__ == "__main__":
+    # The target profile URL is read from this script's own source text.
+    target_url = extract_url_from_file(current_file_path)
+
+    if not target_url:
+        print("[  Error  ]: No valid Douyin URL found in file comments.")
+    else:
+        # Load the configuration and install the cookies before any request.
+        config = load_config()
+        setup_cookies(config)
+
+        # Run the crawl.
+        crawl_user_profile(target_url)
--- a/DouYin/Test/config_douyin.yml
+++ b/DouYin/Test/config_douyin.yml
@@ -0,0 +1,16 @@
+cookie: __ac_nonce=069a13bcc00cacbb09800; __ac_signature=_02B4Z6wo00f01EiD5NQAAIDB-WNazgGqWRxIo8BAAHuJ33;
+  ttwid=1%7CxZrCypV7044-lvm6_oK7nlsTEtEn-H_yJXdZRYjRQg0%7C1772174285%7C38825b7026cf7c8724e4070f81748a477168eb58988b8e09b67142bc6441a676;
+  enter_pc_once=1; UIFID_TEMP=630dc87f7218843564944b22829d362b9fabe9a9e3376a5c74988083749b66dffd3737bb90e0acf82e6707c6ee24f7522fbefdc12d5a2fed2eae91b09db43c67688ba5ad520d1d6f6b294f7ef3c3442efbc0a4ff34b6c60edb3ff06d5e120b4a2376474bd59208bc08755ec54934cb80;
+  x-web-secsdk-uid=9d7866f1-5873-4d35-8b7d-7d0117b45b3f; s_v_web_id=verify_mm4ital2_ERFXSBBL_FxRH_4tEN_A3jj_V3VIL6WSh4u6;
+  =douyin.com; device_web_cpu_core=20; device_web_memory_size=8; architecture=amd64;
+  hevc_supported=true; IsDouyinActive=true; home_can_add_dy_2_desktop=%220%22; dy_swidth=1280;
+  dy_sheight=800; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1280%2C%5C%22screen_height%5C%22%3A800%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A20%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A0%7D%22
+cookies:
+  msToken: my7nuKyrpTVEWOX-n62wR8I5EcvoMKBmvsBMnODLOtG3sn6AsR7q_jEM5jmEenyuwmHpsL25b84VhGcR4nUgv0PepA2zrSUOGHCmZVzpauYpRgbR9svMKjt2-AgNRz
+  ttwid: 1%7CxZrCypV7044-lvm6_oK7nlsTEtEn-H_yJXdZRYjRQg0%7C1772174285%7C38825b7026cf7c8724e4070f81748a477168eb58988b8e09b67142bc6441a676
+  s_v_web_id: verify_mm4ital2_ERFXSBBL_FxRH_4tEN_A3jj_V3VIL6WSh4u6
+  odin_tt: 6f33402fa0952cdea7eaa5226bfe0a2a45ee10bbb138835da6a5383e9eef092f7ad0a1790c0271a090f72c8112875fef3665d50460b79ec302ba56c2b91f52b3bcab4b209cce3f4b7378f995b01a5cce
+path: ./Downloaded/
+thread: 5
+number:
+  post: 10
--- a/DouYin/Test/user_data_MS4wLjAB.json
+++ b/DouYin/Test/user_data_MS4wLjAB.json
--- a/DouYin/apiproxy/init.py
+++ b/DouYin/apiproxy/init.py
@@ -2,4 +2,4 @@
 # -*- coding: utf-8 -*-


-ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
+ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
--- a/DouYin/apiproxy/douyin/init.py
+++ b/DouYin/apiproxy/douyin/init.py
@@ -9,10 +9,10 @@ douyin_headers = {
    'referer': 'https://www.douyin.com/',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
-    'accept-encoding': 'gzip, deflate, br',
+    'accept-encoding': 'gzip, deflate',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    'sec-ch-ua-mobile': '?0',
-    'sec-ch-ua-platform': '"macOS"',
+    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin'
--- a/DouYin/apiproxy/douyin/douyin.py
+++ b/DouYin/apiproxy/douyin/douyin.py
@@ -165,7 +165,7 @@ class Douyin(object):
                    # 单作品接口返回 'aweme_detail'
                    # 主页作品接口返回 'aweme_list'->['aweme_detail']
                    # 更新API参数以适应最新接口要求
-                    detail_params = f'aweme_id={aweme_id}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&update_version_code=170400'
+                    detail_params = f'aweme_id={aweme_id}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&update_version_code=170400'
                    jx_url = self.urls.POST_DETAIL + utils.getXbogus(detail_params)

                    response = requests.get(url=jx_url, headers=douyin_headers, timeout=10)
@@ -280,7 +280,15 @@ class Douyin(object):
            while True:
                try:
                    # 构建请求URL - 添加更多必需参数
-                    base_params = f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
+                    base_params = f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
+
+                    # 尝试从 cookie 中提取 msToken
+                    cookie_str = douyin_headers.get('Cookie', '')
+                    if 'msToken=' in cookie_str:
+                        import re
+                        ms_token_match = re.search(r'msToken=([^;]+)', cookie_str)
+                        if ms_token_match:
+                            base_params += f"&msToken={ms_token_match.group(1)}"

                    if mode == "post":
                        url = self.urls.USER_POST + utils.getXbogus(base_params)
@@ -415,7 +423,7 @@ class Douyin(object):
        while True:
            # 接口不稳定, 有时服务器不返回数据, 需要重新获取
            try:
-                live_params = f'aid=6383&device_platform=web&web_rid={web_rid}&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
+                live_params = f'aid=6383&device_platform=web&web_rid={web_rid}&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
                live_api = self.urls.LIVE + utils.getXbogus(live_params)

                response = requests.get(live_api, headers=douyin_headers)
@@ -870,19 +878,39 @@ class Douyin(object):
        while True:
            # 接口不稳定, 有时服务器不返回数据, 需要重新获取
            try:
-                user_detail_params = f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
+                user_detail_params = f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
+                
+                # 尝试从 cookie 中提取 msToken
+                cookie_str = douyin_headers.get('Cookie', '')
+                if 'msToken=' in cookie_str:
+                    import re
+                    ms_token_match = re.search(r'msToken=([^;]+)', cookie_str)
+                    if ms_token_match:
+                        user_detail_params += f"&msToken={ms_token_match.group(1)}"
+
                url = self.urls.USER_DETAIL + utils.getXbogus(user_detail_params)

-                res = requests.get(url=url, headers=douyin_headers)
-                datadict = json.loads(res.text)
-
-                if datadict is not None and datadict["status_code"] == 0:
-                    return datadict
+                res = requests.get(url=url, headers=douyin_headers, timeout=10)
+                
+                if not res.text.strip():
+                    logger.warning(f"getUserDetailInfo: Empty response (Status: {res.status_code})")
+                else:
+                    try:
+                        datadict = json.loads(res.text)
+                        if datadict is not None and datadict.get("status_code") == 0:
+                            return datadict
+                        else:
+                            logger.warning(f"getUserDetailInfo: API error (Status: {datadict.get('status_code')}, Msg: {datadict.get('status_msg')})")
+                    except json.JSONDecodeError:
+                        logger.error(f"getUserDetailInfo: JSON decode error (Status: {res.status_code})")
            except Exception as e:
-                end = time.time()  # 结束时间
-                if end - start > self.timeout:
-                    print("[  提示  ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
-                    return datadict
+                logger.error(f"getUserDetailInfo: Exception: {e}")
+                
+            end = time.time()  # 结束时间
+            if end - start > self.timeout:
+                print("[  提示  ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
+                return datadict
+            time.sleep(1) # 增加小延迟避免请求过快


 if __name__ == "__main__":