This commit is contained in:
HuangHai
2026-02-27 14:57:13 +08:00
parent 3a0e2c3ec8
commit f665e38bc0
34 changed files with 2396 additions and 2895 deletions

176
DouYin/Test/T1_GetCookie.py Normal file
View File

@@ -0,0 +1,176 @@
"""
Douyin Cookie Fetcher (T1)
用途: 模拟打开浏览器,用户扫码登录后自动获取 Cookie 并保存到 config_douyin.yml
"""
import sys
import os
import json
import yaml
import asyncio
import io
from pathlib import Path
# Force UTF-8 console output so non-ASCII text is not garbled on Windows.
if sys.platform.startswith('win'):
    sys.stdout, sys.stderr = (
        io.TextIOWrapper(stream.buffer, encoding='utf-8')
        for stream in (sys.stdout, sys.stderr)
    )

# =================================================================
# 1. Environment and path initialisation
# =================================================================
current_file_path = os.path.abspath(__file__)
current_dir = os.path.dirname(current_file_path)
# The config file lives next to this script.
config_path = os.path.join(current_dir, "config_douyin.yml")

# =================================================================
# 2. Cookie filtering and update logic
# =================================================================
# Cookie names that are copied into the ``cookies`` mapping of the config.
# The list is deliberately broad so that every cookie that may take part in
# request validation is kept.
REQUIRED_KEYS = {
    "__ac_nonce",
    "__ac_signature",
    "architecture",
    "device_web_cpu_core",
    "device_web_memory_size",
    "fp",
    "hevc_supported",
    "msToken",
    "n_sdk_version",
    "odin_tt",
    "passport_csrf_token",
    "s_v_web_id",
    "sessionid",
    "sid_guard",
    "sid_tt",
    "sid_ucp_v1",
    "ssid_ucp_v1",
    "ttwid",
    "uid_tt",
    "uid_tt_ss",
    "webid",
}
def update_config_yaml(new_cookies):
    """Merge freshly captured cookies into config_douyin.yml.

    The YAML document at ``config_path`` is loaded if it exists. Its
    ``cookies`` mapping is updated with the entries of *new_cookies* whose
    names are in ``REQUIRED_KEYS``, and its ``cookie`` entry is replaced by a
    ``"name=value; ..."`` string built from the captured cookies. Other
    top-level keys of the document are written back unchanged.

    Args:
        new_cookies: Mapping of cookie name to cookie value.

    Returns:
        None. Read, serialisation and write errors are printed to stdout
        instead of being raised.
    """
    config_data = {}

    # 1. Load the existing configuration, if there is one.
    if os.path.exists(config_path):
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                loaded = yaml.safe_load(f) or {}
            if not isinstance(loaded, dict):
                # A list/scalar document cannot hold the keys set below.
                raise ValueError("top-level YAML value is not a mapping")
            config_data = loaded
        except Exception as e:
            print(f"[ Warning ]: Failed to read existing config: {e}")
            # The file is about to be rewritten from an empty document, which
            # would silently drop the user's other settings. Keep the
            # unreadable original aside so it can be recovered by hand.
            backup_path = config_path + ".bak"
            try:
                os.replace(config_path, backup_path)
                print(f"[ Info ]: Unreadable config kept as: {backup_path}")
            except OSError as backup_error:
                print(f"[ Warning ]: Could not back up existing config: {backup_error}")

    # 2. Update the ``cookies`` mapping. A ``cookies:`` key with a null or
    #    non-mapping value is replaced, otherwise ``.update`` would fail.
    if not isinstance(config_data.get("cookies"), dict):
        config_data["cookies"] = {}

    # Keep only the cookie names we care about in the structured mapping.
    filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS}
    config_data["cookies"].update(filtered_cookies)

    # The raw cookie string carries every captured cookie for completeness.
    # Cookies with an empty name are skipped: they would serialise as a
    # malformed "=value" pair.
    cookie_str = "; ".join(f"{k}={v}" for k, v in new_cookies.items() if k)
    config_data["cookie"] = cookie_str

    # 3. Write the document back. Serialise first so that a dump failure
    #    cannot leave a truncated config file behind.
    try:
        serialized = yaml.dump(config_data, allow_unicode=True, default_flow_style=False, sort_keys=False)
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f"\n[ Success ]: Cookie updated to: {config_path}")
        print(f"[ Info ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.")
    except Exception as e:
        print(f"[ Error ]: Failed to write config: {e}")
# =================================================================
# 3. Playwright 自动化逻辑
# =================================================================
async def _wait_for_login(page, timeout_seconds=120):
    """Poll *page* about once per second for an element shown after login.

    Args:
        page: Playwright page to inspect.
        timeout_seconds: Maximum number of one-second polling rounds.

    Returns:
        True as soon as one of the selectors matches, False if none matched
        within the allowed number of rounds.
    """
    for _ in range(timeout_seconds):
        try:
            # Elements expected only after login: the avatar ('.dy-avatar')
            # and an element with the text "发布视频" (publish video).
            # NOTE(review): confirm these selectors against the current page markup.
            if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'):
                return True
        except Exception:
            # A failed query is treated as "not logged in yet" and retried on
            # the next round. Only Exception is caught so that task
            # cancellation and Ctrl+C still stop the loop.
            pass
        await asyncio.sleep(1)
    return False


async def _find_ms_token(page, cookie_dict):
    """Return the msToken from localStorage, the cookies or page globals.

    The sources are tried in that order; an empty/falsy value is returned
    when none of them provides a token.
    """
    # 1. localStorage
    ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''")
    # 2. the cookies that were already captured
    if not ms_token:
        ms_token = cookie_dict.get('msToken', '')
    # 3. a global variable on the page
    if not ms_token:
        ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''")
    return ms_token


async def main():
    """Open Douyin in a visible browser, wait for login and save the cookies.

    The function launches Chromium through Playwright, navigates to
    https://www.douyin.com/, waits until a login is detected (or, after the
    polling rounds are used up, until the user presses ENTER), collects all
    cookies of the browser context plus the msToken, closes the browser and
    passes the cookies to ``update_config_yaml``.

    Returns:
        None. Returns early with a message when Playwright is not installed.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        print("[ Error ]: Playwright not installed. Run: pip install playwright && playwright install chromium")
        return

    print("\n" + "="*60)
    print(" Douyin Login & Cookie Fetcher")
    print("="*60)
    print("[ Instructions ]:")
    print("1. Browser will open and navigate to Douyin.")
    print("2. Please login via QR code scanning.")
    print("3. After login, return here and press [ENTER] to save cookies.")
    print("="*60 + "\n")

    async with async_playwright() as p:
        # Launch a visible browser so the user can scan the QR code.
        browser = await p.chromium.launch(headless=False)
        try:
            context = await browser.new_context(
                viewport={'width': 1280, 'height': 800},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
            )
            page = await context.new_page()

            # Navigate to Douyin. 'domcontentloaded' plus a 90 s timeout is
            # used to improve the success rate on slow networks.
            print("[ Navigating ]: https://www.douyin.com/ ...")
            try:
                await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000)
            except Exception as e:
                print(f"[ Warning ]: Initial navigation timed out or failed: {e}")
                print("[ Info ]: Will continue anyway, please check if the page is visible.")

            # =========================================================
            # Automatic login detection with a manual ENTER fallback
            # =========================================================
            print("\n" + "-"*40)
            print("[ Waiting ]: Please complete the QR login in the browser.")
            print("[ Auto-Detect ]: The script will automatically proceed if login is detected.")
            print("[ Manual ]: If auto-detect fails, type 'go' and press [ENTER] here.")
            print("-"*40 + "\n")

            login_detected = await _wait_for_login(page, timeout_seconds=120)
            if login_detected:
                print("[ Success ]: Login detected automatically!")
            else:
                # Auto-detection did not succeed: wait for manual confirmation.
                print("[ Timeout ]: Auto-detection timed out. Please ensure you are logged in.")
                print("[ Action ]: Type anything and press [ENTER] to force capture cookies:")
                # Read stdin in a worker thread so the event loop is not blocked.
                await asyncio.to_thread(sys.stdin.readline)

            # Collect every cookie of the browser context.
            all_cookies = await context.cookies()
            cookie_dict = {c['name']: c['value'] for c in all_cookies}

            # Try to capture the dynamic msToken parameter as well.
            try:
                ms_token = await _find_ms_token(page, cookie_dict)
                if ms_token:
                    cookie_dict['msToken'] = ms_token
                    print(f"[ Info ]: Captured msToken: {ms_token[:20]}...")
                else:
                    print("[ Warning ]: msToken not found in common locations.")
            except Exception as e:
                print(f"[ Debug ]: Error capturing msToken: {e}")

            await context.close()
        finally:
            # Close the browser on the error path too, so no Chromium
            # process is left behind when something above raises.
            await browser.close()

        # Persist the result.
        if cookie_dict:
            update_config_yaml(cookie_dict)
        else:
            print("[ Failed ]: No cookies captured. Did you login successfully?")


if __name__ == "__main__":
    asyncio.run(main())

203
DouYin/Test/T2_BigV.py Normal file
View File

@@ -0,0 +1,203 @@
"""
Douyin User Profile & Video Crawler (T2)
访问注释中的页面,获取页面中的内容信息(用户信息及最近作品)
URL: https://www.douyin.com/user/MS4wLjABAAAA2P7MeZl0VUsDmCzKbZeLlVGVTDRAuXmvr_zcC6XNqd-6R4n9ssCguSgA-gnBfjUO
"""
import sys
import os
import re
import json
import yaml
import time
import io
from datetime import datetime
# Force UTF-8 console output so non-ASCII text is not garbled on Windows.
if sys.platform.startswith('win'):
    sys.stdout, sys.stderr = (
        io.TextIOWrapper(stream.buffer, encoding='utf-8')
        for stream in (sys.stdout, sys.stderr)
    )

# =================================================================
# 1. Environment and path initialisation
# =================================================================
current_file_path = os.path.abspath(__file__)
current_dir = os.path.dirname(current_file_path)
douyin_root = os.path.dirname(current_dir)

# Make the DouYin root directory (parent of this script's directory)
# importable, without adding it twice.
if douyin_root not in sys.path:
    sys.path.append(douyin_root)

# Import the core crawler modules; the script cannot run without them.
try:
    from apiproxy.douyin.douyin import Douyin
    from apiproxy.douyin import douyin_headers
except ImportError as import_error:
    print(f"[ Error ]: Failed to import core modules. Error: {import_error}")
    sys.exit(1)
# =================================================================
# 2. 工具函数定义
# =================================================================
def extract_url_from_file(file_path):
    """Return the first Douyin user-profile URL found in a text file.

    The whole file is read as UTF-8 and searched for
    ``http(s)://[www.]douyin.com/user/<id>``, where ``<id>`` consists of
    ASCII letters, digits, ``-`` and ``_``.

    Args:
        file_path: Path of the file to scan.

    Returns:
        The first matching URL, or None when the file contains no match or
        cannot be read (the error is printed in that case).
    """
    url_pattern = r'https?://(?:www\.)?douyin\.com/user/[a-zA-Z0-9\-_]+'
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            first_hit = re.search(url_pattern, source.read())
    except Exception as read_error:
        print(f"[ Error ]: Failed to read script file: {read_error}")
        return None
    if first_hit is None:
        return None
    return first_hit.group(0)
def load_config():
    """Load config_douyin.yml from the Test directory or the DouYin root.

    The file next to this script (``current_dir``) takes precedence; the one
    in ``douyin_root`` is used when the former does not exist.

    Returns:
        The parsed configuration as a dict. An empty dict is returned when no
        config file exists, when it cannot be read or parsed, when it is
        empty, or when its top-level value is not a mapping, so callers can
        always use ``.get`` on the result.
    """
    # Prefer the config in the current Test directory ...
    test_config_path = os.path.join(current_dir, "config_douyin.yml")
    # ... and fall back to the DouYin root directory.
    root_config_path = os.path.join(douyin_root, "config_douyin.yml")
    config_path = test_config_path if os.path.exists(test_config_path) else root_config_path

    if not os.path.exists(config_path):
        print("[ Warning ]: config_douyin.yml not found.")
        return {}

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            print(f"[ Info ]: Loading config from: {os.path.abspath(config_path)}")
            loaded = yaml.safe_load(f)
    except Exception as e:
        print(f"[ Warning ]: Failed to parse config: {e}")
        return {}

    # safe_load yields None for an empty document.
    if loaded is None:
        return {}
    if not isinstance(loaded, dict):
        print("[ Warning ]: Failed to parse config: top-level YAML value is not a mapping")
        return {}
    return loaded
def setup_cookies(config):
    """Set the ``Cookie`` request header in ``douyin_headers`` from *config*.

    The raw ``cookie`` string of the config is preferred. When it lacks an
    ``msToken=`` entry and the ``cookies`` mapping has an ``msToken``, that
    token is prepended. Without a raw string, the header is built by joining
    the ``cookies`` mapping as ``"name=value; ..."``. A warning is printed
    when no cookie header ends up being set.

    Args:
        config: Parsed configuration mapping. ``None``, a missing key, or a
            key with a null value is treated as "not configured".
    """
    config = config or {}
    # ``or`` also covers keys that exist with a null value in the YAML file.
    cookie_str = config.get("cookie") or ""
    cookies_dict = config.get("cookies") or {}
    if not isinstance(cookies_dict, dict):
        cookies_dict = {}

    if cookie_str:
        # Make sure msToken is part of the raw string when the mapping has it.
        if cookies_dict.get("msToken") and "msToken=" not in cookie_str:
            cookie_str = f"msToken={cookies_dict['msToken']}; " + cookie_str
        douyin_headers["Cookie"] = cookie_str
        print(f"[ Info ]: Using raw cookie string (Length: {len(cookie_str)})")
    elif cookies_dict:
        cookie_str = "; ".join(f"{k}={v}" for k, v in cookies_dict.items())
        douyin_headers["Cookie"] = cookie_str
        print(f"[ Info ]: Using combined cookies from dict (Count: {len(cookies_dict)})")

    if not douyin_headers.get("Cookie"):
        print("[ Warning ]: No valid Cookie detected. Most API calls will fail.")
        print(" Please run T1_GetCookie.py first.")
# =================================================================
# 3. 核心爬取逻辑
# =================================================================
def _print_profile(user_data):
    """Print the blogger summary block for the ``user`` mapping of a profile."""
    nickname = user_data.get('nickname', 'Unknown')
    print("\n" + "="*60)
    print(f"Blogger: {nickname}")
    print("-" * 60)
    print(f"ID: {user_data.get('unique_id') or user_data.get('short_id', 'Unknown')}")
    print(f"Bio: {user_data.get('signature', 'N/A')}")
    print(f"Followers:{user_data.get('m_follower_count') or user_data.get('follower_count', 0)}")
    print(f"Likes: {user_data.get('total_favorited', 0)}")
    print(f"Following:{user_data.get('following_count', 0)}")
    print("="*60 + "\n")


def _print_video(index, aweme):
    """Print the summary lines (time, title, counters, link) of one work."""
    ctime = aweme.get('create_time')
    if isinstance(ctime, (int, float)):
        ctime_str = datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M')
    else:
        ctime_str = str(ctime)

    # ``or`` covers both a missing key and a null/empty value.
    desc = aweme.get('desc') or 'No Title'
    desc = (desc[:47] + "...") if len(desc) > 50 else desc
    stats = aweme.get('statistics') or {}
    # Null counters would break the '<8' format spec, so fall back to 0.
    digg_count = stats.get('digg_count') or 0
    comment_count = stats.get('comment_count') or 0
    collect_count = stats.get('collect_count') or 0

    print(f"{index:02d}. [{ctime_str}] {desc}")
    print(f" ❤️ {digg_count:<8} 💬 {comment_count:<8}{collect_count}")
    print(f" 🔗 https://www.douyin.com/video/{aweme.get('aweme_id')}\n")


def crawl_user_profile(url):
    """Fetch a user's profile and recent works, print them and save them.

    The sec_uid is parsed from *url* with ``Douyin.getKey``, the profile is
    fetched with ``getUserDetailInfo`` and up to ten works are requested with
    ``getUserInfo``. The profile, the works and the crawl time are written
    as JSON to ``user_data_<first 8 chars of sec_uid>.json`` next to this
    script.

    Args:
        url: Douyin user-profile URL.

    Returns:
        None. The function returns early (after printing the reason) when the
        URL cannot be parsed or the profile cannot be fetched.
    """
    dy = Douyin()
    # Allow the core class more time for its retry loop.
    dy.timeout = 30
    print(f"[ Start ]: Target URL: {url}")

    # Step 1: parse the URL and fetch the profile.
    print("[ Step 1/2 ]: Fetching blogger profile...")
    try:
        key_type, sec_uid = dy.getKey(url)
        if key_type != "user" or not sec_uid:
            print(f"[ Failed ]: Invalid URL or failed to parse sec_uid. Type: {key_type}")
            return
        print(f" (sec_uid: {sec_uid[:15]}...)")

        # NOTE(review): per the original author's comment, the core Douyin
        # class extracts msToken from the Cookie header itself — confirm.
        user_detail = dy.getUserDetailInfo(sec_uid)
        if not user_detail or user_detail.get("status_code") != 0:
            print(f"[ Failed ]: Failed to fetch profile. Status: {user_detail.get('status_code') if user_detail else 'None'}")
            if user_detail:
                print(f" Msg: {user_detail.get('status_msg', 'Unknown error')}")
            print(" Hint: Please check if Cookie is expired or network is restricted.")
            return
    except Exception as e:
        print(f"[ Error ]: An unexpected error occurred: {e}")
        return

    # ``or`` guards against a ``user`` key that is present but null.
    user_data = user_detail.get("user") or {}
    _print_profile(user_data)

    # Step 2: fetch the most recent works. A failure here must not discard
    # the profile that was already fetched, so it is reported and the result
    # is saved with an empty work list.
    print("[ Step 2/2 ]: Fetching recent works...")
    try:
        aweme_list = dy.getUserInfo(sec_uid, count=10)
    except Exception as e:
        print(f"[ Error ]: Failed to fetch recent works: {e}")
        aweme_list = None

    result_data = {
        "user_info": user_data,
        "recent_videos": aweme_list or [],
        "crawl_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }

    if aweme_list:
        print(f"Successfully fetched {len(aweme_list)} videos:\n")
        for i, aweme in enumerate(aweme_list, 1):
            _print_video(i, aweme)
    else:
        print("[ Info ]: No public videos found. Account might be private or API limited.")

    # Save the collected data.
    # NOTE(review): sec_uid values appear to share a common prefix (the sample
    # URL starts with "MS4wLjAB"), so the 8-character slice may give every
    # user the same file name — confirm before relying on per-user files.
    output_file = os.path.join(current_dir, f"user_data_{sec_uid[:8]}.json")
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(result_data, f, ensure_ascii=False, indent=4)
        print(f"[ Success ]: Data saved to: {os.path.basename(output_file)}")
    except Exception as e:
        print(f"[ Warning ]: Failed to save data: {e}")
# =================================================================
# 4. 主入口
# =================================================================
if __name__ == "__main__":
    # The target URL is read from this script's own source text.
    target_url = extract_url_from_file(current_file_path)
    if not target_url:
        print("[ Error ]: No valid Douyin URL found in file comments.")
    else:
        # Prepare the request cookies, then run the crawl.
        config = load_config()
        setup_cookies(config)
        crawl_user_profile(target_url)

View File

@@ -0,0 +1,16 @@
cookie: __ac_nonce=069a13bcc00cacbb09800; __ac_signature=_02B4Z6wo00f01EiD5NQAAIDB-WNazgGqWRxIo8BAAHuJ33;
ttwid=1%7CxZrCypV7044-lvm6_oK7nlsTEtEn-H_yJXdZRYjRQg0%7C1772174285%7C38825b7026cf7c8724e4070f81748a477168eb58988b8e09b67142bc6441a676;
enter_pc_once=1; UIFID_TEMP=630dc87f7218843564944b22829d362b9fabe9a9e3376a5c74988083749b66dffd3737bb90e0acf82e6707c6ee24f7522fbefdc12d5a2fed2eae91b09db43c67688ba5ad520d1d6f6b294f7ef3c3442efbc0a4ff34b6c60edb3ff06d5e120b4a2376474bd59208bc08755ec54934cb80;
x-web-secsdk-uid=9d7866f1-5873-4d35-8b7d-7d0117b45b3f; s_v_web_id=verify_mm4ital2_ERFXSBBL_FxRH_4tEN_A3jj_V3VIL6WSh4u6;
=douyin.com; device_web_cpu_core=20; device_web_memory_size=8; architecture=amd64;
hevc_supported=true; IsDouyinActive=true; home_can_add_dy_2_desktop=%220%22; dy_swidth=1280;
dy_sheight=800; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1280%2C%5C%22screen_height%5C%22%3A800%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A20%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A0%7D%22
cookies:
msToken: my7nuKyrpTVEWOX-n62wR8I5EcvoMKBmvsBMnODLOtG3sn6AsR7q_jEM5jmEenyuwmHpsL25b84VhGcR4nUgv0PepA2zrSUOGHCmZVzpauYpRgbR9svMKjt2-AgNRz
ttwid: 1%7CxZrCypV7044-lvm6_oK7nlsTEtEn-H_yJXdZRYjRQg0%7C1772174285%7C38825b7026cf7c8724e4070f81748a477168eb58988b8e09b67142bc6441a676
s_v_web_id: verify_mm4ital2_ERFXSBBL_FxRH_4tEN_A3jj_V3VIL6WSh4u6
odin_tt: 6f33402fa0952cdea7eaa5226bfe0a2a45ee10bbb138835da6a5383e9eef092f7ad0a1790c0271a090f72c8112875fef3665d50460b79ec302ba56c2b91f52b3bcab4b209cce3f4b7378f995b01a5cce
path: ./Downloaded/
thread: 5
number:
post: 10

File diff suppressed because it is too large Load Diff

View File

@@ -2,4 +2,4 @@
# -*- coding: utf-8 -*-
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'

View File

@@ -9,10 +9,10 @@ douyin_headers = {
'referer': 'https://www.douyin.com/',
'accept': 'application/json, text/plain, */*',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
'accept-encoding': 'gzip, deflate, br',
'accept-encoding': 'gzip, deflate',
'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin'

View File

@@ -165,7 +165,7 @@ class Douyin(object):
# 单作品接口返回 'aweme_detail'
# 主页作品接口返回 'aweme_list'->['aweme_detail']
# 更新API参数以适应最新接口要求
detail_params = f'aweme_id={aweme_id}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&update_version_code=170400'
detail_params = f'aweme_id={aweme_id}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&update_version_code=170400'
jx_url = self.urls.POST_DETAIL + utils.getXbogus(detail_params)
response = requests.get(url=jx_url, headers=douyin_headers, timeout=10)
@@ -280,7 +280,15 @@ class Douyin(object):
while True:
try:
# 构建请求URL - 添加更多必需参数
base_params = f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
base_params = f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
# 尝试从 cookie 中提取 msToken
cookie_str = douyin_headers.get('Cookie', '')
if 'msToken=' in cookie_str:
import re
ms_token_match = re.search(r'msToken=([^;]+)', cookie_str)
if ms_token_match:
base_params += f"&msToken={ms_token_match.group(1)}"
if mode == "post":
url = self.urls.USER_POST + utils.getXbogus(base_params)
@@ -415,7 +423,7 @@ class Douyin(object):
while True:
# 接口不稳定, 有时服务器不返回数据, 需要重新获取
try:
live_params = f'aid=6383&device_platform=web&web_rid={web_rid}&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
live_params = f'aid=6383&device_platform=web&web_rid={web_rid}&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
live_api = self.urls.LIVE + utils.getXbogus(live_params)
response = requests.get(live_api, headers=douyin_headers)
@@ -870,19 +878,39 @@ class Douyin(object):
while True:
# 接口不稳定, 有时服务器不返回数据, 需要重新获取
try:
user_detail_params = f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
user_detail_params = f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
# 尝试从 cookie 中提取 msToken
cookie_str = douyin_headers.get('Cookie', '')
if 'msToken=' in cookie_str:
import re
ms_token_match = re.search(r'msToken=([^;]+)', cookie_str)
if ms_token_match:
user_detail_params += f"&msToken={ms_token_match.group(1)}"
url = self.urls.USER_DETAIL + utils.getXbogus(user_detail_params)
res = requests.get(url=url, headers=douyin_headers)
datadict = json.loads(res.text)
if datadict is not None and datadict["status_code"] == 0:
return datadict
res = requests.get(url=url, headers=douyin_headers, timeout=10)
if not res.text.strip():
logger.warning(f"getUserDetailInfo: Empty response (Status: {res.status_code})")
else:
try:
datadict = json.loads(res.text)
if datadict is not None and datadict.get("status_code") == 0:
return datadict
else:
logger.warning(f"getUserDetailInfo: API error (Status: {datadict.get('status_code')}, Msg: {datadict.get('status_msg')})")
except json.JSONDecodeError:
logger.error(f"getUserDetailInfo: JSON decode error (Status: {res.status_code})")
except Exception as e:
end = time.time() # 结束时间
if end - start > self.timeout:
print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
return datadict
logger.error(f"getUserDetailInfo: Exception: {e}")
end = time.time() # 结束时间
if end - start > self.timeout:
print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
return datadict
time.sleep(1) # 增加小延迟避免请求过快
if __name__ == "__main__":