'commit'
This commit is contained in:
176
DouYin/Test/T1_GetCookie.py
Normal file
176
DouYin/Test/T1_GetCookie.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""
|
||||
Douyin Cookie Fetcher (T1)
|
||||
用途: 模拟打开浏览器,用户扫码登录后自动获取 Cookie 并保存到 config_douyin.yml
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import yaml
|
||||
import asyncio
|
||||
import io
|
||||
from pathlib import Path
|
||||
|
||||
# Force UTF-8 console output on Windows so the non-ASCII text printed by this
# script is not garbled by the default console code page.
if sys.platform.startswith('win'):
    for _name in ('stdout', 'stderr'):
        _stream = getattr(sys, _name)
        if hasattr(_stream, 'reconfigure'):
            # Switch the encoding in place. This keeps the stream's existing
            # buffering mode, so prompts are still shown before the script
            # blocks waiting for keyboard input.
            _stream.reconfigure(encoding='utf-8')
        elif hasattr(_stream, 'buffer'):
            # Older interpreters: re-wrap the byte stream, but flush on every
            # newline so interactive prompts are not held back in a buffer.
            setattr(sys, _name, io.TextIOWrapper(
                _stream.buffer, encoding='utf-8', line_buffering=True))
|
||||
|
||||
# =================================================================
|
||||
# 1. 环境配置与路径初始化
|
||||
# =================================================================
|
||||
# Absolute location of this script; the YAML config is kept in the same folder.
current_file_path = os.path.abspath(__file__)
current_dir = os.path.split(current_file_path)[0]
config_path = os.path.join(current_dir, "config_douyin.yml")
|
||||
|
||||
# =================================================================
|
||||
# 2. Cookie 过滤与更新逻辑
|
||||
# =================================================================
|
||||
|
||||
# Names of the cookies that are stored individually under the ``cookies``
# mapping of the config file. The set is intentionally broad so that every
# cookie that might be checked by the site is kept.
REQUIRED_KEYS = {
    "__ac_nonce",
    "__ac_signature",
    "architecture",
    "device_web_cpu_core",
    "device_web_memory_size",
    "fp",
    "hevc_supported",
    "msToken",
    "n_sdk_version",
    "odin_tt",
    "passport_csrf_token",
    "s_v_web_id",
    "sessionid",
    "sid_guard",
    "sid_tt",
    "sid_ucp_v1",
    "ssid_ucp_v1",
    "ttwid",
    "uid_tt",
    "uid_tt_ss",
    "webid",
}
|
||||
|
||||
def update_config_yaml(new_cookies):
    """Merge freshly captured cookies into ``config_douyin.yml``.

    The existing config at ``config_path`` (if any) is loaded so that unrelated
    settings are kept. Its ``cookies`` mapping is updated with the entries of
    *new_cookies* whose names are in ``REQUIRED_KEYS``, and the top-level
    ``cookie`` string is replaced by every captured cookie joined as
    ``name=value; name=value``. The result is written back to ``config_path``.

    Args:
        new_cookies: Mapping of cookie name to cookie value.

    Returns:
        None. Read and write failures are reported on stdout, not raised.
    """
    config_data = {}

    # 1. Read the existing configuration.
    if os.path.exists(config_path):
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config_data = yaml.safe_load(f) or {}
        except Exception as e:
            print(f"[ Warning ]: Failed to read existing config: {e}")

    # 2. Update the ``cookies`` mapping. A ``cookies:`` key with no value loads
    #    as None, so test the type rather than mere key presence.
    if not isinstance(config_data.get("cookies"), dict):
        config_data["cookies"] = {}

    # Keep only the cookie names we track individually.
    filtered_cookies = {k: v for k, v in new_cookies.items() if k in REQUIRED_KEYS}
    config_data["cookies"].update(filtered_cookies)

    # The raw cookie header string carries every captured cookie for completeness.
    cookie_str = "; ".join(f"{k}={v}" for k, v in new_cookies.items())
    config_data["cookie"] = cookie_str

    # 3. Write the file back. Serialise first: opening the file in 'w' mode
    #    truncates it, so a serialisation error must happen before that point
    #    or the existing config would be left empty.
    try:
        serialized = yaml.dump(config_data, allow_unicode=True, default_flow_style=False, sort_keys=False)
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f"\n[ Success ]: Cookie updated to: {config_path}")
        print(f"[ Info ]: Captured {len(new_cookies)} total cookies, {len(filtered_cookies)} essential keys saved.")
    except Exception as e:
        print(f"[ Error ]: Failed to write config: {e}")
|
||||
|
||||
# =================================================================
|
||||
# 3. Playwright 自动化逻辑
|
||||
# =================================================================
|
||||
|
||||
async def main():
    """Open a browser for a manual Douyin login, then capture and save cookies.

    Flow:
      1. Launch a visible Chromium window through Playwright and open Douyin.
      2. Poll the page once per second (up to 120 times) for an element that
         is expected to exist only after login.
      3. If nothing is detected, wait for the user to press ENTER in the
         terminal and capture the cookies anyway.
      4. Collect all cookies of the browser context, try to locate ``msToken``
         in a few additional places, close the browser and hand the cookies to
         ``update_config_yaml``.

    Returns:
        None. Problems are reported on stdout; if Playwright is not installed
        the function prints an installation hint and returns immediately.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        print("[ Error ]: Playwright not installed. Run: pip install playwright && playwright install chromium")
        return

    print("\n" + "="*60)
    print(" Douyin Login & Cookie Fetcher")
    print("="*60)
    print("[ Instructions ]:")
    print("1. Browser will open and navigate to Douyin.")
    print("2. Please login via QR code scanning.")
    print("3. After login, return here and press [ENTER] to save cookies.")
    print("="*60 + "\n")

    async with async_playwright() as p:
        # Launch a visible browser so the user can scan the QR code.
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context(
            viewport={'width': 1280, 'height': 800},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
        )
        page = await context.new_page()

        # Open Douyin. Waiting only for 'domcontentloaded' with a 90 s timeout
        # makes the navigation more likely to succeed on slow connections.
        print("[ Navigating ]: https://www.douyin.com/ ...")
        try:
            await page.goto("https://www.douyin.com/", wait_until="domcontentloaded", timeout=90000)
        except Exception as e:
            # Deliberately non-fatal: the page may still be usable for login.
            print(f"[ Warning ]: Initial navigation timed out or failed: {e}")
            print("[ Info ]: Will continue anyway, please check if the page is visible.")

        # Automatic login detection first, manual confirmation as a fallback.
        print("\n" + "-"*40)
        print("[ Waiting ]: Please complete the QR login in the browser.")
        print("[ Auto-Detect ]: The script will automatically proceed if login is detected.")
        print("[ Manual ]: If auto-detect fails, type 'go' and press [ENTER] here.")
        print("-"*40 + "\n")

        login_detected = False
        for _ in range(120):  # one probe per second
            try:
                # Look for the avatar element ('.dy-avatar') or any element
                # whose text is the publish-video label.
                if await page.query_selector('.dy-avatar') or await page.query_selector('text=发布视频'):
                    print("[ Success ]: Login detected automatically!")
                    login_detected = True
                    break
            except Exception:
                # A failed probe is ignored and simply retried on the next
                # tick. Only Exception is caught so that task cancellation and
                # Ctrl+C (which are BaseException subclasses) still stop the wait.
                pass
            await asyncio.sleep(1)

        if not login_detected:
            # Auto-detection gave up: let the user confirm manually.
            print("[ Timeout ]: Auto-detection timed out. Please ensure you are logged in.")
            print("[ Action ]: Type anything and press [ENTER] to force capture cookies:")
            # Read stdin in a worker thread so the event loop is not blocked.
            await asyncio.to_thread(sys.stdin.readline)

        # Collect every cookie of the browser context as a name -> value dict.
        all_cookies = await context.cookies()
        cookie_dict = {c['name']: c['value'] for c in all_cookies}

        # Try to find msToken, in order: localStorage, the captured cookies,
        # then a page-level global object.
        try:
            ms_token = await page.evaluate("() => window.localStorage.getItem('msToken') || ''")

            if not ms_token:
                ms_token = cookie_dict.get('msToken', '')

            if not ms_token:
                ms_token = await page.evaluate("() => window._ROUTER_DATA?.msToken || ''")

            if ms_token:
                cookie_dict['msToken'] = ms_token
                print(f"[ Info ]: Captured msToken: {ms_token[:20]}...")
            else:
                print("[ Warning ]: msToken not found in common locations.")
        except Exception as e:
            # Best effort only: the cookies gathered so far are still saved.
            print(f"[ Debug ]: Error capturing msToken: {e}")

        # Close the browser.
        await context.close()
        await browser.close()

        # Persist the result to the config file.
        if cookie_dict:
            update_config_yaml(cookie_dict)
        else:
            print("[ Failed ]: No cookies captured. Did you login successfully?")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the async login-and-capture flow to completion.
    asyncio.run(main())
|
||||
203
DouYin/Test/T2_BigV.py
Normal file
203
DouYin/Test/T2_BigV.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
Douyin User Profile & Video Crawler (T2)
|
||||
访问注释中的页面,获取页面中的内容信息(用户信息及最近作品)
|
||||
|
||||
URL: https://www.douyin.com/user/MS4wLjABAAAA2P7MeZl0VUsDmCzKbZeLlVGVTDRAuXmvr_zcC6XNqd-6R4n9ssCguSgA-gnBfjUO
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import yaml
|
||||
import time
|
||||
import io
|
||||
from datetime import datetime
|
||||
|
||||
# Force UTF-8 console output on Windows so the non-ASCII text printed by this
# script is not garbled by the default console code page.
if sys.platform.startswith('win'):
    for _name in ('stdout', 'stderr'):
        _stream = getattr(sys, _name)
        if hasattr(_stream, 'reconfigure'):
            # Switch the encoding in place; this keeps the stream's existing
            # buffering mode so output still appears line by line.
            _stream.reconfigure(encoding='utf-8')
        elif hasattr(_stream, 'buffer'):
            # Older interpreters: re-wrap the byte stream, flushing on every
            # newline so progress messages are not held back in a buffer.
            setattr(sys, _name, io.TextIOWrapper(
                _stream.buffer, encoding='utf-8', line_buffering=True))
|
||||
|
||||
# =================================================================
|
||||
# 1. 环境配置与路径初始化
|
||||
# =================================================================
|
||||
# Locate this script, its folder, and the folder one level above it.
current_file_path = os.path.abspath(__file__)
current_dir = os.path.split(current_file_path)[0]
douyin_root = os.path.split(current_dir)[0]

# Make the parent folder importable (added once, at the end of sys.path).
if sys.path.count(douyin_root) == 0:
    sys.path.append(douyin_root)
|
||||
|
||||
# Import the core crawler modules; without them the script cannot do anything,
# so report the problem and exit with status 1.
try:
    from apiproxy.douyin.douyin import Douyin
    from apiproxy.douyin import douyin_headers
except ImportError as import_error:
    print(f"[ Error ]: Failed to import core modules. Error: {import_error}")
    raise SystemExit(1)
|
||||
|
||||
# =================================================================
|
||||
# 2. 工具函数定义
|
||||
# =================================================================
|
||||
|
||||
def extract_url_from_file(file_path):
    """Return the first Douyin user-profile URL found in a text file.

    The whole file is read as UTF-8 and searched for
    ``http(s)://[www.]douyin.com/user/<id>``, where ``<id>`` consists of
    letters, digits, ``-`` and ``_``.

    Args:
        file_path: Path of the file to scan.

    Returns:
        The first matching URL, or None when there is no match or the file
        cannot be read (the error is printed in that case).
    """
    profile_url = re.compile(r'https?://(?:www\.)?douyin\.com/user/[a-zA-Z0-9\-_]+')
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            first_hit = profile_url.search(source.read())
    except Exception as e:
        print(f"[ Error ]: Failed to read script file: {e}")
        return None

    if first_hit is None:
        return None
    return first_hit.group(0)
|
||||
|
||||
def load_config():
    """Load ``config_douyin.yml`` and return its content.

    The file next to this script (``current_dir``) is preferred; if it does
    not exist, the one in ``douyin_root`` is used instead.

    Returns:
        The parsed YAML document. An empty dict is returned when no config
        file exists, when it cannot be read or parsed (a warning is printed),
        or when the file is empty.
    """
    # Prefer the config in the Test directory, then fall back to the root.
    test_config_path = os.path.join(current_dir, "config_douyin.yml")
    root_config_path = os.path.join(douyin_root, "config_douyin.yml")
    config_path = test_config_path if os.path.exists(test_config_path) else root_config_path

    if not os.path.exists(config_path):
        print("[ Warning ]: config_douyin.yml not found.")
        return {}

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            print(f"[ Info ]: Loading config from: {os.path.abspath(config_path)}")
            # safe_load() yields None for an empty document; callers use
            # ``.get`` on the result, so normalise that case to a dict.
            return yaml.safe_load(f) or {}
    except Exception as e:
        print(f"[ Warning ]: Failed to parse config: {e}")
    return {}
|
||||
|
||||
def setup_cookies(config):
    """Populate ``douyin_headers["Cookie"]`` from the loaded configuration.

    The raw ``cookie`` string is preferred. If it lacks an ``msToken=`` entry
    and the ``cookies`` mapping has one, that token is prepended. When there
    is no raw string, the header is built by joining the ``cookies`` mapping
    as ``name=value; name=value``. A warning is printed if no Cookie header
    ends up being set.

    Args:
        config: Parsed configuration mapping; None or an empty mapping is
            treated as "no cookies configured".
    """
    config = config or {}
    # YAML keys present without a value load as None, so fall back explicitly
    # instead of relying on the ``.get`` default.
    cookie_str = config.get("cookie") or ""
    cookies_dict = config.get("cookies") or {}

    if cookie_str:
        # Make sure msToken is part of the raw string when the dict has one.
        if cookies_dict.get("msToken") and "msToken=" not in cookie_str:
            cookie_str = f"msToken={cookies_dict['msToken']}; " + cookie_str
        douyin_headers["Cookie"] = cookie_str
        print(f"[ Info ]: Using raw cookie string (Length: {len(cookie_str)})")
    elif cookies_dict:
        cookie_str = "; ".join(f"{k}={v}" for k, v in cookies_dict.items())
        douyin_headers["Cookie"] = cookie_str
        print(f"[ Info ]: Using combined cookies from dict (Count: {len(cookies_dict)})")

    if not douyin_headers.get("Cookie"):
        print("[ Warning ]: No valid Cookie detected. Most API calls will fail.")
        print(" Please run T1_GetCookie.py first.")
|
||||
|
||||
# =================================================================
|
||||
# 3. 核心爬取逻辑
|
||||
# =================================================================
|
||||
|
||||
def crawl_user_profile(url):
    """Fetch a user's profile and recent works, print them and save to JSON.

    Steps:
      1. Resolve *url* to a ``sec_uid`` with ``Douyin.getKey`` and fetch the
         profile with ``Douyin.getUserDetailInfo``. Any failure here is
         printed and ends the function.
      2. Fetch up to 10 recent works with ``Douyin.getUserInfo`` and print a
         short summary of each.
      3. Write profile, works and crawl time to
         ``user_data_<first 8 chars of sec_uid>.json`` next to this script.

    Args:
        url: Douyin user-profile URL.

    Returns:
        None. All outcomes are reported on stdout.
    """
    dy = Douyin()
    # Allow the API retry logic more time before it gives up.
    dy.timeout = 30

    print(f"[ Start ]: Target URL: {url}")

    # Step 1: resolve the URL and fetch the profile.
    print("[ Step 1/2 ]: Fetching blogger profile...")
    try:
        key_type, sec_uid = dy.getKey(url)
        if key_type != "user" or not sec_uid:
            print(f"[ Failed ]: Invalid URL or failed to parse sec_uid. Type: {key_type}")
            return

        print(f" (sec_uid: {sec_uid[:15]}...)")

        user_detail = dy.getUserDetailInfo(sec_uid)

        if not user_detail or user_detail.get("status_code") != 0:
            print(f"[ Failed ]: Failed to fetch profile. Status: {user_detail.get('status_code') if user_detail else 'None'}")
            if user_detail:
                print(f" Msg: {user_detail.get('status_msg', 'Unknown error')}")
            print(" Hint: Please check if Cookie is expired or network is restricted.")
            return
    except Exception as e:
        print(f"[ Error ]: An unexpected error occurred: {e}")
        return

    # ``user`` may be present but null in the response; treat that as empty.
    user_data = user_detail.get("user") or {}
    nickname = user_data.get('nickname', 'Unknown')

    # Print the profile summary.
    print("\n" + "="*60)
    print(f"Blogger: {nickname}")
    print("-" * 60)
    print(f"ID: {user_data.get('unique_id') or user_data.get('short_id', 'Unknown')}")
    print(f"Bio: {user_data.get('signature', 'N/A')}")
    print(f"Followers:{user_data.get('m_follower_count') or user_data.get('follower_count', 0)}")
    print(f"Likes: {user_data.get('total_favorited', 0)}")
    print(f"Following:{user_data.get('following_count', 0)}")
    print("="*60 + "\n")

    # Step 2: fetch the recent works. A failure here must not discard the
    # profile that was already fetched, so it is reported and treated as
    # "no works".
    print(f"[ Step 2/2 ]: Fetching recent works...")
    try:
        aweme_list = dy.getUserInfo(sec_uid, count=10)
    except Exception as e:
        print(f"[ Warning ]: Failed to fetch recent works: {e}")
        aweme_list = None

    result_data = {
        "user_info": user_data,
        "recent_videos": aweme_list or [],
        "crawl_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }

    if aweme_list:
        print(f"Successfully fetched {len(aweme_list)} videos:\n")
        for i, aweme in enumerate(aweme_list, 1):
            # Numeric create_time values are formatted as local time;
            # anything else is shown as-is.
            ctime = aweme.get('create_time')
            if isinstance(ctime, (int, float)):
                ctime_str = datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M')
            else:
                ctime_str = str(ctime)

            # A missing, null or empty description falls back to a placeholder;
            # long descriptions are cut to 50 characters including the ellipsis.
            desc = aweme.get('desc') or 'No Title'
            desc = (desc[:47] + "...") if len(desc) > 50 else desc

            stats = aweme.get('statistics') or {}
            print(f"{i:02d}. [{ctime_str}] {desc}")
            print(f" ❤️ {stats.get('digg_count', 0):<8} 💬 {stats.get('comment_count', 0):<8} ⭐ {stats.get('collect_count', 0)}")
            print(f" 🔗 https://www.douyin.com/video/{aweme.get('aweme_id')}\n")
    else:
        print("[ Info ]: No public videos found. Account might be private or API limited.")

    # Step 3: save everything as JSON next to this script.
    # NOTE(review): only the first 8 characters of sec_uid go into the file
    # name; users whose sec_uid shares that prefix would overwrite each
    # other's file - confirm whether a longer slice is needed.
    output_file = os.path.join(current_dir, f"user_data_{sec_uid[:8]}.json")
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(result_data, f, ensure_ascii=False, indent=4)
        print(f"[ Success ]: Data saved to: {os.path.basename(output_file)}")
    except Exception as e:
        print(f"[ Warning ]: Failed to save data: {e}")
|
||||
|
||||
# =================================================================
|
||||
# 4. 主入口
|
||||
# =================================================================
|
||||
|
||||
if __name__ == "__main__":
    # The target profile URL is taken from the text of this very file.
    target_url = extract_url_from_file(current_file_path)

    if not target_url:
        print("[ Error ]: No valid Douyin URL found in file comments.")
    else:
        # Load the configuration and install the cookies before crawling.
        config = load_config()
        setup_cookies(config)

        # Run the crawl.
        crawl_user_profile(target_url)
|
||||
16
DouYin/Test/config_douyin.yml
Normal file
16
DouYin/Test/config_douyin.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
cookie: __ac_nonce=069a13bcc00cacbb09800; __ac_signature=_02B4Z6wo00f01EiD5NQAAIDB-WNazgGqWRxIo8BAAHuJ33;
|
||||
ttwid=1%7CxZrCypV7044-lvm6_oK7nlsTEtEn-H_yJXdZRYjRQg0%7C1772174285%7C38825b7026cf7c8724e4070f81748a477168eb58988b8e09b67142bc6441a676;
|
||||
enter_pc_once=1; UIFID_TEMP=630dc87f7218843564944b22829d362b9fabe9a9e3376a5c74988083749b66dffd3737bb90e0acf82e6707c6ee24f7522fbefdc12d5a2fed2eae91b09db43c67688ba5ad520d1d6f6b294f7ef3c3442efbc0a4ff34b6c60edb3ff06d5e120b4a2376474bd59208bc08755ec54934cb80;
|
||||
x-web-secsdk-uid=9d7866f1-5873-4d35-8b7d-7d0117b45b3f; s_v_web_id=verify_mm4ital2_ERFXSBBL_FxRH_4tEN_A3jj_V3VIL6WSh4u6;
|
||||
=douyin.com; device_web_cpu_core=20; device_web_memory_size=8; architecture=amd64;
|
||||
hevc_supported=true; IsDouyinActive=true; home_can_add_dy_2_desktop=%220%22; dy_swidth=1280;
|
||||
dy_sheight=800; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1280%2C%5C%22screen_height%5C%22%3A800%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A20%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A0%7D%22
|
||||
cookies:
|
||||
msToken: my7nuKyrpTVEWOX-n62wR8I5EcvoMKBmvsBMnODLOtG3sn6AsR7q_jEM5jmEenyuwmHpsL25b84VhGcR4nUgv0PepA2zrSUOGHCmZVzpauYpRgbR9svMKjt2-AgNRz
|
||||
ttwid: 1%7CxZrCypV7044-lvm6_oK7nlsTEtEn-H_yJXdZRYjRQg0%7C1772174285%7C38825b7026cf7c8724e4070f81748a477168eb58988b8e09b67142bc6441a676
|
||||
s_v_web_id: verify_mm4ital2_ERFXSBBL_FxRH_4tEN_A3jj_V3VIL6WSh4u6
|
||||
odin_tt: 6f33402fa0952cdea7eaa5226bfe0a2a45ee10bbb138835da6a5383e9eef092f7ad0a1790c0271a090f72c8112875fef3665d50460b79ec302ba56c2b91f52b3bcab4b209cce3f4b7378f995b01a5cce
|
||||
path: ./Downloaded/
|
||||
thread: 5
|
||||
number:
|
||||
post: 10
|
||||
1935
DouYin/Test/user_data_MS4wLjAB.json
Normal file
1935
DouYin/Test/user_data_MS4wLjAB.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -2,4 +2,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
|
||||
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
|
||||
@@ -9,10 +9,10 @@ douyin_headers = {
|
||||
'referer': 'https://www.douyin.com/',
|
||||
'accept': 'application/json, text/plain, */*',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'accept-encoding': 'gzip, deflate, br',
|
||||
'accept-encoding': 'gzip, deflate',
|
||||
'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"macOS"',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-origin'
|
||||
|
||||
@@ -165,7 +165,7 @@ class Douyin(object):
|
||||
# 单作品接口返回 'aweme_detail'
|
||||
# 主页作品接口返回 'aweme_list'->['aweme_detail']
|
||||
# 更新API参数以适应最新接口要求
|
||||
detail_params = f'aweme_id={aweme_id}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&update_version_code=170400'
|
||||
detail_params = f'aweme_id={aweme_id}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&update_version_code=170400'
|
||||
jx_url = self.urls.POST_DETAIL + utils.getXbogus(detail_params)
|
||||
|
||||
response = requests.get(url=jx_url, headers=douyin_headers, timeout=10)
|
||||
@@ -280,7 +280,15 @@ class Douyin(object):
|
||||
while True:
|
||||
try:
|
||||
# 构建请求URL - 添加更多必需参数
|
||||
base_params = f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
|
||||
base_params = f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
|
||||
|
||||
# 尝试从 cookie 中提取 msToken
|
||||
cookie_str = douyin_headers.get('Cookie', '')
|
||||
if 'msToken=' in cookie_str:
|
||||
import re
|
||||
ms_token_match = re.search(r'msToken=([^;]+)', cookie_str)
|
||||
if ms_token_match:
|
||||
base_params += f"&msToken={ms_token_match.group(1)}"
|
||||
|
||||
if mode == "post":
|
||||
url = self.urls.USER_POST + utils.getXbogus(base_params)
|
||||
@@ -415,7 +423,7 @@ class Douyin(object):
|
||||
while True:
|
||||
# 接口不稳定, 有时服务器不返回数据, 需要重新获取
|
||||
try:
|
||||
live_params = f'aid=6383&device_platform=web&web_rid={web_rid}&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
|
||||
live_params = f'aid=6383&device_platform=web&web_rid={web_rid}&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
|
||||
live_api = self.urls.LIVE + utils.getXbogus(live_params)
|
||||
|
||||
response = requests.get(live_api, headers=douyin_headers)
|
||||
@@ -870,19 +878,39 @@ class Douyin(object):
|
||||
while True:
|
||||
# 接口不稳定, 有时服务器不返回数据, 需要重新获取
|
||||
try:
|
||||
user_detail_params = f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Mac&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
|
||||
user_detail_params = f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383&channel=channel_pc_web&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=122.0.0.0&browser_online=true&engine_name=Blink&engine_version=122.0.0.0&os_name=Windows&os_version=10&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50'
|
||||
|
||||
# 尝试从 cookie 中提取 msToken
|
||||
cookie_str = douyin_headers.get('Cookie', '')
|
||||
if 'msToken=' in cookie_str:
|
||||
import re
|
||||
ms_token_match = re.search(r'msToken=([^;]+)', cookie_str)
|
||||
if ms_token_match:
|
||||
user_detail_params += f"&msToken={ms_token_match.group(1)}"
|
||||
|
||||
url = self.urls.USER_DETAIL + utils.getXbogus(user_detail_params)
|
||||
|
||||
res = requests.get(url=url, headers=douyin_headers)
|
||||
datadict = json.loads(res.text)
|
||||
|
||||
if datadict is not None and datadict["status_code"] == 0:
|
||||
return datadict
|
||||
res = requests.get(url=url, headers=douyin_headers, timeout=10)
|
||||
|
||||
if not res.text.strip():
|
||||
logger.warning(f"getUserDetailInfo: Empty response (Status: {res.status_code})")
|
||||
else:
|
||||
try:
|
||||
datadict = json.loads(res.text)
|
||||
if datadict is not None and datadict.get("status_code") == 0:
|
||||
return datadict
|
||||
else:
|
||||
logger.warning(f"getUserDetailInfo: API error (Status: {datadict.get('status_code')}, Msg: {datadict.get('status_msg')})")
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"getUserDetailInfo: JSON decode error (Status: {res.status_code})")
|
||||
except Exception as e:
|
||||
end = time.time() # 结束时间
|
||||
if end - start > self.timeout:
|
||||
print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
|
||||
return datadict
|
||||
logger.error(f"getUserDetailInfo: Exception: {e}")
|
||||
|
||||
end = time.time() # 结束时间
|
||||
if end - start > self.timeout:
|
||||
print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
|
||||
return datadict
|
||||
time.sleep(1) # 增加小延迟避免请求过快
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user