# Debug script: fetch a Douyin video page and check for its embedded RENDER_DATA JSON.
import json
import re
from urllib.parse import unquote

import requests


def test_html():
    """Fetch a Douyin video page and report whether it embeds RENDER_DATA.

    Sends a GET request with browser-like headers to a hard-coded video URL
    and prints the HTTP status code. On a 200 response the HTML is searched
    for the ``<script id="RENDER_DATA" type="application/json">`` element:

    * if found, its percent-encoded content is decoded, parsed as JSON and
      the top-level keys are printed;
    * if not found, the raw HTML is written to ``debug_douyin.html`` in the
      current working directory so it can be inspected by hand.

    Request, JSON-decoding and file-write errors are printed as
    ``Error: ...`` instead of being raised. Returns ``None``.
    """
    url = "https://www.douyin.com/video/7592981059516583202"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'Referer': 'https://www.douyin.com/',
        # Placeholder values; real cookies may be required.
        'Cookie': 's_v_web_id=verify_...; ttwid=...',
    }

    # Keep each try body down to the single operation that can fail, so an
    # unrelated programming error is not silently reported as "Error: ...".
    try:
        res = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error: {e}")
        return

    print(f"Status Code: {res.status_code}")
    if res.status_code != 200:
        return

    # Look for RENDER_DATA
    match = re.search(
        r'<script id="RENDER_DATA" type="application/json">(.+?)</script>',
        res.text,
    )
    if match is None:
        print("RENDER_DATA not found.")
        # NOTE(review): the original indentation was lost; the dump is
        # assumed to belong to the not-found branch -- confirm.
        # Save to file to inspect
        try:
            with open("debug_douyin.html", "w", encoding="utf-8") as f:
                f.write(res.text)
        except OSError as e:
            print(f"Error: {e}")
            return
        print("Saved to debug_douyin.html")
        return

    print("Found RENDER_DATA!")
    try:
        # The script body is percent-encoded before being JSON.
        data = json.loads(unquote(match.group(1)))
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"Error: {e}")
        return

    if isinstance(data, dict):
        print(data.keys())
    else:
        # Valid JSON that is not an object has no keys to list.
        print(f"Error: RENDER_DATA is a {type(data).__name__}, not a JSON object")


# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_html()