Files
aiData/DouYin/batch_download.py
HuangHai f2f7a38210 'commit'
2026-01-20 19:06:36 +08:00

103 lines
3.7 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import time
import re
from VideoDownloader import VideoDownloader
# Douyin short share links look like https://v.douyin.com/<id>/ and are usually
# embedded in surrounding share text, so they are searched for rather than
# matched against the whole line. Compiled once for reuse across all lines.
_DOUYIN_URL_RE = re.compile(r'https?://v\.douyin\.com/[a-zA-Z0-9\-_]+/?')


def _log(log_file, message, echo=True):
    """Append *message* to the batch log and flush; also print it when *echo*."""
    if echo:
        print(message)
    log_file.write(message + "\n")
    # Flush after every entry so the log stays useful if the run is killed.
    log_file.flush()


def _download_with_retries(downloader, url, save_dir, log_file,
                           max_attempts, retry_delay):
    """Try to download *url* up to *max_attempts* times; return True on success."""
    for attempt in range(1, max_attempts + 1):
        has_more = attempt < max_attempts
        try:
            if downloader.download(url, save_dir):
                return True
            message = f" Attempt {attempt} failed."
            if has_more:
                message += f" Retrying in {retry_delay}s..."
        except Exception as e:
            # A single failed attempt must not abort the whole batch.
            message = f" Error on attempt {attempt}: {e}"
        _log(log_file, message)
        # Only wait when another attempt will actually follow.
        if has_more:
            time.sleep(retry_delay)
    return False


def main(url_file=r"d:\dsWork\aiData\DouYin\Url.txt",
         save_dir=r"d:\dsWork\aiData\DouYin\DownloadedVideos",
         max_attempts=3,
         retry_delay=2,
         pause=1):
    """Download every Douyin video referenced in a text file of share links.

    Each non-blank line of *url_file* is scanned for the first
    ``https://v.douyin.com/...`` link. Every link found is handed to
    ``VideoDownloader.download(url, save_dir)``, retrying on a falsy result
    or an exception. Progress is printed and appended to
    ``batch_log.txt`` inside *save_dir*; a summary of successes and failed
    URLs is printed at the end.

    Args:
        url_file: Path of the UTF-8 text file containing the share links.
        save_dir: Directory that receives the videos and the batch log;
            created if it does not exist.
        max_attempts: Maximum download attempts per URL.
        retry_delay: Seconds to wait between attempts for the same URL.
        pause: Seconds to wait after each URL before moving to the next one.

    Returns:
        None. If *url_file* does not exist an error is printed and nothing
        else is done (no directory is created, no downloader is started).
    """
    if not os.path.exists(url_file):
        print(f"Error: File not found: {url_file}")
        return

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    count = 0
    failed_urls = []
    log_path = os.path.join(save_dir, "batch_log.txt")

    with VideoDownloader() as downloader:
        with open(url_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        total = len(lines)
        print(f"Found {total} lines in {url_file}")

        # The context manager guarantees the log is closed even if writing
        # the start marker itself fails.
        with open(log_path, "a", encoding="utf-8") as log_file:
            log_file.write(
                f"\n--- Batch Download Started at "
                f"{time.strftime('%Y-%m-%d %H:%M:%S')} ---\n"
            )
            try:
                for index, raw_line in enumerate(lines, start=1):
                    line = raw_line.strip()
                    if not line:
                        continue

                    _log(log_file, f"[{index}/{total}] Processing line...")

                    match = _DOUYIN_URL_RE.search(line)
                    if match is None:
                        _log(log_file,
                             f" No valid URL found in line: {line[:50]}...")
                        continue

                    url = match.group(0)
                    _log(log_file, f" Found URL: {url}")

                    if _download_with_retries(downloader, url, save_dir,
                                              log_file, max_attempts,
                                              retry_delay):
                        count += 1
                        # Successes are recorded in the log only; the console
                        # gets the aggregate count at the end.
                        _log(log_file, f" SUCCESS: {url}", echo=False)
                    else:
                        failed_urls.append(url)
                        _log(log_file, f" FAILED to download: {url}")

                    # Throttle requests so the remote service is not hammered.
                    time.sleep(pause)
            except Exception as e:
                # Top-level boundary: record the failure and still print the
                # summary of what was completed so far.
                _log(log_file, f"CRITICAL ERROR in batch loop: {e}")
            finally:
                log_file.write(
                    f"--- Batch Download Ended at "
                    f"{time.strftime('%Y-%m-%d %H:%M:%S')} ---\n"
                )

        print(f"Batch download completed. Successfully downloaded {count} videos.")
        if failed_urls:
            print(f"Failed to download {len(failed_urls)} videos:")
            for u in failed_urls:
                print(f" {u}")
# Script entry point: run the batch download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()