Files
aiData/DouYin/cleanup_bad_files.py
HuangHai f2f7a38210 'commit'
2026-01-20 19:06:36 +08:00

94 lines
2.9 KiB
Python

import os
import subprocess
import logging
# Configure the root logger at import time: INFO level and above, with each
# record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Named logger used by the functions in this script.
logger = logging.getLogger("Cleanup")
def check_file(filepath):
    """Probe a media file with ffprobe and classify its streams.

    Args:
        filepath: Path of the media file to inspect.

    Returns:
        A ``(has_video, is_hevc)`` tuple of booleans. ``has_video`` is True
        when ffprobe reports a stream whose codec type is ``video``;
        ``is_hevc`` is True when a reported codec name is ``hevc``. If the
        probe fails for any reason, ``(True, False)`` is returned so the
        caller neither deletes nor converts a file that could not be
        inspected.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "stream=codec_type,codec_name",
        "-of", "default=noprint_wrappers=1:nokey=1",
        filepath
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except Exception as e:
        # Deliberately broad: any failure (ffprobe missing, non-zero exit,
        # undecodable output) must fail safe rather than flag the file.
        logger.error(f"Error checking {filepath}: {e}")
        return True, False  # Assume ok to avoid deleting good files on error

    # With noprint_wrappers=1:nokey=1 ffprobe is expected to print one bare
    # value per line (e.g. "aac", "audio", "h264", "video"). Compare whole
    # values instead of searching the raw text, so the result does not depend
    # on the order of the fields or on one value being a substring of another.
    values = set(result.stdout.split())
    return "video" in values, "hevc" in values
def convert_to_h264(filepath):
    """Re-encode a video file to H.264 in place, copying its audio unchanged.

    ffmpeg writes the re-encoded result to a sibling ``<name>_temp<ext>`` file,
    which then replaces the original file.

    Args:
        filepath: Path of the video file to convert.

    Returns:
        True if the file was converted and swapped in, False if any step
        failed. On failure the temporary file is removed (best effort) and
        the original file is left as it was.
    """
    # Bound before the try so the error handler can always tell whether a
    # temporary path was ever computed.
    temp_filepath = None
    try:
        directory = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        name, ext = os.path.splitext(filename)
        temp_filepath = os.path.join(directory, f"{name}_temp{ext}")
        logger.info(f"Converting {filename} to H.264...")
        cmd = [
            "ffmpeg", "-i", filepath,
            "-c:v", "libx264", "-c:a", "copy",
            "-y", temp_filepath
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        # os.replace overwrites the destination in a single step. Removing the
        # original first and renaming afterwards could lose both files: a
        # failed rename would leave no original, and the error handler below
        # would then delete the converted copy as well.
        os.replace(temp_filepath, filepath)
        logger.info(f"Converted: {filename}")
        return True
    except Exception as e:
        logger.error(f"Failed to convert {filepath}: {e}")
        if temp_filepath is not None:
            try:
                os.remove(temp_filepath)
            except FileNotFoundError:
                # Nothing was written, or the file was already moved.
                pass
            except OSError as cleanup_error:
                # Cleanup is best effort; report it but still return False.
                logger.error(f"Failed to remove temporary file {temp_filepath}: {cleanup_error}")
        return False
def main(directory=r"d:\dsWork\aiData\DouYin\DownloadedVideos"):
    """Scan a directory of ``.mp4`` files, deleting or converting bad ones.

    Each file whose name ends with ``.mp4`` is probed with ``check_file``:
    files without a video stream are deleted, files detected as HEVC are
    converted with ``convert_to_h264``, and all other files are left alone.

    Args:
        directory: Directory to scan (not recursive). Defaults to the
            download directory this script was originally hard-coded to use.
    """
    files = [f for f in os.listdir(directory) if f.endswith(".mp4")]
    for filename in files:
        filepath = os.path.join(directory, filename)
        has_video, is_hevc = check_file(filepath)
        if not has_video:
            logger.warning(f"[DELETE] Audio only (no video stream): {filename}")
            try:
                os.remove(filepath)
            except OSError as e:
                # Keep going: one undeletable file should not stop the scan.
                logger.error(f"Failed to delete {filename}: {e}")
        elif is_hevc:
            logger.info(f"[CONVERT] HEVC detected: {filename}")
            convert_to_h264(filepath)
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()