54 lines
1.9 KiB
Python
54 lines
1.9 KiB
Python
|
||
import sys
|
||
import os
|
||
import re
|
||
|
||
# Mock class since we can't import easily without full env
|
||
class DouYinDownloaderMock:
    """Mock of the downloader exposing only the share-text title extraction.

    It exists so the title clean-up logic can be run on its own, without
    importing the real downloader and its full environment.
    """

    # Patterns are compiled once at class creation instead of on every call.
    # An http:// or https:// link, up to the next whitespace character.
    _URL_PATTERN = re.compile(r'http[s]?://\S+')
    # Share boilerplate: anything up to "复制打开抖音", then anything up to
    # the first ",看看" (full-width comma). Both gaps are matched lazily.
    _SHARE_PREFIX_PATTERN = re.compile(r'.*?复制打开抖音.*?,看看')
    # A leading 【...】 group (usually the author name), optionally preceded
    # by whitespace.
    _LEADING_BRACKET_PATTERN = re.compile(r'^\s*【.*?】')
    # Returned whenever nothing usable is left after cleaning.
    _FALLBACK_TITLE = "Unknown Title"

    def extract_title_from_text(self, text: str) -> str:
        """Extract the title from share text.

        URLs, the "复制打开抖音...,看看" share prefix and a leading 【...】
        group are removed, then surrounding whitespace is stripped.

        Args:
            text: Raw share text. ``None`` and the empty string are accepted
                and yield the fallback title instead of raising.

        Returns:
            The cleaned title, or ``"Unknown Title"`` when nothing remains.
        """
        # Guard first: re.sub would raise TypeError on None.
        if not text:
            return self._FALLBACK_TITLE

        # 1. Remove URLs.
        cleaned = self._URL_PATTERN.sub('', text)

        # 2. Remove the share prefix, e.g.
        #    "3.00 12/28 d@n.dN VYZ:/ 复制打开抖音极速版,看看".
        cleaned = self._SHARE_PREFIX_PATTERN.sub('', cleaned)

        # 3. Remove a 【...】 group only when it sits at the very start.
        cleaned = self._LEADING_BRACKET_PATTERN.sub('', cleaned)

        # 4. Trim surrounding whitespace left behind by the removals.
        cleaned = cleaned.strip()

        return cleaned or self._FALLBACK_TITLE
|
||
|
||
def test():
    """Print the extracted title for three sample share texts.

    This is a manual smoke check: each input and its extracted title are
    printed for visual inspection, and nothing is asserted.
    """
    extract = DouYinDownloaderMock().extract_title_from_text
    divider = "-" * 20

    # Case 1: full share text with prefix, author tag and trailing URL.
    text1 = "3.00 12/28 d@n.dN VYZ:/ 复制打开抖音极速版,看看【聚合能研的作品】2026年,电力市场的 “大洗牌” 正式开始 告别... https://v.douyin.com/gHWfWVgDVRo/"
    title1 = extract(text1)
    print(f"Input 1: {text1}")
    print(f"Title 1: {title1}")
    print(divider)

    # Case 2: plain title text with nothing to remove.
    text2 = "2026年,电力市场的 “大洗牌” 正式开始 告别..."
    title2 = extract(text2)
    print(f"Input 2: {text2}")
    print(f"Title 2: {title2}")
    print(divider)

    # Case 3: short text followed by a URL.
    text3 = "Check this out https://v.douyin.com/abc/"
    title3 = extract(text3)
    print(f"Input 3: {text3}")
    print(f"Title 3: {title3}")
    print(divider)
|
||
|
||
# Run the manual smoke check only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    test()
|