This commit is contained in:
HuangHai
2026-01-18 18:59:17 +08:00
parent 155d44ff4f
commit 6655e0cc29
26 changed files with 199 additions and 69 deletions

View File

@@ -15,7 +15,7 @@ if project_root not in sys.path:
import uiautomator2 as u2
from Apps.AiTeJiYiChong import Kit
from Apps.AiTeJiYiChong.Kit import take_screenshot, setup_logger
from Apps.AiTeJiYiChong.Kit import take_screenshot, setup_logger, get_name_md5
from Apps.AiTeJiYiChong.ReadImageKit import ReadImageKit
from Apps.AiTeJiYiChong.FirstPageKit import run_ocr_rect
from Util.RedisKit import RedisKit
@@ -177,7 +177,7 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
should_back_to_list = True
detail_uuid = f"detail_{station_name}_{image_uuid}"
detail_uuid = f"detail_{get_name_md5(station_name)}_{image_uuid}"
detail_path = take_screenshot(d, detail_uuid, save_dir=TEMP_IMAGE_DIR)
logger.info(f"已启动后台分析详情页: {station_name}")
@@ -268,7 +268,7 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
logger.info("正在从顶部开始向下逐页截图...")
max_scroll_down_pages = 10
for p_idx in range(1, max_scroll_down_pages + 1):
p_shot = take_screenshot(d, f"price_scroll_{p_idx}_{station_name}", save_dir=TEMP_IMAGE_DIR)
p_shot = take_screenshot(d, f"price_scroll_{p_idx}_{get_name_md5(station_name)}", save_dir=TEMP_IMAGE_DIR)
before_dn_md5 = Kit.get_image_content_md5(p_shot)
d.swipe_ext("up", scale=0.8)

View File

@@ -35,6 +35,13 @@ def get_file_md5(file_path):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def get_name_md5(name):
    """Return a fixed-length hex tag derived from ``name``.

    Args:
        name: Value to hash. Non-string values are converted with ``str()``.

    Returns:
        The literal ``"unknown"`` when ``name`` is falsy (``None``, ``""``,
        ``0`` ...); otherwise the 32-character hexadecimal MD5 digest of the
        UTF-8 encoded text.
    """
    if not name:
        return "unknown"
    # Hash the textual form so that non-string inputs are accepted as well.
    text = name if isinstance(name, str) else str(name)
    return hashlib.md5(text.encode("utf-8")).hexdigest()
def get_image_content_md5(file_path, top_ratio=0.1, bottom_ratio=0.1):
"""

View File

@@ -7,7 +7,7 @@ import json
import cv2
from Apps.TeLaiDian.Kit import (
take_screenshot, get_image_content_md5, clean_station_name,
setup_logger, read_image, save_image, detect_warm_popup_xczs_cv
setup_logger, read_image, save_image, detect_warm_popup_xczs_cv, get_name_md5
)
from Apps.TeLaiDian.ReadImageKit import ReadImageKit
from Apps.TeLaiDian.FirstPageKit import run_ocr_rect
@@ -208,6 +208,8 @@ class TeLaiDianCrawler(BaseCrawler):
remaining = max_to_crawl - current_idx
logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 处理场站: {name} (坐标: {point}, 距离: {distance}) ---")
file_tag = get_name_md5(name)
# 组装基础场站信息,便于详情页逻辑使用
station_info = {
"name": name,
@@ -223,7 +225,7 @@ class TeLaiDianCrawler(BaseCrawler):
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
# 截图验证是否进入详情页
detail_check_path = take_screenshot(d, f"tld_detail_check_{int(time.time())}.jpg")
detail_check_path = take_screenshot(d, f"tld_detail_check_{file_tag}_{int(time.time())}.jpg")
logger.info(f"详情页快照已保存: {detail_check_path}")
# 简单验证:如果标题包含 "我的卡券"、"优惠券"、"新人福利" 等,说明点错了
@@ -300,7 +302,7 @@ class TeLaiDianCrawler(BaseCrawler):
"""
在详情页提取价格和状态信息
"""
first_screen_path = take_screenshot(d, f"tld_detail_basic_{int(time.time())}.jpg")
first_screen_path = take_screenshot(d, f"tld_detail_basic_{get_name_md5(station_name)}_{int(time.time())}.jpg")
station_name = station_info.get("name")
address = station_info.get("address")
distance = station_info.get("distance")
@@ -456,7 +458,7 @@ class TeLaiDianCrawler(BaseCrawler):
max_scroll_down_pages = 8
for p_idx in range(1, max_scroll_down_pages + 1):
# 截图当前页
p_shot = take_screenshot(d, f"tld_detail_price_{int(time.time())}_{p_idx}.jpg")
p_shot = take_screenshot(d, f"tld_detail_price_{get_name_md5(station_name)}_{int(time.time())}_{p_idx}.jpg")
# 检查是否还能向下滚动
before_dn_md5 = get_image_content_md5(p_shot)

View File

@@ -48,6 +48,13 @@ def get_file_md5(file_path):
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def get_name_md5(name):
    """Return a fixed-length hex tag derived from ``name``.

    Args:
        name: Value to hash. Non-string values are converted with ``str()``.

    Returns:
        The literal ``"unknown"`` when ``name`` is falsy (``None``, ``""``,
        ``0`` ...); otherwise the 32-character hexadecimal MD5 digest of the
        UTF-8 encoded text.
    """
    if not name:
        return "unknown"
    # Hash the textual form so that non-string inputs are accepted as well.
    text = name if isinstance(name, str) else str(name)
    return hashlib.md5(text.encode("utf-8")).hexdigest()
def get_image_content_md5(file_path, top_ratio=0.1, bottom_ratio=0.1):
"""

View File

@@ -1,7 +1,7 @@
# 采集配置
SCROLL_DISTANCE_RATIO = 0.5
MAX_STATIONS_COUNT = 20
MAX_STATIONS_COUNT = 100
FIRST_RUN_ONLY_ONE_STATION = False
REDIS_STATION_EXPIRE = 120
DATA_RETENTION_DAYS = 365

View File

@@ -5,7 +5,7 @@ import sys
import json
import time
from datetime import datetime
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv, setup_logger
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv, setup_logger, get_name_md5
from Apps.YeLiTe.ReadImageKit import ReadImageKit
from Apps.YeLiTe.FirstPageKit import run_ocr_rect
from Apps.YeLiTe.Service import YeLiTeService
@@ -127,7 +127,8 @@ class YeLiTeCrawler(BaseCrawler):
remaining = max_to_crawl - current_idx
logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 发现新场站: {name} (坐标: {point}, 距离: {distance}) ---")
before_click_path = take_screenshot(d, f"before_{clean_station_name(name)}")
file_tag = get_name_md5(name)
before_click_path = take_screenshot(d, f"before_{file_tag}")
before_md5 = get_image_content_md5(before_click_path, top_ratio=SAFE_EXCLUDE_RATIO, bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO)
await asyncio.sleep(0.5)
@@ -135,7 +136,7 @@ class YeLiTeCrawler(BaseCrawler):
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
# 分析详情页 (采用异步后台模式)
detail_shot = take_screenshot(d, f"detail_{clean_station_name(name)}_{int(time.time())}")
detail_shot = take_screenshot(d, f"detail_{file_tag}_{int(time.time())}")
after_md5 = get_image_content_md5(detail_shot, top_ratio=SAFE_EXCLUDE_RATIO, bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO)
# 清理临时对比图
@@ -150,48 +151,51 @@ class YeLiTeCrawler(BaseCrawler):
# 再次截图检查
if os.path.exists(detail_shot): os.remove(detail_shot)
detail_shot = take_screenshot(d, f"detail_{clean_station_name(name)}_{int(time.time())}")
detail_shot = take_screenshot(d, f"detail_{file_tag}_{int(time.time())}")
after_md5 = get_image_content_md5(detail_shot, top_ratio=SAFE_EXCLUDE_RATIO, bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO)
if before_md5 != after_md5:
logger.info(f"成功进入详情页: {name}")
total_piles = None
free_piles = None
piles_detail = None
address_detail = None
parking_info = None
if isinstance(piles, list):
total_sum = 0
free_sum = 0
for p in piles:
if not isinstance(p, dict):
continue
t = p.get("total")
f = p.get("idle")
try:
if t is not None:
total_sum += int(t)
except Exception:
pass
try:
if f is not None:
free_sum += int(f)
except Exception:
pass
if total_sum > 0:
total_piles = total_sum
free_piles = free_sum
piles_detail = piles
try:
total_piles = None
free_piles = None
if isinstance(piles, list):
total_sum = 0
free_sum = 0
for p in piles:
if not isinstance(p, dict):
continue
t = p.get("total")
f = p.get("idle")
try:
if t is not None:
total_sum += int(t)
except Exception:
pass
try:
if f is not None:
free_sum += int(f)
except Exception:
pass
if total_sum > 0:
total_piles = total_sum
free_piles = free_sum
await self.service.save_station_profile_and_status(
station_name=name,
address=None,
total_piles=total_piles,
free_piles=free_piles,
piles_detail=None,
parking_info=None,
distance=distance
)
basic_info = await self.read_image_kit.analyze_detail_basic_info(detail_shot)
if isinstance(basic_info, dict):
addr = basic_info.get("address")
if addr:
address_detail = addr
park = basic_info.get("parking_info")
if park:
parking_info = park
except Exception as e:
logger.warning(f"兜底写入场站基础信息失败: {name}, {e}")
logger.warning(f"详情页基础信息识别失败: {e}")
# --- 新增:点击“阶段性电价”按钮以获取完整电价列表 ---
# 使用 OCR 探测价格入口
dqdf_pos = detect_price_info_container_cv(detail_shot)
@@ -230,7 +234,7 @@ class YeLiTeCrawler(BaseCrawler):
logger.info("正在从顶部开始向下逐页截图...")
max_scroll_down_pages = 10
for p_idx in range(1, max_scroll_down_pages + 1):
p_shot = take_screenshot(d, f"detail_price_{clean_station_name(name)}_{int(time.time())}_{p_idx}")
p_shot = take_screenshot(d, f"detail_price_{file_tag}_{int(time.time())}_{p_idx}")
before_dn_md5 = get_image_content_md5(p_shot)
d.swipe(scroll_x, scroll_bottom_y, scroll_x, scroll_top_y, 0.2)
@@ -256,7 +260,18 @@ class YeLiTeCrawler(BaseCrawler):
# --------------------------------------------------
# 启动后台任务处理详情页
task = asyncio.create_task(self.analyze_detail_background(name, detail_shots, distance=distance))
task = asyncio.create_task(
self.analyze_detail_background(
name,
detail_shots,
address=address_detail,
distance=distance,
total_piles=total_piles,
free_piles=free_piles,
piles_detail=piles_detail,
parking_info=parking_info,
)
)
background_tasks.append(task)
processed_count += 1
@@ -305,7 +320,17 @@ class YeLiTeCrawler(BaseCrawler):
return processed_count
async def analyze_detail_background(self, station_name, image_paths, address=None, distance=None):
async def analyze_detail_background(
self,
station_name,
image_paths,
address=None,
distance=None,
total_piles=None,
free_piles=None,
piles_detail=None,
parking_info=None,
):
"""
后台异步分析详情页 (支持多张截图合并)
"""
@@ -341,7 +366,16 @@ class YeLiTeCrawler(BaseCrawler):
unique_prices.sort(key=lambda x: x.get('start', '00:00'))
if unique_prices:
await self.service.process_price_detail_data(station_name, unique_prices, address=address, distance=distance)
await self.service.process_price_detail_data(
station_name,
unique_prices,
address=address,
distance=distance,
total_piles=total_piles,
free_piles=free_piles,
piles_detail=piles_detail,
parking_info=parking_info,
)
logger.info(f"场站 {station_name} 价格分析完成并入库 (记录数: {len(unique_prices)}, 地址: {address}, 距离: {distance})")
else:
logger.warning(f"场站 {station_name} 未识别到价格信息")

View File

@@ -51,6 +51,13 @@ def get_file_md5(file_path):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def get_name_md5(name):
    """Return a fixed-length hex tag derived from ``name``.

    Args:
        name: Value to hash. Non-string values are converted with ``str()``.

    Returns:
        The literal ``"unknown"`` when ``name`` is falsy (``None``, ``""``,
        ``0`` ...); otherwise the 32-character hexadecimal MD5 digest of the
        UTF-8 encoded text.
    """
    if not name:
        return "unknown"
    # Hash the textual form so that non-string inputs are accepted as well.
    text = name if isinstance(name, str) else str(name)
    return hashlib.md5(text.encode("utf-8")).hexdigest()
def get_image_content_md5(file_path, top_ratio=0.1, bottom_ratio=0.1):
"""
计算图片核心内容的 MD5 值(排除状态栏和导航栏)

View File

@@ -98,6 +98,28 @@ class ReadImageKit:
logger.error(f"Failed VLM Response: {res_text}")
return []
async def analyze_detail_basic_info(self, image_path):
    """Extract basic station info from a detail-page first-screen screenshot.

    Sends ``image_path`` together with a fixed prompt to ``self.vlm`` and
    parses the JSON text extracted from the model's reply.

    Args:
        image_path: Screenshot path forwarded to ``self.vlm.analyze_image``.

    Returns:
        The parsed JSON object as a ``dict`` (the prompt asks for the keys
        ``name``, ``address`` and ``parking_info``, each possibly ``null``).
        An empty ``dict`` is returned when the parsed value is not a JSON
        object or when any step raises.
    """
    # The prompt is sent to the model verbatim; its lines stay flush-left so
    # that no source indentation leaks into the string.
    prompt = """
分析这张充电站详情页首屏截图,提取以下信息并返回 JSON
{
"name": "场站名称",
"address": "完整地址",
"parking_info": "停车收费信息"
}
name 为页面标题中的场站名称address 为定位图标附近的完整地址parking_info 为页面中与停车收费相关的文字。如果某项无法识别,请将该字段设为 null。
只返回纯 JSON 对象,不要包含额外说明文字。
"""
    try:
        res_text = await self.vlm.analyze_image(image_path, prompt)
        json_str = self.vlm.extract_json(res_text)
        data = json.loads(json_str)
        # Only a JSON object is a usable result; lists/scalars are discarded.
        if isinstance(data, dict):
            return data
        return {}
    except Exception as e:
        # Best-effort: any failure is logged and reported as an empty result.
        logger.error(f"分析详情页基础信息失败: {e}")
        return {}
@classmethod
async def detect_ad_popup(cls, image_path, device_info=None):
"""

View File

@@ -16,6 +16,7 @@ from Model.StationProfile import StationProfile
from Model.StationStatus import StationStatus
from Model.StationPriceSchedule import StationPriceSchedule
from Apps.YeLiTe.Config.Setting import PRICE_FLATTEN_TO_24H
import re
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
@@ -89,7 +90,17 @@ class YeLiTeService:
except Exception as e:
logger.error(f"更新任务结束日志失败: {e}")
async def process_price_detail_data(self, station_name, hourly_schedule, address=None, distance=None) -> bool:
async def process_price_detail_data(
self,
station_name,
hourly_schedule,
address=None,
distance=None,
total_piles=None,
free_piles=None,
piles_detail=None,
parking_info=None,
) -> bool:
if not station_name or not hourly_schedule:
return False
@@ -98,6 +109,54 @@ class YeLiTeService:
schedule_to_save = hourly_schedule
standardized_piles = []
total = total_piles or 0
free = free_piles or 0
if isinstance(piles_detail, list):
for idx, p in enumerate(piles_detail):
try:
t = int(p.get("total", 0))
f = int(p.get("idle", p.get("free", 0)))
total += t
free += f
standardized_piles.append(
{
"pile_no": f"G{idx+1}",
"type": p.get("type", "未知"),
"power": "",
"status_text": f"空闲{f}/总{t}",
"remark": "列表页忙闲",
}
)
except Exception:
continue
elif isinstance(piles_detail, str):
piles_str = piles_detail
nums = re.findall(r"\d+", piles_str)
if len(nums) >= 2:
try:
f_val = int(nums[-2])
t_val = int(nums[-1])
free += f_val
total += t_val
standardized_piles.append(
{
"pile_no": "G1",
"type": "未知",
"power": "",
"status_text": piles_str,
"remark": "列表页忙闲(字符串)",
}
)
except Exception:
pass
if standardized_piles:
total_piles = total
free_piles = free
piles_detail = standardized_piles
use_flatten = (PRICE_FLATTEN_TO_24H_GLOBAL or PRICE_FLATTEN_TO_24H) and isinstance(hourly_schedule, list)
if use_flatten:
@@ -181,11 +240,12 @@ class YeLiTeService:
session=session,
id=status_id,
station_hash=station_hash,
total_piles=None,
free_piles=None,
piles_detail_json=None,
current_price=current_price_info.get("price"),
total_piles=total_piles,
free_piles=free_piles,
piles_detail_json=piles_detail,
parking_info=parking_info,
distance=distance,
current_price=current_price_info.get("price"),
valid_start_time=now,
)

View File

@@ -53,7 +53,7 @@ REDIS_MAX_CONNECTIONS = 200
# 临时图片存储路径
TEMP_IMAGE_DIR = r"d:\dsWork\aiData\Output"
# 分时价格全局配置
# False: 默认按各个供应商自己的 PRICE_FLATTEN_TO_24H 决定
# True: 强制所有供应商都铺平成 24 小时整点数组
PRICE_FLATTEN_TO_24H_GLOBAL = False
# 分时价格全局配置
# False: 默认按各个供应商自己的 PRICE_FLATTEN_TO_24H 决定
# True: 强制所有供应商都铺平成 24 小时整点数组
PRICE_FLATTEN_TO_24H_GLOBAL = True

View File

@@ -162,11 +162,7 @@ async def main():
mode_text = "全量清理" if mode_choice == MODE_ALL else "仅清理历史"
print(f"\n🚀 即将对 {len(selected_vendors)} 个供应商执行 [{mode_text}] 操作...")
confirm = input("确认执行吗?(y/n): ").strip().lower()
if confirm != 'y':
print("已取消操作。")
return
# 初始化资源
db = Db(db_url=DB_URL)
await db.init_db()

View File

@@ -1,5 +0,0 @@
# Ad-hoc script: connect to a device and click via the uiautomator2 image API.
import uiautomator2 as u2
# pip install findit
# NOTE(review): connect() is called without arguments — presumably it picks
# the default attached device; confirm.
d = u2.connect()
# NOTE(review): presumably clicks where the template image matches on screen,
# using the `findit` package noted above — confirm.
d.image.click("../Template/X1.jpg")

Binary file not shown.