'commit'
This commit is contained in:
@@ -15,7 +15,7 @@ if project_root not in sys.path:
|
||||
|
||||
import uiautomator2 as u2
|
||||
from Apps.AiTeJiYiChong import Kit
|
||||
from Apps.AiTeJiYiChong.Kit import take_screenshot, setup_logger
|
||||
from Apps.AiTeJiYiChong.Kit import take_screenshot, setup_logger, get_name_md5
|
||||
from Apps.AiTeJiYiChong.ReadImageKit import ReadImageKit
|
||||
from Apps.AiTeJiYiChong.FirstPageKit import run_ocr_rect
|
||||
from Util.RedisKit import RedisKit
|
||||
@@ -177,7 +177,7 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
|
||||
|
||||
should_back_to_list = True
|
||||
|
||||
detail_uuid = f"detail_{station_name}_{image_uuid}"
|
||||
detail_uuid = f"detail_{get_name_md5(station_name)}_{image_uuid}"
|
||||
detail_path = take_screenshot(d, detail_uuid, save_dir=TEMP_IMAGE_DIR)
|
||||
|
||||
logger.info(f"已启动后台分析详情页: {station_name}")
|
||||
@@ -268,7 +268,7 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
|
||||
logger.info("正在从顶部开始向下逐页截图...")
|
||||
max_scroll_down_pages = 10
|
||||
for p_idx in range(1, max_scroll_down_pages + 1):
|
||||
p_shot = take_screenshot(d, f"price_scroll_{p_idx}_{station_name}", save_dir=TEMP_IMAGE_DIR)
|
||||
p_shot = take_screenshot(d, f"price_scroll_{p_idx}_{get_name_md5(station_name)}", save_dir=TEMP_IMAGE_DIR)
|
||||
|
||||
before_dn_md5 = Kit.get_image_content_md5(p_shot)
|
||||
d.swipe_ext("up", scale=0.8)
|
||||
|
||||
@@ -35,6 +35,13 @@ def get_file_md5(file_path):
|
||||
hash_md5.update(chunk)
|
||||
return hash_md5.hexdigest()
|
||||
|
||||
def get_name_md5(name):
    """Return a fixed-length ASCII tag derived from a station name.

    The tag is the hexadecimal MD5 digest of the UTF-8 encoded name, which
    makes it usable inside screenshot file names regardless of which
    characters the original name contains.

    Args:
        name: Station name. Non-string values are converted with ``str()``
            before hashing.

    Returns:
        The literal ``"unknown"`` when ``name`` is falsy (``None``, ``""``,
        ``0`` ...); otherwise a 32-character lowercase hex digest.
    """
    if not name:
        return "unknown"
    text = name if isinstance(name, str) else str(name)
    # "surrogatepass" yields the same bytes as the strict handler for
    # well-formed text, but keeps a lone surrogate (e.g. a truncated emoji
    # escape) from raising UnicodeEncodeError.
    payload = text.encode("utf-8", errors="surrogatepass")
    # The digest is only a naming tag, not a security measure; declaring
    # that lets MD5 work on builds where it is blocked for security use.
    return hashlib.md5(payload, usedforsecurity=False).hexdigest()
|
||||
|
||||
|
||||
def get_image_content_md5(file_path, top_ratio=0.1, bottom_ratio=0.1):
|
||||
"""
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -7,7 +7,7 @@ import json
|
||||
import cv2
|
||||
from Apps.TeLaiDian.Kit import (
|
||||
take_screenshot, get_image_content_md5, clean_station_name,
|
||||
setup_logger, read_image, save_image, detect_warm_popup_xczs_cv
|
||||
setup_logger, read_image, save_image, detect_warm_popup_xczs_cv, get_name_md5
|
||||
)
|
||||
from Apps.TeLaiDian.ReadImageKit import ReadImageKit
|
||||
from Apps.TeLaiDian.FirstPageKit import run_ocr_rect
|
||||
@@ -208,6 +208,8 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
remaining = max_to_crawl - current_idx
|
||||
logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 处理场站: {name} (坐标: {point}, 距离: {distance}) ---")
|
||||
|
||||
file_tag = get_name_md5(name)
|
||||
|
||||
# 组装基础场站信息,便于详情页逻辑使用
|
||||
station_info = {
|
||||
"name": name,
|
||||
@@ -223,7 +225,7 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
|
||||
|
||||
# 截图验证是否进入详情页
|
||||
detail_check_path = take_screenshot(d, f"tld_detail_check_{int(time.time())}.jpg")
|
||||
detail_check_path = take_screenshot(d, f"tld_detail_check_{file_tag}_{int(time.time())}.jpg")
|
||||
logger.info(f"详情页快照已保存: {detail_check_path}")
|
||||
|
||||
# 简单验证:如果标题包含 "我的卡券"、"优惠券"、"新人福利" 等,说明点错了
|
||||
@@ -300,7 +302,7 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
"""
|
||||
在详情页提取价格和状态信息
|
||||
"""
|
||||
first_screen_path = take_screenshot(d, f"tld_detail_basic_{int(time.time())}.jpg")
|
||||
first_screen_path = take_screenshot(d, f"tld_detail_basic_{get_name_md5(station_name)}_{int(time.time())}.jpg")
|
||||
station_name = station_info.get("name")
|
||||
address = station_info.get("address")
|
||||
distance = station_info.get("distance")
|
||||
@@ -456,7 +458,7 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
max_scroll_down_pages = 8
|
||||
for p_idx in range(1, max_scroll_down_pages + 1):
|
||||
# 截图当前页
|
||||
p_shot = take_screenshot(d, f"tld_detail_price_{int(time.time())}_{p_idx}.jpg")
|
||||
p_shot = take_screenshot(d, f"tld_detail_price_{get_name_md5(station_name)}_{int(time.time())}_{p_idx}.jpg")
|
||||
|
||||
# 检查是否还能向下滚动
|
||||
before_dn_md5 = get_image_content_md5(p_shot)
|
||||
|
||||
@@ -48,6 +48,13 @@ def get_file_md5(file_path):
|
||||
for chunk in iter(lambda: f.read(4096), b""):
|
||||
hash_md5.update(chunk)
|
||||
return hash_md5.hexdigest()
|
||||
|
||||
def get_name_md5(name):
    """Return a fixed-length ASCII tag derived from a station name.

    The tag is the hexadecimal MD5 digest of the UTF-8 encoded name, which
    makes it usable inside screenshot file names regardless of which
    characters the original name contains.

    Args:
        name: Station name. Non-string values are converted with ``str()``
            before hashing.

    Returns:
        The literal ``"unknown"`` when ``name`` is falsy (``None``, ``""``,
        ``0`` ...); otherwise a 32-character lowercase hex digest.
    """
    if not name:
        return "unknown"
    text = name if isinstance(name, str) else str(name)
    # "surrogatepass" yields the same bytes as the strict handler for
    # well-formed text, but keeps a lone surrogate (e.g. a truncated emoji
    # escape) from raising UnicodeEncodeError.
    payload = text.encode("utf-8", errors="surrogatepass")
    # The digest is only a naming tag, not a security measure; declaring
    # that lets MD5 work on builds where it is blocked for security use.
    return hashlib.md5(payload, usedforsecurity=False).hexdigest()
|
||||
|
||||
def get_image_content_md5(file_path, top_ratio=0.1, bottom_ratio=0.1):
|
||||
"""
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,7 +1,7 @@
|
||||
|
||||
# 采集配置
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
MAX_STATIONS_COUNT = 20
|
||||
MAX_STATIONS_COUNT = 100
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
DATA_RETENTION_DAYS = 365
|
||||
|
||||
Binary file not shown.
@@ -5,7 +5,7 @@ import sys
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv, setup_logger
|
||||
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv, setup_logger, get_name_md5
|
||||
from Apps.YeLiTe.ReadImageKit import ReadImageKit
|
||||
from Apps.YeLiTe.FirstPageKit import run_ocr_rect
|
||||
from Apps.YeLiTe.Service import YeLiTeService
|
||||
@@ -127,7 +127,8 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
remaining = max_to_crawl - current_idx
|
||||
logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 发现新场站: {name} (坐标: {point}, 距离: {distance}) ---")
|
||||
|
||||
before_click_path = take_screenshot(d, f"before_{clean_station_name(name)}")
|
||||
file_tag = get_name_md5(name)
|
||||
before_click_path = take_screenshot(d, f"before_{file_tag}")
|
||||
before_md5 = get_image_content_md5(before_click_path, top_ratio=SAFE_EXCLUDE_RATIO, bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO)
|
||||
|
||||
await asyncio.sleep(0.5)
|
||||
@@ -135,7 +136,7 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
|
||||
|
||||
# 分析详情页 (采用异步后台模式)
|
||||
detail_shot = take_screenshot(d, f"detail_{clean_station_name(name)}_{int(time.time())}")
|
||||
detail_shot = take_screenshot(d, f"detail_{file_tag}_{int(time.time())}")
|
||||
after_md5 = get_image_content_md5(detail_shot, top_ratio=SAFE_EXCLUDE_RATIO, bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO)
|
||||
|
||||
# 清理临时对比图
|
||||
@@ -150,48 +151,51 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
|
||||
# 再次截图检查
|
||||
if os.path.exists(detail_shot): os.remove(detail_shot)
|
||||
detail_shot = take_screenshot(d, f"detail_{clean_station_name(name)}_{int(time.time())}")
|
||||
detail_shot = take_screenshot(d, f"detail_{file_tag}_{int(time.time())}")
|
||||
after_md5 = get_image_content_md5(detail_shot, top_ratio=SAFE_EXCLUDE_RATIO, bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO)
|
||||
|
||||
if before_md5 != after_md5:
|
||||
logger.info(f"成功进入详情页: {name}")
|
||||
|
||||
total_piles = None
|
||||
free_piles = None
|
||||
piles_detail = None
|
||||
address_detail = None
|
||||
parking_info = None
|
||||
if isinstance(piles, list):
|
||||
total_sum = 0
|
||||
free_sum = 0
|
||||
for p in piles:
|
||||
if not isinstance(p, dict):
|
||||
continue
|
||||
t = p.get("total")
|
||||
f = p.get("idle")
|
||||
try:
|
||||
if t is not None:
|
||||
total_sum += int(t)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if f is not None:
|
||||
free_sum += int(f)
|
||||
except Exception:
|
||||
pass
|
||||
if total_sum > 0:
|
||||
total_piles = total_sum
|
||||
free_piles = free_sum
|
||||
piles_detail = piles
|
||||
|
||||
try:
|
||||
total_piles = None
|
||||
free_piles = None
|
||||
if isinstance(piles, list):
|
||||
total_sum = 0
|
||||
free_sum = 0
|
||||
for p in piles:
|
||||
if not isinstance(p, dict):
|
||||
continue
|
||||
t = p.get("total")
|
||||
f = p.get("idle")
|
||||
try:
|
||||
if t is not None:
|
||||
total_sum += int(t)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if f is not None:
|
||||
free_sum += int(f)
|
||||
except Exception:
|
||||
pass
|
||||
if total_sum > 0:
|
||||
total_piles = total_sum
|
||||
free_piles = free_sum
|
||||
await self.service.save_station_profile_and_status(
|
||||
station_name=name,
|
||||
address=None,
|
||||
total_piles=total_piles,
|
||||
free_piles=free_piles,
|
||||
piles_detail=None,
|
||||
parking_info=None,
|
||||
distance=distance
|
||||
)
|
||||
basic_info = await self.read_image_kit.analyze_detail_basic_info(detail_shot)
|
||||
if isinstance(basic_info, dict):
|
||||
addr = basic_info.get("address")
|
||||
if addr:
|
||||
address_detail = addr
|
||||
park = basic_info.get("parking_info")
|
||||
if park:
|
||||
parking_info = park
|
||||
except Exception as e:
|
||||
logger.warning(f"兜底写入场站基础信息失败: {name}, {e}")
|
||||
|
||||
logger.warning(f"详情页基础信息识别失败: {e}")
|
||||
# --- 新增:点击“阶段性电价”按钮以获取完整电价列表 ---
|
||||
# 使用 OCR 探测价格入口
|
||||
dqdf_pos = detect_price_info_container_cv(detail_shot)
|
||||
@@ -230,7 +234,7 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
logger.info("正在从顶部开始向下逐页截图...")
|
||||
max_scroll_down_pages = 10
|
||||
for p_idx in range(1, max_scroll_down_pages + 1):
|
||||
p_shot = take_screenshot(d, f"detail_price_{clean_station_name(name)}_{int(time.time())}_{p_idx}")
|
||||
p_shot = take_screenshot(d, f"detail_price_{file_tag}_{int(time.time())}_{p_idx}")
|
||||
|
||||
before_dn_md5 = get_image_content_md5(p_shot)
|
||||
d.swipe(scroll_x, scroll_bottom_y, scroll_x, scroll_top_y, 0.2)
|
||||
@@ -256,7 +260,18 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
# --------------------------------------------------
|
||||
|
||||
# 启动后台任务处理详情页
|
||||
task = asyncio.create_task(self.analyze_detail_background(name, detail_shots, distance=distance))
|
||||
task = asyncio.create_task(
|
||||
self.analyze_detail_background(
|
||||
name,
|
||||
detail_shots,
|
||||
address=address_detail,
|
||||
distance=distance,
|
||||
total_piles=total_piles,
|
||||
free_piles=free_piles,
|
||||
piles_detail=piles_detail,
|
||||
parking_info=parking_info,
|
||||
)
|
||||
)
|
||||
background_tasks.append(task)
|
||||
|
||||
processed_count += 1
|
||||
@@ -305,7 +320,17 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
|
||||
return processed_count
|
||||
|
||||
async def analyze_detail_background(self, station_name, image_paths, address=None, distance=None):
|
||||
async def analyze_detail_background(
|
||||
self,
|
||||
station_name,
|
||||
image_paths,
|
||||
address=None,
|
||||
distance=None,
|
||||
total_piles=None,
|
||||
free_piles=None,
|
||||
piles_detail=None,
|
||||
parking_info=None,
|
||||
):
|
||||
"""
|
||||
后台异步分析详情页 (支持多张截图合并)
|
||||
"""
|
||||
@@ -341,7 +366,16 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
unique_prices.sort(key=lambda x: x.get('start', '00:00'))
|
||||
|
||||
if unique_prices:
|
||||
await self.service.process_price_detail_data(station_name, unique_prices, address=address, distance=distance)
|
||||
await self.service.process_price_detail_data(
|
||||
station_name,
|
||||
unique_prices,
|
||||
address=address,
|
||||
distance=distance,
|
||||
total_piles=total_piles,
|
||||
free_piles=free_piles,
|
||||
piles_detail=piles_detail,
|
||||
parking_info=parking_info,
|
||||
)
|
||||
logger.info(f"场站 {station_name} 价格分析完成并入库 (记录数: {len(unique_prices)}, 地址: {address}, 距离: {distance})")
|
||||
else:
|
||||
logger.warning(f"场站 {station_name} 未识别到价格信息")
|
||||
|
||||
@@ -51,6 +51,13 @@ def get_file_md5(file_path):
|
||||
hash_md5.update(chunk)
|
||||
return hash_md5.hexdigest()
|
||||
|
||||
def get_name_md5(name):
    """Return a fixed-length ASCII tag derived from a station name.

    The tag is the hexadecimal MD5 digest of the UTF-8 encoded name, which
    makes it usable inside screenshot file names regardless of which
    characters the original name contains.

    Args:
        name: Station name. Non-string values are converted with ``str()``
            before hashing.

    Returns:
        The literal ``"unknown"`` when ``name`` is falsy (``None``, ``""``,
        ``0`` ...); otherwise a 32-character lowercase hex digest.
    """
    if not name:
        return "unknown"
    text = name if isinstance(name, str) else str(name)
    # "surrogatepass" yields the same bytes as the strict handler for
    # well-formed text, but keeps a lone surrogate (e.g. a truncated emoji
    # escape) from raising UnicodeEncodeError.
    payload = text.encode("utf-8", errors="surrogatepass")
    # The digest is only a naming tag, not a security measure; declaring
    # that lets MD5 work on builds where it is blocked for security use.
    return hashlib.md5(payload, usedforsecurity=False).hexdigest()
|
||||
|
||||
def get_image_content_md5(file_path, top_ratio=0.1, bottom_ratio=0.1):
|
||||
"""
|
||||
计算图片核心内容的 MD5 值(排除状态栏和导航栏)
|
||||
|
||||
@@ -98,6 +98,28 @@ class ReadImageKit:
|
||||
logger.error(f"Failed VLM Response: {res_text}")
|
||||
return []
|
||||
|
||||
async def analyze_detail_basic_info(self, image_path):
    """Extract basic station facts from a detail-page screenshot via the VLM.

    Sends ``image_path`` to ``self.vlm.analyze_image`` together with a prompt
    asking for a JSON object holding ``name``, ``address`` and
    ``parking_info``, then parses the JSON found in the reply.

    Args:
        image_path: Screenshot passed through to ``self.vlm.analyze_image``.

    Returns:
        The parsed JSON object as a dict. An empty dict is returned when the
        parsed value is not a dict, or when the VLM call, JSON extraction or
        parsing raises (the failure is logged, not propagated).
    """
    prompt = """
分析这张充电站详情页首屏截图,提取以下信息并返回 JSON:
{
"name": "场站名称",
"address": "完整地址",
"parking_info": "停车收费信息"
}
name 为页面标题中的场站名称,address 为定位图标附近的完整地址,parking_info 为页面中与停车收费相关的文字。如果某项无法识别,请将该字段设为 null。
只返回纯 JSON 对象,不要包含额外说明文字。
"""
    try:
        reply = await self.vlm.analyze_image(image_path, prompt)
        parsed = json.loads(self.vlm.extract_json(reply))
    except Exception as e:
        # Best-effort: any failure degrades to "nothing recognised".
        logger.error(f"分析详情页基础信息失败: {e}")
        return {}
    # Only a JSON object is a usable result; lists/scalars count as empty.
    return parsed if isinstance(parsed, dict) else {}
|
||||
|
||||
@classmethod
|
||||
async def detect_ad_popup(cls, image_path, device_info=None):
|
||||
"""
|
||||
|
||||
@@ -16,6 +16,7 @@ from Model.StationProfile import StationProfile
|
||||
from Model.StationStatus import StationStatus
|
||||
from Model.StationPriceSchedule import StationPriceSchedule
|
||||
from Apps.YeLiTe.Config.Setting import PRICE_FLATTEN_TO_24H
|
||||
import re
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -89,7 +90,17 @@ class YeLiTeService:
|
||||
except Exception as e:
|
||||
logger.error(f"更新任务结束日志失败: {e}")
|
||||
|
||||
async def process_price_detail_data(self, station_name, hourly_schedule, address=None, distance=None) -> bool:
|
||||
async def process_price_detail_data(
|
||||
self,
|
||||
station_name,
|
||||
hourly_schedule,
|
||||
address=None,
|
||||
distance=None,
|
||||
total_piles=None,
|
||||
free_piles=None,
|
||||
piles_detail=None,
|
||||
parking_info=None,
|
||||
) -> bool:
|
||||
if not station_name or not hourly_schedule:
|
||||
return False
|
||||
|
||||
@@ -98,6 +109,54 @@ class YeLiTeService:
|
||||
|
||||
schedule_to_save = hourly_schedule
|
||||
|
||||
standardized_piles = []
|
||||
total = total_piles or 0
|
||||
free = free_piles or 0
|
||||
|
||||
if isinstance(piles_detail, list):
|
||||
for idx, p in enumerate(piles_detail):
|
||||
try:
|
||||
t = int(p.get("total", 0))
|
||||
f = int(p.get("idle", p.get("free", 0)))
|
||||
total += t
|
||||
free += f
|
||||
standardized_piles.append(
|
||||
{
|
||||
"pile_no": f"G{idx+1}",
|
||||
"type": p.get("type", "未知"),
|
||||
"power": "",
|
||||
"status_text": f"空闲{f}/总{t}",
|
||||
"remark": "列表页忙闲",
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
elif isinstance(piles_detail, str):
|
||||
piles_str = piles_detail
|
||||
nums = re.findall(r"\d+", piles_str)
|
||||
if len(nums) >= 2:
|
||||
try:
|
||||
f_val = int(nums[-2])
|
||||
t_val = int(nums[-1])
|
||||
free += f_val
|
||||
total += t_val
|
||||
standardized_piles.append(
|
||||
{
|
||||
"pile_no": "G1",
|
||||
"type": "未知",
|
||||
"power": "",
|
||||
"status_text": piles_str,
|
||||
"remark": "列表页忙闲(字符串)",
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if standardized_piles:
|
||||
total_piles = total
|
||||
free_piles = free
|
||||
piles_detail = standardized_piles
|
||||
|
||||
use_flatten = (PRICE_FLATTEN_TO_24H_GLOBAL or PRICE_FLATTEN_TO_24H) and isinstance(hourly_schedule, list)
|
||||
|
||||
if use_flatten:
|
||||
@@ -181,11 +240,12 @@ class YeLiTeService:
|
||||
session=session,
|
||||
id=status_id,
|
||||
station_hash=station_hash,
|
||||
total_piles=None,
|
||||
free_piles=None,
|
||||
piles_detail_json=None,
|
||||
current_price=current_price_info.get("price"),
|
||||
total_piles=total_piles,
|
||||
free_piles=free_piles,
|
||||
piles_detail_json=piles_detail,
|
||||
parking_info=parking_info,
|
||||
distance=distance,
|
||||
current_price=current_price_info.get("price"),
|
||||
valid_start_time=now,
|
||||
)
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -53,7 +53,7 @@ REDIS_MAX_CONNECTIONS = 200
|
||||
# 临时图片存储路径
|
||||
TEMP_IMAGE_DIR = r"d:\dsWork\aiData\Output"
|
||||
|
||||
# 分时价格全局配置
|
||||
# False: 默认按各个供应商自己的 PRICE_FLATTEN_TO_24H 决定
|
||||
# True: 强制所有供应商都铺平成 24 小时整点数组
|
||||
PRICE_FLATTEN_TO_24H_GLOBAL = False
|
||||
# 分时价格全局配置
|
||||
# False: 默认按各个供应商自己的 PRICE_FLATTEN_TO_24H 决定
|
||||
# True: 强制所有供应商都铺平成 24 小时整点数组
|
||||
PRICE_FLATTEN_TO_24H_GLOBAL = True
|
||||
|
||||
Binary file not shown.
@@ -162,11 +162,7 @@ async def main():
|
||||
|
||||
mode_text = "全量清理" if mode_choice == MODE_ALL else "仅清理历史"
|
||||
print(f"\n🚀 即将对 {len(selected_vendors)} 个供应商执行 [{mode_text}] 操作...")
|
||||
confirm = input("确认执行吗?(y/n): ").strip().lower()
|
||||
if confirm != 'y':
|
||||
print("已取消操作。")
|
||||
return
|
||||
|
||||
|
||||
# 初始化资源
|
||||
db = Db(db_url=DB_URL)
|
||||
await db.init_db()
|
||||
@@ -1,5 +0,0 @@
|
||||
import uiautomator2 as u2
|
||||
# pip install findit
|
||||
d = u2.connect()
|
||||
|
||||
d.image.click("../Template/X1.jpg")
|
||||
BIN
Tools/__pycache__/T6_ClearHistory.cpython-310.pyc
Normal file
BIN
Tools/__pycache__/T6_ClearHistory.cpython-310.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user