diff --git a/Apps/TeLaiDian/Crawler.py b/Apps/TeLaiDian/Crawler.py index a2685b1..028f37e 100644 --- a/Apps/TeLaiDian/Crawler.py +++ b/Apps/TeLaiDian/Crawler.py @@ -208,6 +208,15 @@ class TeLaiDianCrawler(BaseCrawler): remaining = max_to_crawl - current_idx logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 处理场站: {name} (坐标: {point}, 距离: {distance}) ---") + # 组装基础场站信息,便于详情页逻辑使用 + station_info = { + "name": name, + "distance": distance, + "rect": rect, + "click_point": point, + "busy_list": piles, + } + # 点击进入详情 d.click(point[0], point[1]) logger.info(f"已点击场站 '{name}',等待 {WAIT_DETAIL_PAGE_LOAD}s 加载详情页...") @@ -230,7 +239,7 @@ class TeLaiDianCrawler(BaseCrawler): continue # 爬取详情 - await self.crawl_detail_logic(d, station) + await self.crawl_detail_logic(d, station_info) if os.path.exists(detail_check_path): os.remove(detail_check_path) # 标记为已处理 diff --git a/Apps/TeLaiDian/FirstPageKit.py b/Apps/TeLaiDian/FirstPageKit.py index 8e722e5..6f84794 100644 --- a/Apps/TeLaiDian/FirstPageKit.py +++ b/Apps/TeLaiDian/FirstPageKit.py @@ -16,6 +16,7 @@ from Util.LlmUtil import get_llm_response logger = setup_logger("TeLaiDian.FirstPageKit") +TEXT_TOP_RATIO = SAFE_EXCLUDE_RATIO * 0.8 NON_STATION_KEYWORDS = [ "地图", @@ -116,21 +117,31 @@ async def run_ocr_rect(image_path, log_path=None): status = "keep" reasons = [] + txt = text or "" if prob < 0.3: status = "drop" reasons.append("prob<0.3") - if not text: + if not txt: status = "drop" reasons.append("empty_text") - if cy_norm < SAFE_EXCLUDE_RATIO: + has_station_kw = False + has_distance_kw = False + if txt: + if ("充电站" in txt) or ("超快充" in txt) or ("快充" in txt) or ("慢充" in txt): + has_station_kw = True + if ("km" in txt) or ("m" in txt): + has_distance_kw = True + if cy_norm < TEXT_TOP_RATIO and not ( + (has_station_kw and len(txt) >= 4) or has_distance_kw + ): status = "drop" reasons.append("top_safe_zone") if cy_norm > (1 - BOTTOM_SAFE_EXCLUDE_RATIO): status = "drop" reasons.append("bottom_safe_zone") - if status == "keep" and text: + if status == "keep" and txt: for kw in NON_STATION_KEYWORDS: - if kw and kw in text: + if kw and kw in txt: status = "drop" reasons.append("non_station_keyword") break @@ -325,10 +336,13 @@ async def run_ocr_rect(image_path, log_path=None): prev_y2 = None anchor_ratio = 0.15 for idx, st in enumerate(stations, start=1): + name = st.get("station_name") py = st["anchor_py"] - if py < effective_top: - log_detail(f"锚点位于顶部保护区,丢弃: {st.get('station_name')} py={py}") - continue + anchor_norm = st.get("anchor_point_norm") or {} + try: + ay = float(anchor_norm.get("y", py / h)) + except Exception: + ay = py / h y1 = int(py - box_h * anchor_ratio) y2 = y1 + box_h if min_gap is not None: @@ -343,9 +357,6 @@ async def run_ocr_rect(image_path, log_path=None): if y2 > h: y2 = h y1 = y2 - box_h - if y1 < effective_top: - y1 = effective_top - y2 = y1 + box_h if y2 > effective_bottom: y2 = effective_bottom y1 = y2 - box_h @@ -355,15 +366,13 @@ async def run_ocr_rect(image_path, log_path=None): y2 += shift if y2 > effective_bottom: if idx == len(stations): - min_h = int(box_h * 0.5) new_y1 = prev_y2 + 1 new_y2 = effective_bottom - if new_y2 - new_y1 >= min_h: - y1 = new_y1 - y2 = new_y2 - else: + if new_y2 <= new_y1: log_detail(f"底部空间不足,丢弃: {st.get('station_name')}") continue + y1 = new_y1 + y2 = new_y2 else: log_detail(f"避免重叠无法放置,丢弃: {st.get('station_name')}") continue @@ -391,7 +400,7 @@ async def run_ocr_rect(image_path, log_path=None): ) try: - draw_rectangles(image_path, rects, click_points) + draw_rectangles(image_path, rects, click_points, save_vl=False) except Exception as e: log_detail(f"绘制调试矩形失败: {e}") @@ -416,8 +425,12 @@ async def run_ocr_rect(image_path, log_path=None): async def run_batch_in_dir(image_dir, log_file=None): img_files = [] for name in os.listdir(image_dir): - if name.lower().endswith(".jpg") or name.lower().endswith(".png"): - img_files.append(os.path.join(image_dir, name)) + lower = name.lower() + if not (lower.endswith(".jpg") or lower.endswith(".png")): + continue + if "_flag" in lower or "_vl" in lower: + continue + img_files.append(os.path.join(image_dir, name)) img_files.sort() if not img_files: diff --git a/Apps/TeLaiDian/Kit.py b/Apps/TeLaiDian/Kit.py index 1f9b767..6391334 100644 --- a/Apps/TeLaiDian/Kit.py +++ b/Apps/TeLaiDian/Kit.py @@ -93,7 +93,7 @@ def save_image(path, img): logger.error(f"Error saving image {path}: {e}") return False -def draw_rectangles(image_path, bboxes=None, click_points=None): +def draw_rectangles(image_path, bboxes=None, click_points=None, save_vl=True): """ 使用 OpenCV 在图片上绘制矩形框和点击点,生成 _vl.jpg 和 _flag.jpg - _vl.jpg: 仅包含矩形框,供视觉模型参考 @@ -108,18 +108,18 @@ def draw_rectangles(image_path, bboxes=None, click_points=None): if img is None: return image_path - # 1. 先绘制矩形框 (VL 版) - vl_img = img.copy() - if bboxes: - for box in bboxes: - if len(box) == 4: - cv2.rectangle(vl_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), DEBUG_BOX_COLOR, DEBUG_BOX_THICKNESS) - - vl_path = image_path.replace(".jpg", "_vl.jpg") - save_image(vl_path, vl_img) - - # 2. 在 VL 版基础上绘制点击点 (Flag 版) - flag_img = vl_img.copy() + base_img = img.copy() + if bboxes: + for box in bboxes: + if len(box) == 4: + cv2.rectangle(base_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), DEBUG_BOX_COLOR, DEBUG_BOX_THICKNESS) + + vl_path = None + if save_vl: + vl_path = image_path.replace(".jpg", "_vl.jpg") + save_image(vl_path, base_img) + + flag_img = base_img.copy() if click_points: for p in click_points: if len(p) == 2: @@ -131,7 +131,7 @@ def draw_rectangles(image_path, bboxes=None, click_points=None): flag_path = image_path.replace(".jpg", "_flag.jpg") save_image(flag_path, flag_img) - return vl_path, flag_path + return vl_path or flag_path, flag_path except Exception as e: logger.error(f"绘制诊断图片失败: {e}") return image_path, image_path diff --git a/Apps/TeLaiDian/__pycache__/FirstPageKit.cpython-310.pyc b/Apps/TeLaiDian/__pycache__/FirstPageKit.cpython-310.pyc index 3be1edd..f9d8495 100644 Binary files a/Apps/TeLaiDian/__pycache__/FirstPageKit.cpython-310.pyc and b/Apps/TeLaiDian/__pycache__/FirstPageKit.cpython-310.pyc differ diff --git a/Apps/TelaiDian/__pycache__/Crawler.cpython-310.pyc b/Apps/TelaiDian/__pycache__/Crawler.cpython-310.pyc index 7beeafb..e2fbda1 100644 Binary files a/Apps/TelaiDian/__pycache__/Crawler.cpython-310.pyc and b/Apps/TelaiDian/__pycache__/Crawler.cpython-310.pyc differ diff --git a/Apps/TelaiDian/__pycache__/Kit.cpython-310.pyc b/Apps/TelaiDian/__pycache__/Kit.cpython-310.pyc index f9ae26b..b58806f 100644 Binary files a/Apps/TelaiDian/__pycache__/Kit.cpython-310.pyc and b/Apps/TelaiDian/__pycache__/Kit.cpython-310.pyc differ diff --git a/Apps/TelaiDian/__pycache__/Opener.cpython-310.pyc b/Apps/TelaiDian/__pycache__/Opener.cpython-310.pyc index 35070dc..0793b4f 100644 Binary files a/Apps/TelaiDian/__pycache__/Opener.cpython-310.pyc and b/Apps/TelaiDian/__pycache__/Opener.cpython-310.pyc differ diff --git a/Test/TestFirstPage/T_TeLaiDian_FirstPageScrollTest.py b/Test/TestFirstPage/T_TeLaiDian_FirstPageScrollTest.py index 01060fc..93c1e2f 100644 --- a/Test/TestFirstPage/T_TeLaiDian_FirstPageScrollTest.py +++ b/Test/TestFirstPage/T_TeLaiDian_FirstPageScrollTest.py @@ -62,8 +62,12 @@ async def run_capture_sequence(d, base_dir: str, pages: int = 5): async def offline_validate(base_dir: str): img_files = [] for name in os.listdir(base_dir): - if name.lower().endswith(".jpg"): - img_files.append(os.path.join(base_dir, name)) + lower = name.lower() + if not lower.endswith(".jpg"): + continue + if "_flag" in lower or "_vl" in lower: + continue + img_files.append(os.path.join(base_dir, name)) img_files.sort() if not img_files: diff --git a/Test/TestFirstPage/__pycache__/T_TeLaiDian_FirstPageScrollTest.cpython-310.pyc b/Test/TestFirstPage/__pycache__/T_TeLaiDian_FirstPageScrollTest.cpython-310.pyc index 3dfc0e3..86e2c35 100644 Binary files a/Test/TestFirstPage/__pycache__/T_TeLaiDian_FirstPageScrollTest.cpython-310.pyc and b/Test/TestFirstPage/__pycache__/T_TeLaiDian_FirstPageScrollTest.cpython-310.pyc differ