'commit'
This commit is contained in:
@@ -208,6 +208,15 @@ class TeLaiDianCrawler(BaseCrawler):
|
|||||||
remaining = max_to_crawl - current_idx
|
remaining = max_to_crawl - current_idx
|
||||||
logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 处理场站: {name} (坐标: {point}, 距离: {distance}) ---")
|
logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 处理场站: {name} (坐标: {point}, 距离: {distance}) ---")
|
||||||
|
|
||||||
|
# 组装基础场站信息,便于详情页逻辑使用
|
||||||
|
station_info = {
|
||||||
|
"name": name,
|
||||||
|
"distance": distance,
|
||||||
|
"rect": rect,
|
||||||
|
"click_point": point,
|
||||||
|
"busy_list": piles,
|
||||||
|
}
|
||||||
|
|
||||||
# 点击进入详情
|
# 点击进入详情
|
||||||
d.click(point[0], point[1])
|
d.click(point[0], point[1])
|
||||||
logger.info(f"已点击场站 '{name}',等待 {WAIT_DETAIL_PAGE_LOAD}s 加载详情页...")
|
logger.info(f"已点击场站 '{name}',等待 {WAIT_DETAIL_PAGE_LOAD}s 加载详情页...")
|
||||||
@@ -230,7 +239,7 @@ class TeLaiDianCrawler(BaseCrawler):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 爬取详情
|
# 爬取详情
|
||||||
await self.crawl_detail_logic(d, station)
|
await self.crawl_detail_logic(d, station_info)
|
||||||
if os.path.exists(detail_check_path): os.remove(detail_check_path)
|
if os.path.exists(detail_check_path): os.remove(detail_check_path)
|
||||||
|
|
||||||
# 标记为已处理
|
# 标记为已处理
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from Util.LlmUtil import get_llm_response
|
|||||||
|
|
||||||
|
|
||||||
logger = setup_logger("TeLaiDian.FirstPageKit")
|
logger = setup_logger("TeLaiDian.FirstPageKit")
|
||||||
|
TEXT_TOP_RATIO = SAFE_EXCLUDE_RATIO * 0.8
|
||||||
|
|
||||||
NON_STATION_KEYWORDS = [
|
NON_STATION_KEYWORDS = [
|
||||||
"地图",
|
"地图",
|
||||||
@@ -116,21 +117,31 @@ async def run_ocr_rect(image_path, log_path=None):
|
|||||||
|
|
||||||
status = "keep"
|
status = "keep"
|
||||||
reasons = []
|
reasons = []
|
||||||
|
txt = text or ""
|
||||||
if prob < 0.3:
|
if prob < 0.3:
|
||||||
status = "drop"
|
status = "drop"
|
||||||
reasons.append("prob<0.3")
|
reasons.append("prob<0.3")
|
||||||
if not text:
|
if not txt:
|
||||||
status = "drop"
|
status = "drop"
|
||||||
reasons.append("empty_text")
|
reasons.append("empty_text")
|
||||||
if cy_norm < SAFE_EXCLUDE_RATIO:
|
has_station_kw = False
|
||||||
|
has_distance_kw = False
|
||||||
|
if txt:
|
||||||
|
if ("充电站" in txt) or ("超快充" in txt) or ("快充" in txt) or ("慢充" in txt):
|
||||||
|
has_station_kw = True
|
||||||
|
if ("km" in txt) or ("m" in txt):
|
||||||
|
has_distance_kw = True
|
||||||
|
if cy_norm < TEXT_TOP_RATIO and not (
|
||||||
|
(has_station_kw and len(txt) >= 4) or has_distance_kw
|
||||||
|
):
|
||||||
status = "drop"
|
status = "drop"
|
||||||
reasons.append("top_safe_zone")
|
reasons.append("top_safe_zone")
|
||||||
if cy_norm > (1 - BOTTOM_SAFE_EXCLUDE_RATIO):
|
if cy_norm > (1 - BOTTOM_SAFE_EXCLUDE_RATIO):
|
||||||
status = "drop"
|
status = "drop"
|
||||||
reasons.append("bottom_safe_zone")
|
reasons.append("bottom_safe_zone")
|
||||||
if status == "keep" and text:
|
if status == "keep" and txt:
|
||||||
for kw in NON_STATION_KEYWORDS:
|
for kw in NON_STATION_KEYWORDS:
|
||||||
if kw and kw in text:
|
if kw and kw in txt:
|
||||||
status = "drop"
|
status = "drop"
|
||||||
reasons.append("non_station_keyword")
|
reasons.append("non_station_keyword")
|
||||||
break
|
break
|
||||||
@@ -325,10 +336,13 @@ async def run_ocr_rect(image_path, log_path=None):
|
|||||||
prev_y2 = None
|
prev_y2 = None
|
||||||
anchor_ratio = 0.15
|
anchor_ratio = 0.15
|
||||||
for idx, st in enumerate(stations, start=1):
|
for idx, st in enumerate(stations, start=1):
|
||||||
|
name = st.get("station_name")
|
||||||
py = st["anchor_py"]
|
py = st["anchor_py"]
|
||||||
if py < effective_top:
|
anchor_norm = st.get("anchor_point_norm") or {}
|
||||||
log_detail(f"锚点位于顶部保护区,丢弃: {st.get('station_name')} py={py}")
|
try:
|
||||||
continue
|
ay = float(anchor_norm.get("y", py / h))
|
||||||
|
except Exception:
|
||||||
|
ay = py / h
|
||||||
y1 = int(py - box_h * anchor_ratio)
|
y1 = int(py - box_h * anchor_ratio)
|
||||||
y2 = y1 + box_h
|
y2 = y1 + box_h
|
||||||
if min_gap is not None:
|
if min_gap is not None:
|
||||||
@@ -343,9 +357,6 @@ async def run_ocr_rect(image_path, log_path=None):
|
|||||||
if y2 > h:
|
if y2 > h:
|
||||||
y2 = h
|
y2 = h
|
||||||
y1 = y2 - box_h
|
y1 = y2 - box_h
|
||||||
if y1 < effective_top:
|
|
||||||
y1 = effective_top
|
|
||||||
y2 = y1 + box_h
|
|
||||||
if y2 > effective_bottom:
|
if y2 > effective_bottom:
|
||||||
y2 = effective_bottom
|
y2 = effective_bottom
|
||||||
y1 = y2 - box_h
|
y1 = y2 - box_h
|
||||||
@@ -355,15 +366,13 @@ async def run_ocr_rect(image_path, log_path=None):
|
|||||||
y2 += shift
|
y2 += shift
|
||||||
if y2 > effective_bottom:
|
if y2 > effective_bottom:
|
||||||
if idx == len(stations):
|
if idx == len(stations):
|
||||||
min_h = int(box_h * 0.5)
|
|
||||||
new_y1 = prev_y2 + 1
|
new_y1 = prev_y2 + 1
|
||||||
new_y2 = effective_bottom
|
new_y2 = effective_bottom
|
||||||
if new_y2 - new_y1 >= min_h:
|
if new_y2 <= new_y1:
|
||||||
y1 = new_y1
|
|
||||||
y2 = new_y2
|
|
||||||
else:
|
|
||||||
log_detail(f"底部空间不足,丢弃: {st.get('station_name')}")
|
log_detail(f"底部空间不足,丢弃: {st.get('station_name')}")
|
||||||
continue
|
continue
|
||||||
|
y1 = new_y1
|
||||||
|
y2 = new_y2
|
||||||
else:
|
else:
|
||||||
log_detail(f"避免重叠无法放置,丢弃: {st.get('station_name')}")
|
log_detail(f"避免重叠无法放置,丢弃: {st.get('station_name')}")
|
||||||
continue
|
continue
|
||||||
@@ -391,7 +400,7 @@ async def run_ocr_rect(image_path, log_path=None):
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
draw_rectangles(image_path, rects, click_points)
|
draw_rectangles(image_path, rects, click_points, save_vl=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log_detail(f"绘制调试矩形失败: {e}")
|
log_detail(f"绘制调试矩形失败: {e}")
|
||||||
|
|
||||||
@@ -416,8 +425,12 @@ async def run_ocr_rect(image_path, log_path=None):
|
|||||||
async def run_batch_in_dir(image_dir, log_file=None):
|
async def run_batch_in_dir(image_dir, log_file=None):
|
||||||
img_files = []
|
img_files = []
|
||||||
for name in os.listdir(image_dir):
|
for name in os.listdir(image_dir):
|
||||||
if name.lower().endswith(".jpg") or name.lower().endswith(".png"):
|
lower = name.lower()
|
||||||
img_files.append(os.path.join(image_dir, name))
|
if not (lower.endswith(".jpg") or lower.endswith(".png")):
|
||||||
|
continue
|
||||||
|
if "_flag" in lower or "_vl" in lower:
|
||||||
|
continue
|
||||||
|
img_files.append(os.path.join(image_dir, name))
|
||||||
img_files.sort()
|
img_files.sort()
|
||||||
|
|
||||||
if not img_files:
|
if not img_files:
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ def save_image(path, img):
|
|||||||
logger.error(f"Error saving image {path}: {e}")
|
logger.error(f"Error saving image {path}: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def draw_rectangles(image_path, bboxes=None, click_points=None):
|
def draw_rectangles(image_path, bboxes=None, click_points=None, save_vl=True):
|
||||||
"""
|
"""
|
||||||
使用 OpenCV 在图片上绘制矩形框和点击点,生成 _vl.jpg 和 _flag.jpg
|
使用 OpenCV 在图片上绘制矩形框和点击点,生成 _vl.jpg 和 _flag.jpg
|
||||||
- _vl.jpg: 仅包含矩形框,供视觉模型参考
|
- _vl.jpg: 仅包含矩形框,供视觉模型参考
|
||||||
@@ -108,18 +108,18 @@ def draw_rectangles(image_path, bboxes=None, click_points=None):
|
|||||||
if img is None:
|
if img is None:
|
||||||
return image_path
|
return image_path
|
||||||
|
|
||||||
# 1. 先绘制矩形框 (VL 版)
|
base_img = img.copy()
|
||||||
vl_img = img.copy()
|
if bboxes:
|
||||||
if bboxes:
|
for box in bboxes:
|
||||||
for box in bboxes:
|
if len(box) == 4:
|
||||||
if len(box) == 4:
|
cv2.rectangle(base_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), DEBUG_BOX_COLOR, DEBUG_BOX_THICKNESS)
|
||||||
cv2.rectangle(vl_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), DEBUG_BOX_COLOR, DEBUG_BOX_THICKNESS)
|
|
||||||
|
vl_path = None
|
||||||
vl_path = image_path.replace(".jpg", "_vl.jpg")
|
if save_vl:
|
||||||
save_image(vl_path, vl_img)
|
vl_path = image_path.replace(".jpg", "_vl.jpg")
|
||||||
|
save_image(vl_path, base_img)
|
||||||
# 2. 在 VL 版基础上绘制点击点 (Flag 版)
|
|
||||||
flag_img = vl_img.copy()
|
flag_img = base_img.copy()
|
||||||
if click_points:
|
if click_points:
|
||||||
for p in click_points:
|
for p in click_points:
|
||||||
if len(p) == 2:
|
if len(p) == 2:
|
||||||
@@ -131,7 +131,7 @@ def draw_rectangles(image_path, bboxes=None, click_points=None):
|
|||||||
flag_path = image_path.replace(".jpg", "_flag.jpg")
|
flag_path = image_path.replace(".jpg", "_flag.jpg")
|
||||||
save_image(flag_path, flag_img)
|
save_image(flag_path, flag_img)
|
||||||
|
|
||||||
return vl_path, flag_path
|
return vl_path or flag_path, flag_path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"绘制诊断图片失败: {e}")
|
logger.error(f"绘制诊断图片失败: {e}")
|
||||||
return image_path, image_path
|
return image_path, image_path
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -62,8 +62,12 @@ async def run_capture_sequence(d, base_dir: str, pages: int = 5):
|
|||||||
async def offline_validate(base_dir: str):
|
async def offline_validate(base_dir: str):
|
||||||
img_files = []
|
img_files = []
|
||||||
for name in os.listdir(base_dir):
|
for name in os.listdir(base_dir):
|
||||||
if name.lower().endswith(".jpg"):
|
lower = name.lower()
|
||||||
img_files.append(os.path.join(base_dir, name))
|
if not lower.endswith(".jpg"):
|
||||||
|
continue
|
||||||
|
if "_flag" in lower or "_vl" in lower:
|
||||||
|
continue
|
||||||
|
img_files.append(os.path.join(base_dir, name))
|
||||||
img_files.sort()
|
img_files.sort()
|
||||||
|
|
||||||
if not img_files:
|
if not img_files:
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user