'commit'
This commit is contained in:
@@ -241,14 +241,14 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
|
|||||||
entered_price_page = True
|
entered_price_page = True
|
||||||
price_screenshots = []
|
price_screenshots = []
|
||||||
|
|
||||||
logger.info("正在向上滚动价格列表到顶部...")
|
logger.info("正在向上滚动价格列表到顶部 (快速多次滚动以尽快看到 00:00)...")
|
||||||
max_scroll_up_to_top = 8
|
max_scroll_up_to_top = 10
|
||||||
for i in range(max_scroll_up_to_top):
|
for i in range(max_scroll_up_to_top):
|
||||||
before_scroll_path = take_screenshot(d, f"aite_price_up_{i}", save_dir=TEMP_IMAGE_DIR)
|
before_scroll_path = take_screenshot(d, f"aite_price_up_{i}", save_dir=TEMP_IMAGE_DIR)
|
||||||
before_scroll_md5 = Kit.get_image_content_md5(before_scroll_path)
|
before_scroll_md5 = Kit.get_image_content_md5(before_scroll_path)
|
||||||
|
|
||||||
d.swipe_ext("down", scale=0.85)
|
d.swipe_ext("down", scale=0.85)
|
||||||
await asyncio.sleep(1.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
after_scroll_path = take_screenshot(d, f"aite_price_up_after_{i}", save_dir=TEMP_IMAGE_DIR)
|
after_scroll_path = take_screenshot(d, f"aite_price_up_after_{i}", save_dir=TEMP_IMAGE_DIR)
|
||||||
after_scroll_md5 = Kit.get_image_content_md5(after_scroll_path)
|
after_scroll_md5 = Kit.get_image_content_md5(after_scroll_path)
|
||||||
@@ -261,13 +261,13 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
|
|||||||
break
|
break
|
||||||
|
|
||||||
logger.info("正在从顶部开始向下逐页截图...")
|
logger.info("正在从顶部开始向下逐页截图...")
|
||||||
max_scroll_down_pages = 8
|
max_scroll_down_pages = 10
|
||||||
for p_idx in range(1, max_scroll_down_pages + 1):
|
for p_idx in range(1, max_scroll_down_pages + 1):
|
||||||
p_shot = take_screenshot(d, f"price_scroll_{p_idx}_{station_name}", save_dir=TEMP_IMAGE_DIR)
|
p_shot = take_screenshot(d, f"price_scroll_{p_idx}_{station_name}", save_dir=TEMP_IMAGE_DIR)
|
||||||
|
|
||||||
before_dn_md5 = Kit.get_image_content_md5(p_shot)
|
before_dn_md5 = Kit.get_image_content_md5(p_shot)
|
||||||
d.swipe_ext("up", scale=0.8)
|
d.swipe_ext("up", scale=0.8)
|
||||||
await asyncio.sleep(1.2)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
check_dn_path = take_screenshot(d, f"check_dn_{p_idx}", save_dir=TEMP_IMAGE_DIR)
|
check_dn_path = take_screenshot(d, f"check_dn_{p_idx}", save_dir=TEMP_IMAGE_DIR)
|
||||||
after_dn_md5 = Kit.get_image_content_md5(check_dn_path)
|
after_dn_md5 = Kit.get_image_content_md5(check_dn_path)
|
||||||
|
|||||||
Binary file not shown.
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# 采集配置
|
# 采集配置
|
||||||
SCROLL_DISTANCE_RATIO = 0.3
|
SCROLL_DISTANCE_RATIO = 0.3
|
||||||
MAX_STATIONS_COUNT = 20
|
MAX_STATIONS_COUNT = 1
|
||||||
FIRST_RUN_ONLY_ONE_STATION = False
|
FIRST_RUN_ONLY_ONE_STATION = False
|
||||||
REDIS_STATION_EXPIRE = 120
|
REDIS_STATION_EXPIRE = 120
|
||||||
|
|
||||||
|
|||||||
@@ -409,14 +409,14 @@ class TeLaiDianCrawler(BaseCrawler):
|
|||||||
await asyncio.sleep(1.0)
|
await asyncio.sleep(1.0)
|
||||||
|
|
||||||
# 1. 向上滚动到顶部(不断下拉直到看到最上面的 00:00)
|
# 1. 向上滚动到顶部(不断下拉直到看到最上面的 00:00)
|
||||||
logger.info("正在向上滚动价格列表到顶部...")
|
logger.info("正在向上滚动价格列表到顶部 (快速多次滚动以尽快看到 00:00)...")
|
||||||
max_scroll_up_to_top = 8
|
max_scroll_up_to_top = 10
|
||||||
for i in range(max_scroll_up_to_top):
|
for i in range(max_scroll_up_to_top):
|
||||||
before_scroll_path = take_screenshot(d, f"scroll_up_{i}.jpg")
|
before_scroll_path = take_screenshot(d, f"scroll_up_{i}.jpg")
|
||||||
before_scroll_md5 = get_image_content_md5(before_scroll_path)
|
before_scroll_md5 = get_image_content_md5(before_scroll_path)
|
||||||
|
|
||||||
d.swipe_ext("down", scale=0.85)
|
d.swipe_ext("down", scale=0.85)
|
||||||
await asyncio.sleep(1.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
after_scroll_path = take_screenshot(d, f"scroll_up_after_{i}.jpg")
|
after_scroll_path = take_screenshot(d, f"scroll_up_after_{i}.jpg")
|
||||||
after_scroll_md5 = get_image_content_md5(after_scroll_path)
|
after_scroll_md5 = get_image_content_md5(after_scroll_path)
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@@ -318,14 +318,14 @@ async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS
|
|||||||
price_screenshots = []
|
price_screenshots = []
|
||||||
|
|
||||||
# 1. 向下滚动到底 (根据用户反馈:只有不断向下滚动,才能看到00点的)
|
# 1. 向下滚动到底 (根据用户反馈:只有不断向下滚动,才能看到00点的)
|
||||||
logger.info("正在向下滚动价格列表到底部...")
|
logger.info("正在向下滚动价格列表到底部 (快速多次滚动以尽快看到 00:00)...")
|
||||||
max_scroll_down = 6
|
max_scroll_down = 10
|
||||||
for i in range(max_scroll_down):
|
for i in range(max_scroll_down):
|
||||||
before_scroll_path = take_screenshot(d, f"scroll_dn_{i}", save_dir=TEMP_IMAGE_DIR)
|
before_scroll_path = take_screenshot(d, f"scroll_dn_{i}", save_dir=TEMP_IMAGE_DIR)
|
||||||
before_scroll_md5 = Kit.get_image_content_md5(before_scroll_path)
|
before_scroll_md5 = Kit.get_image_content_md5(before_scroll_path)
|
||||||
|
|
||||||
d.swipe_ext("up", scale=0.8)
|
d.swipe_ext("up", scale=0.8)
|
||||||
await asyncio.sleep(1.2)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
after_scroll_path = take_screenshot(d, f"scroll_dn_after_{i}", save_dir=TEMP_IMAGE_DIR)
|
after_scroll_path = take_screenshot(d, f"scroll_dn_after_{i}", save_dir=TEMP_IMAGE_DIR)
|
||||||
after_scroll_md5 = Kit.get_image_content_md5(after_scroll_path)
|
after_scroll_md5 = Kit.get_image_content_md5(after_scroll_path)
|
||||||
@@ -340,7 +340,7 @@ async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS
|
|||||||
|
|
||||||
# 2. 向上滚动并逐页截图 (从底向上抓取)
|
# 2. 向上滚动并逐页截图 (从底向上抓取)
|
||||||
logger.info("正在向上滚动价格列表并逐页截图...")
|
logger.info("正在向上滚动价格列表并逐页截图...")
|
||||||
max_scroll_up = 8
|
max_scroll_up = 10
|
||||||
for p_idx in range(1, max_scroll_up + 1):
|
for p_idx in range(1, max_scroll_up + 1):
|
||||||
# 截图当前页
|
# 截图当前页
|
||||||
p_uuid = f"{hashlib.md5(station_name.encode('utf-8')).hexdigest()}_p_{p_idx}"
|
p_uuid = f"{hashlib.md5(station_name.encode('utf-8')).hexdigest()}_p_{p_idx}"
|
||||||
@@ -349,8 +349,8 @@ async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS
|
|||||||
|
|
||||||
# 检查是否还能向上滚动
|
# 检查是否还能向上滚动
|
||||||
before_up_md5 = Kit.get_image_content_md5(p_path)
|
before_up_md5 = Kit.get_image_content_md5(p_path)
|
||||||
d.swipe_ext("down", scale=0.85) # 向上滚动 = 手势向下
|
d.swipe_ext("down", scale=0.85)
|
||||||
await asyncio.sleep(1.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
# 检查是否还有新内容
|
# 检查是否还有新内容
|
||||||
check_up_path = take_screenshot(d, f"check_up_{p_idx}", save_dir=TEMP_IMAGE_DIR)
|
check_up_path = take_screenshot(d, f"check_up_{p_idx}", save_dir=TEMP_IMAGE_DIR)
|
||||||
|
|||||||
Binary file not shown.
@@ -185,14 +185,14 @@ class YiLaiTeCrawler(BaseCrawler):
|
|||||||
if os.path.exists(detail_shot): os.remove(detail_shot)
|
if os.path.exists(detail_shot): os.remove(detail_shot)
|
||||||
|
|
||||||
# 1. 向下滚动到底 (根据用户反馈:只有不断向下滚动,才能看到00点的)
|
# 1. 向下滚动到底 (根据用户反馈:只有不断向下滚动,才能看到00点的)
|
||||||
logger.info("正在向下滚动价格列表到底部...")
|
logger.info("正在向下滚动价格列表到底部 (快速多次滚动以尽快看到 00:00)...")
|
||||||
max_scroll_down = 6
|
max_scroll_down = 10
|
||||||
for i in range(max_scroll_down):
|
for i in range(max_scroll_down):
|
||||||
before_scroll_path = take_screenshot(d, f"scroll_dn_{i}")
|
before_scroll_path = take_screenshot(d, f"scroll_dn_{i}")
|
||||||
before_scroll_md5 = get_image_content_md5(before_scroll_path)
|
before_scroll_md5 = get_image_content_md5(before_scroll_path)
|
||||||
|
|
||||||
d.swipe_ext("up", scale=0.8) # 向下滚动 = 手势向上
|
d.swipe_ext("up", scale=0.8)
|
||||||
await asyncio.sleep(1.2)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
after_scroll_path = take_screenshot(d, f"scroll_dn_after_{i}")
|
after_scroll_path = take_screenshot(d, f"scroll_dn_after_{i}")
|
||||||
after_scroll_md5 = get_image_content_md5(after_scroll_path)
|
after_scroll_md5 = get_image_content_md5(after_scroll_path)
|
||||||
@@ -207,15 +207,15 @@ class YiLaiTeCrawler(BaseCrawler):
|
|||||||
|
|
||||||
# 2. 向上滚动并逐页截图 (从底向上抓取)
|
# 2. 向上滚动并逐页截图 (从底向上抓取)
|
||||||
logger.info("正在向上滚动价格列表并逐页截图...")
|
logger.info("正在向上滚动价格列表并逐页截图...")
|
||||||
max_scroll_up = 8
|
max_scroll_up = 10
|
||||||
for p_idx in range(1, max_scroll_up + 1):
|
for p_idx in range(1, max_scroll_up + 1):
|
||||||
# 截图当前页
|
# 截图当前页
|
||||||
p_shot = take_screenshot(d, f"detail_price_{clean_station_name(name)}_{int(time.time())}_{p_idx}")
|
p_shot = take_screenshot(d, f"detail_price_{clean_station_name(name)}_{int(time.time())}_{p_idx}")
|
||||||
|
|
||||||
# 检查是否还能向上滚动
|
# 检查是否还能向上滚动
|
||||||
before_up_md5 = get_image_content_md5(p_shot)
|
before_up_md5 = get_image_content_md5(p_shot)
|
||||||
d.swipe_ext("down", scale=0.85) # 向上滚动 = 手势向下
|
d.swipe_ext("down", scale=0.85)
|
||||||
await asyncio.sleep(1.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
# 检查是否还有新内容
|
# 检查是否还有新内容
|
||||||
check_up_shot = take_screenshot(d, f"check_up_{p_idx}")
|
check_up_shot = take_screenshot(d, f"check_up_{p_idx}")
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user