1. 自动化连接 driver对象获取与关闭
import socket
import subprocess
import random
import time
import atexitimport psutil
from appium import webdriver
from appium.options.android import UiAutomator2Optionsfrom selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.actions.action_builder import ActionBuilder
from selenium.webdriver.common.actions import interaction
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# 通过端口查找占用该端口的进程并将其关闭
def kill_processes_by_port(port):
    """Kill every process that has a socket in LISTEN state on ``port``.

    Walks all processes reported by psutil, inspects each one's network
    connections and kills the owner of any listening socket bound to the
    given local port. Processes that vanish, are zombies, or cannot be
    inspected are skipped. Prints a notice when nothing was killed.

    Args:
        port: Local TCP port number to look for.
    """
    skippable = (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess)
    found_listener = False

    for process in psutil.process_iter(['pid', 'name']):
        try:
            for connection in process.net_connections():
                # Only listening sockets bound to the requested port matter.
                if connection.status != 'LISTEN' or connection.laddr.port != port:
                    continue
                print(f"结束进程: PID={process.pid}, 名称={process.name()}")
                process.kill()
                found_listener = True
        except skippable:
            # The process went away or is off-limits; move on to the next one.
            continue

    if not found_listener:
        print(f"未找到占用端口 {port} 的进程")


# Pick a currently free port; only ports in range(4000, 5000) are considered.
def get_free_port():
    """Return a random port in 4000-4999 that nothing on 127.0.0.1 accepts connections on.

    Each candidate port is probed with its own short-lived socket. A socket
    that has already connected once cannot be used for another ``connect``,
    so sharing one socket across probes would make every probe after the
    first success fail and report an arbitrary port as free.

    Returns:
        int: A port number in ``range(4000, 5000)`` that refused the probe.

    Raises:
        RuntimeError: If every port in the range accepted a connection.
    """
    host = '127.0.0.1'
    candidates = range(4000, 5000)
    # Visit each candidate at most once, in random order, so the search
    # always terminates instead of spinning forever when the range is full.
    for port in random.sample(candidates, len(candidates)):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            # connect_ex returns 0 when something is listening; any other
            # value means the connection failed, i.e. the port is unused.
            if probe.connect_ex((host, port)) != 0:
                return port
    raise RuntimeError("no free port available in range 4000-4999")


def get_driver(uuid: str) -> "tuple[int, webdriver.Remote]":
    """Start a local Appium server on a free port and open a UiAutomator2 session.

    The return annotation is quoted so it is not evaluated when the function
    is defined.

    Args:
        uuid: Device identifier passed to Appium as ``deviceName``
            (e.g. an emulator address such as ``"127.0.0.1:16384"``).

    Returns:
        A ``(port, driver)`` tuple: the port the Appium server was started on
        and the connected ``webdriver.Remote`` instance.
    """
    port = get_free_port()
    print(f"当前空闲端口:appium -p {port} ")
    # `start /b` and CREATE_NO_WINDOW are Windows-specific: the server is
    # launched in the background without opening a console window.
    cmd = subprocess.Popen(
        f'start /b appium -p {port}',
        shell=True,
        creationflags=subprocess.CREATE_NO_WINDOW,
    )
    # Make sure whatever is listening on the port is killed at interpreter exit.
    atexit.register(kill_processes_by_port, port)
    # Fixed pause before connecting -- presumably to let the server start up.
    time.sleep(5)
    print("cmd: ", cmd.pid, cmd)

    capabilities = {
        "platformName": "Android",
        "automationName": "uiautomator2",
        "newCommandTimeout": 600,
        "deviceName": uuid,
        "noReset": True,
        "skipDeviceInitialization": True,
        "skipServerInstallation": True,
        "autoGrantPermissions": True,
        "enableMultiWindows": True,
    }
    appium_server_url = f'http://127.0.0.1:{port}'
    uiauto = UiAutomator2Options().load_capabilities(capabilities)
    driver = webdriver.Remote(appium_server_url, options=uiauto)
    driver.update_settings({"waitForIdleTimeout": 5})
    return port, driver


if __name__ == '__main__':
    # Connect to the UU emulator device.
    port, driver = get_driver("127.0.0.1:16384")
2. 手机点击操作
2.1. 通过xpath定位元素点击
import socket
import subprocess
import random
import time
import atexitimport psutil
from appium import webdriver
from appium.options.android import UiAutomator2Optionsfrom selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.actions.action_builder import ActionBuilder
from selenium.webdriver.common.actions import interaction
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC# 有重试机制,最多重试三次查找元素进行点击,防止手机加载慢,点击成功后进行休眠指定时间等待手机进行反应
def xpath_get_ele_click(driver: webdriver.Remote, xpath: str, timeout: int = 5, ele_name: str = "元素", sleep_time: float = 0.5):
    """Find an element by XPath and click it, retrying up to three times.

    Each attempt waits up to ``timeout`` seconds for the element to be present.
    An enabled element is clicked and the function then sleeps ``sleep_time``
    seconds; a disabled element is only reported and counts as a completed
    attempt. Any exception raised during an attempt triggers a pause of
    ``sleep_time`` seconds and another try.

    Args:
        driver: Active Appium session.
        xpath: XPath expression locating the element.
        timeout: Seconds to wait for the element on each attempt.
        ele_name: Human-readable element name used in log output.
        sleep_time: Seconds to pause after a click and between retries.

    Raises:
        Exception: The exception from the last attempt, if all three fail.
    """
    last_failure = None
    for _attempt in range(3):
        try:
            element = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.XPATH, f'{xpath}'))
            )
            if not element.is_enabled():
                print(f"{ele_name}无法点击")
            else:
                element.click()
                print(f"开始点击{ele_name}")
                # Pause after the click before returning to the caller.
                time.sleep(sleep_time)
        except Exception as exc:
            last_failure = exc
            time.sleep(sleep_time)
        else:
            # The attempt finished without raising: forget earlier failures.
            last_failure = None
            break

    if last_failure is not None:
        raise last_failure
2.2. 通过x, y像素位置进行定位点击
import socket
import subprocess
import random
import time
import atexitimport psutil
from appium import webdriver
from appium.options.android import UiAutomator2Optionsfrom selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.actions.action_builder import ActionBuilder
from selenium.webdriver.common.actions import interaction
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC# 执行坐标点击
def click_by_coordinates(driver, x, y, delay_after: int = None, **kwargs):
    """Tap the screen at pixel position ``(x, y)`` using a touch pointer.

    Args:
        driver: Active driver the action is performed on.
        x: Horizontal coordinate of the tap.
        y: Vertical coordinate of the tap.
        delay_after: Optional number of seconds to sleep after the tap;
            skipped when falsy.
        **kwargs: Accepted but not used by this function.
    """
    touch = PointerInput(interaction.POINTER_TOUCH, "touch")
    builder = ActionBuilder(driver, mouse=touch)

    # Queue move -> press -> release as one gesture.
    gesture = builder.pointer_action
    gesture.move_to_location(x, y).pointer_down().pointer_up()

    # Send the gesture to the device, then clear the queued actions.
    builder.perform()
    builder.clear_actions()

    if not delay_after:
        return
    time.sleep(delay_after)
2.3. 通过图片识别进行定位点击
有些时候,手机上某些需要点击的区域无法获取到对应的 XML 节点(例如通过 WebView 加载的页面)。这时可以用提前截取好的图标,在当前界面的截图中做图像模板匹配(注意这并不是真正的 OCR 文字识别),找到匹配位置的坐标后再进行点击。
from pathlib import Pathfrom appium import webdriver
import cv2
from utils import click_by_coordinates, get_driver


class ImageLocator:
    """Locate and tap on-screen elements by template-matching a reference image."""

    def __init__(self, driver: webdriver.Remote):
        """Store the driver and make sure the screenshot directory exists.

        Args:
            driver: Active Appium session used for screenshots and taps.
        """
        self.d = driver
        self.screenshot_dir = Path("./")
        self.screenshot_dir.mkdir(exist_ok=True)

    def take_screenshot(self, filename="screen.png"):
        """Save a screenshot of the current screen through the driver.

        Args:
            filename: File name to write inside ``self.screenshot_dir``.

        Returns:
            Path of the written screenshot file.
        """
        screenshot_path = self.screenshot_dir / filename
        self.d.save_screenshot(str(screenshot_path))
        return screenshot_path

    def find_template_position(self, template_path, main_image_path=None, threshold=0.9):
        """Find the best match of a template image inside a screenshot.

        Args:
            template_path: Path of the template image to search for.
            main_image_path: Path of the screenshot to search in; when None a
                fresh screenshot is taken first.
            threshold: Minimum normalized match score (0-1) to accept.

        Returns:
            A dict describing the match (``screen_path``, ``template_path``,
            ``confidence``, ``position`` = center point, ``rectangle`` =
            (top-left, bottom-right), ``height``, ``width``), or None when
            the best score is below ``threshold``.

        Raises:
            FileNotFoundError: If either image cannot be loaded.
        """
        if main_image_path is None:
            main_image_path = self.take_screenshot()

        # cv2.imread returns None instead of raising when a file is unreadable.
        main_img = cv2.imread(str(main_image_path))
        template = cv2.imread(str(template_path))
        if main_img is None or template is None:
            raise FileNotFoundError("图片加载失败,请检查路径是否正确")

        # Matching is done on grayscale versions of both images.
        main_gray = cv2.cvtColor(main_img, cv2.COLOR_BGR2GRAY)
        template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

        res = cv2.matchTemplate(main_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(res)
        if max_val < threshold:
            return None  # no sufficiently good match

        # max_loc is the top-left corner of the best match; derive the center.
        h, w = template_gray.shape
        top_left = max_loc
        center_x = top_left[0] + w // 2
        center_y = top_left[1] + h // 2
        height, width, _ = template.shape

        return {
            "screen_path": str(main_image_path),
            "template_path": str(template_path),
            "confidence": float(max_val),
            "position": (center_x, center_y),
            "rectangle": (top_left, (top_left[0] + w, top_left[1] + h)),
            "height": height,
            "width": width,
        }

    def click_by_image(self, template_path, threshold=0.8, retry=3, offset=0):
        """Tap the on-screen element that matches a template image.

        Args:
            template_path: Path of the template image to look for.
            threshold: Minimum match score (0-1) to accept.
            retry: Maximum number of match attempts.
            offset: Which point of the matched rectangle to tap:
                1 = top-left, 2 = top-right, 3 = bottom-left,
                4 = bottom-right, anything else = center.

        Returns:
            True if a match was found and tapped, False otherwise.
        """
        for _ in range(retry):
            # Each call captures a fresh screenshot, so no separate refresh
            # is needed between attempts.
            result = self.find_template_position(template_path, threshold=threshold)
            if not result:
                continue

            (left, top), (right, bottom) = result['rectangle']
            x, y = result['position']
            if offset == 1:
                x, y = left, top
            elif offset == 2:
                x, y = left + result['width'], top
            elif offset == 3:
                x, y = left, top + result['height']
            elif offset == 4:
                x, y = right, bottom

            click_by_coordinates(self.d, x, y)
            return True
        return False


# Usage example
if __name__ == "__main__":
    # Open a session on the device and wrap it in an image locator.
    port, driver = get_driver("127.0.0.1:16384")
    locator = ImageLocator(driver)

    # Template of the target element. It must be prepared in advance, and the
    # reference image has to be cropped from a screenshot taken on the phone.
    template_path = Path("./more_icon.png")

    # offset selects which point of the matched region is tapped
    # (0 = center; 1-4 = top-left, top-right, bottom-left, bottom-right).
    # The similarity threshold is left at its default of 0.8 (80% similar).
    if not locator.click_by_image(template_path, offset=0):
        print("未找到目标元素")
    else:
        print("点击成功")
3. 手机元素判断
有时候无法获取当前页面的元素,但又需要判断当前页面是否包含某些元素或内容。这种情况下,可以使用 cv2 + numpy 对屏幕截图做图像模板匹配,以此判断目标图片是否出现在当前界面中。
from appium.webdriver import Remote
import cv2
import numpy as npfrom utils import get_driver# 判断图片是否包含另一张图片
def img_ocr_exist(driver: Remote, contrast_img: cv2.typing.MatLike, threshold=0.8) -> bool:
    """Report whether ``contrast_img`` appears on the device's current screen.

    Takes a screenshot through the driver, decodes it in memory and runs
    normalized template matching against the given image.

    Args:
        driver: Active Appium session used to capture the screen.
        contrast_img: Template image to look for (converted from BGR to gray).
        threshold: Minimum best-match score required to count as present.

    Returns:
        True when the best match score is at least ``threshold``, else False.
    """
    # Decode the PNG bytes straight from memory into a BGR image.
    png_bytes = driver.get_screenshot_as_png()
    screen_bgr = cv2.imdecode(
        np.frombuffer(png_bytes, dtype=np.uint8),
        flags=cv2.IMREAD_COLOR,
    )

    # Matching is done on grayscale versions of both images.
    screen_gray = cv2.cvtColor(screen_bgr, cv2.COLOR_BGR2GRAY)
    needle_gray = cv2.cvtColor(contrast_img, cv2.COLOR_BGR2GRAY)

    scores = cv2.matchTemplate(screen_gray, needle_gray, cv2.TM_CCOEFF_NORMED)
    _, best_score, _, _ = cv2.minMaxLoc(scores)
    return bool(best_score >= threshold)


# Usage example
if __name__ == "__main__":
    # Open a session, load the reference icon and check whether it is on screen.
    port, driver = get_driver("127.0.0.1:16384")
    back_img = cv2.imread("./utils/icon/back_icon.png")
    is_exist = img_ocr_exist(driver, back_img)
    if not is_exist:
        print("当前图片在手机上不存在")
    else:
        print("当前图片在手机上是存在的")