"""Search Sogou's WeChat index for account ids and print the linked page.

For every id extracted from ``id.txt`` the script opens the Sogou WeChat
search page in Microsoft Edge, solves the captcha with ``ddddocr`` when one
is shown, follows the first link of the first result box and prints the
page text plus the image and video URLs it finds.
"""
import re
import time
from urllib.parse import quote

import ddddocr
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service as EdgeService

# Default location of the Edge WebDriver binary.
DEFAULT_DRIVER_PATH = r'D:\222\msedgedriver.exe'

# File the captcha screenshot is written to before being passed to the OCR.
CAPTCHA_IMAGE_PATH = 'tempVerification_code.png'

# Extracts the account id from lines shaped like:  'query' => '<id>'
QUERY_PATTERN = re.compile(r'\'query\' \=\> \'(.*)\'')

# Injected into every new document so that `navigator.webdriver` reads as
# undefined instead of true.
HIDE_WEBDRIVER_SCRIPT = """
    Object.defineProperty(navigator, 'webdriver', {
      get: () => undefined
    })
"""


def getWeb(driver_path=DEFAULT_DRIVER_PATH, *, headless=False):
    """Start Edge and store the driver in the module-level ``edge`` global.

    Args:
        driver_path: Path to the ``msedgedriver`` executable.
        headless: When true, start the browser without a visible window.

    Returns:
        The created driver (also available as the global ``edge``).
    """
    global edge

    edge_options = Options()
    # Experimental options are keyed by name, so both switches must be given
    # in a single call; a second call with the same key replaces the first.
    edge_options.add_experimental_option(
        "excludeSwitches", ["enable-logging", "enable-automation"]
    )
    edge_options.add_experimental_option('useAutomationExtension', False)
    if headless:
        edge_options.add_argument('--headless')

    service = EdgeService(executable_path=driver_path)
    edge = webdriver.Edge(service=service, options=edge_options)
    edge.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": HIDE_WEBDRIVER_SCRIPT},
    )
    return edge


def getSoGou_link(WX_ID):
    """Search Sogou for ``WX_ID`` and return the first result link.

    While a captcha image is present it is screenshotted, recognised with
    ``ddddocr`` and submitted; if the captcha is still shown afterwards a new
    image is requested and the attempt is repeated.

    Args:
        WX_ID: Account id (or search text) to look up.

    Returns:
        The ``href`` of the first matching link, or ``False`` when the result
        box contains no link.
    """
    # Percent-encode the id so characters such as '&' or '#' cannot alter
    # the query string.
    url = (
        'https://weixin.sogou.com/weixin?type=1&s_from=input'
        f'&query={quote(str(WX_ID), safe="")}'
    )
    edge.get(url)
    print(">>> 开始识别验证码")

    ocr = None  # created lazily: loading the OCR model is expensive
    # NOTE(review): the loop has no attempt limit; it ends only when the
    # captcha image or the refresh control can no longer be found.
    while True:
        try:
            edge.find_element(By.CSS_SELECTOR, ' #seccodeImage').screenshot(
                CAPTCHA_IMAGE_PATH
            )
        except WebDriverException:
            # No captcha image on the page: nothing (more) to solve.
            break

        if ocr is None:
            ocr = ddddocr.DdddOcr(show_ad=False)
        with open(CAPTCHA_IMAGE_PATH, 'rb') as f:
            img_bytes = f.read()
        Verification_code = ocr.classification(img_bytes).upper()
        print('>>> 识别成功!')
        print(Verification_code)

        edge.find_element(By.NAME, 'c').send_keys(Verification_code)
        time.sleep(0.5)
        edge.find_element(By.ID, 'submit').click()
        time.sleep(1)

        try:
            # Still on the captcha page: request a fresh image and retry.
            edge.find_element(By.ID, 'change-img').click()
            time.sleep(0.5)
        except WebDriverException:
            # The refresh control is gone, so the captcha page was left.
            break

    WX_link_list = edge.find_elements(
        By.XPATH, '//*[@id="sogou_vr_11002301_box_0"]/dl/dd/a'
    )
    if not WX_link_list:
        return False
    return WX_link_list[0].get_attribute('href')


def getWX_html(link):
    """Open ``link`` and print its text, image URLs and video URLs.

    Each of the three sections is best-effort: a WebDriver failure in one is
    reported and the remaining sections are still attempted.

    Args:
        link: URL of the page to open.
    """
    edge.get(link)
    time.sleep(3)  # give the page time to render its content

    try:
        txt = edge.find_element(By.XPATH, '//*[@id="img-content"]').text
        print(txt.split('\n'))
    except WebDriverException as exc:
        print(f">>> Warning: could not read page text ({type(exc).__name__})")

    try:
        for img in edge.find_elements(By.TAG_NAME, 'img'):
            data_src = img.get_attribute('data-src')
            if data_src is not None:
                print(data_src)
    except WebDriverException as exc:
        print(f">>> Warning: could not read image URLs ({type(exc).__name__})")

    try:
        for video in edge.find_elements(By.TAG_NAME, 'video'):
            print(video.get_attribute('src'))
    except WebDriverException as exc:
        print(f">>> Warning: could not read video URLs ({type(exc).__name__})")


def main(id_file='id.txt'):
    """Process every account id found in ``id_file``.

    Args:
        id_file: UTF-8 text file containing ``'query' => '<id>'`` entries.
    """
    with open(id_file, 'r', encoding='UTF-8') as file:
        id_data = file.read()
    wx_ids = QUERY_PATTERN.findall(id_data)

    getWeb()
    try:
        for wx_id in wx_ids:
            print('--' * 60)
            link = getSoGou_link(WX_ID=wx_id)
            # Also covers a link element without an href (None).
            if not link:
                print(">>> Error:没有文章")
                continue
            print(link)
            getWX_html(link=link)
    finally:
        # Always shut the browser and driver process down, even on errors.
        edge.quit()


if __name__ == "__main__":
    main()