handles=driver.window_handles for handle in handles: if handle!=driver.current_window_handle: driver.close() driver.switch_to.window(handle) driver.execute_script("window.scrollBy(0,3000)") time.sleep(5) driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[1]/div[2]/div[1]/div/div/div[3]/div[1]/div/a[1]').click() time.sleep(5)
handles=driver.window_handles for handle in handles: if handle!=driver.current_window_handle: driver.close() driver.switch_to.window(handle) time.sleep(10)
time.sleep(5) handles=driver.window_handles for handle in handles: #关闭之前的界面 if handle!=driver.current_window_handle: driver.close() driver.switch_to.window(handle) time.sleep(10) finally: time.sleep(30) driver.quit()
handles=driver.window_handles for handle in handles: if handle!=driver.current_window_handle: driver.close() driver.switch_to.window(handle) driver.execute_script("window.scrollBy(0,3000)") time.sleep(5) driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[1]/div[2]/div[1]/div/div/div[3]/div[1]/div/a[1]').click() time.sleep(5)
handles=driver.window_handles for handle in handles: if handle!=driver.current_window_handle: driver.close() driver.switch_to.window(handle) time.sleep(10)
time.sleep(5) handles=driver.window_handles for handle in handles: if handle!=driver.current_window_handle: driver.close() driver.switch_to.window(handle) time.sleep(10)
driver.find_element_by_xpath('//*[@id="Pl_Core_UserInfo__6"]/div[2]/div[1]/div/a/span').click() #点击“查看更多”获取更多信息 time.sleep(5) handles=driver.window_handles for handle in handles: #关闭之前的界面 if handle!=driver.current_window_handle: driver.close() driver.switch_to.window(handle) time.sleep(10) source=driver.page_source #获取页面源代码 withopen("information.txt","w",encoding='utf-8') as f: f.write(str(source).translate(non_bmp_map)) #这里的处理方式是为了能让中文正确编码写入文件之中
withopen("information.txt","r",encoding='utf-8') as f: data=f.read()
html=etree.HTML(data)
for i inrange(1,10): tmp1=html.xpath('//*[@id="Pl_Official_PersonalInfo__54"]/div[1]/div/div[2]/div/ul/li[%d]/span[1]'%i) tmp=html.xpath('//*[@id="Pl_Official_PersonalInfo__54"]/div[1]/div/div[2]/div/ul/li[%d]/span[2]'%i) for j inrange(len(tmp)): print(str(tmp1[j].text).strip()+' : ' +str(tmp[j].text).strip())
for i inrange(1,10): tmp1=html.xpath('//*[@id="Pl_Official_PersonalInfo__54"]/div[2]/div/div[2]/div/ul/li[%d]/span[1]'%i) tmp=html.xpath('//*[@id="Pl_Official_PersonalInfo__54"]/div[2]/div/div[2]/div/ul/li[%d]/span[2]'%i)
for j inrange(len(tmp)): print(str(tmp1[j].text).strip()+' : ' +str(tmp[j].text).strip())
for i inrange(1,10): tmp1=html.xpath('//*[@id="Pl_Official_PersonalInfo__54"]/div[3]/div/div[2]/div/ul/li[%d]/span[1]'%i) tmp=html.xpath('//*[@id="Pl_Official_PersonalInfo__54"]/div[3]/div/div[2]/div/ul/li[%d]/span[2]/a'%i)
for j inrange(len(tmp)): print(str(tmp1[j].text).strip()+' : ' +str(tmp[j].text).strip())
xpath='//span[@class="pt_detail"]/a[@target="_blank"]/text()' a=[] b=html.xpath(xpath) res=[] for i inrange(len(b)): res+=b[i].split() print("标签信息",res)