# Selenium script: reaches Weibo through a Baidu search, logs in, prints a user's posts and comments, and saves page sources to text files.
import sys
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.by import By
# Translation table mapping every code point above U+FFFF to U+FFFD; applied
# to page sources before they are written to disk.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

# Location of the chromedriver binary handed to webdriver.Chrome.
_CHROME_DRIVER_PATH = 'C:\\Users\\lenovo\\Anaconda3\\Lib\\site-packages\\chromedriver.exe'

# Placeholder credentials typed into the login form (literally "your account"
# / "your password"); replace before running.
_ACCOUNT = "你的账号"
_PASSWORD = '你的密码'

# Keyword typed into the Weibo search box.
_SEARCH_KEYWORD = "随心"

# Label shown on the comment button instead of a number; the script treats it
# as "nothing to scrape".
_NO_COMMENTS_LABEL = "评论"

# The profile feed container id varies between page loads; these are tried in
# this order.
_FEED_IDS = (
    "Pl_Official_MyProfileFeed__18",
    "Pl_Official_MyProfileFeed__17",
    "Pl_Official_MyProfileFeed__19",
)

# Container of the comment list on the post detail page.
_DETAIL_COMMENTS = (
    '//*[@id="Pl_Official_WeiboDetail__70"]'
    '/div/div/div/div[4]/div/div[3]/div[2]/div/div'
)

# Table row holding the three counters printed for each opened profile.
_STATS_ROW = '//*[@id="Pl_Core_T8CustomTriColumn__3"]/div/div/div/table/tbody/tr'


def _xpath(driver, path):
    """Return the first element matching the XPath *path*."""
    return driver.find_element(By.XPATH, path)


def _find_first(driver, xpaths):
    """Return the element for the first XPath in *xpaths* that matches.

    Raises NoSuchElementException (the one from the last candidate) when none
    of the candidates match, and ValueError when *xpaths* is empty.
    """
    last_error = None
    for path in xpaths:
        try:
            return _xpath(driver, path)
        except NoSuchElementException as err:
            last_error = err
    if last_error is None:
        raise ValueError("xpaths must not be empty")
    raise last_error


def _feed_xpaths(suffix):
    """Return *suffix* anchored under each candidate feed container id."""
    return [f'//*[@id="{feed_id}"]{suffix}' for feed_id in _FEED_IDS]


def _parse_count(label):
    """Return *label* as an int, or None when it is not a plain integer."""
    try:
        return int(label)
    except ValueError:
        return None


def _switch_to_other_window(driver):
    """Close the current window and move to the other open window.

    For every handle that is not the current one, the current window is
    closed and the driver switches to that handle, so the driver ends on the
    last such handle.
    """
    for handle in driver.window_handles:
        if handle != driver.current_window_handle:
            driver.close()
            driver.switch_to.window(handle)


def _save_page_source(driver, filename):
    """Write the current page source to *filename* as UTF-8 via non_bmp_map."""
    source = driver.page_source
    with open(filename, "w", encoding='utf-8') as f:
        f.write(str(source).translate(non_bmp_map))


def _open_login_page(driver):
    """Search Baidu for the Weibo keyword and click through to the login page."""
    search_box = _xpath(driver, '//*[@id="kw"]')
    search_box.click()
    search_box.send_keys('微博')
    time.sleep(3)
    _xpath(driver, '//*[@id="su"]').click()
    time.sleep(2)
    # presumably a link in the result list -- TODO confirm against the live page
    _xpath(driver, '//*[@id="2"]/div/div[1]/h3/a').click()
    time.sleep(10)
    _switch_to_other_window(driver)
    driver.execute_script("window.scrollBy(0,3000)")
    time.sleep(5)
    _xpath(
        driver,
        '//*[@id="app"]/div[1]/div[1]/div[2]/div[1]/div/div/div[3]/div[1]/div/a[1]',
    ).click()
    time.sleep(5)
    _xpath(
        driver,
        '//*[@id="app"]/div[4]/div[1]/div/div[2]/div/div/div[5]/a[1]',
    ).click()
    _switch_to_other_window(driver)
    time.sleep(10)


def _log_in(driver):
    """Fill in the login form and click through the follow-up controls."""
    name_field = driver.find_element(By.ID, "loginname")
    name_field.click()
    name_field.send_keys(_ACCOUNT)
    _xpath(driver, '//*[@id="pl_login_form"]/div/div[3]/div[2]/div/span').click()
    _xpath(
        driver, '//*[@id="pl_login_form"]/div/div[3]/div[2]/div/input'
    ).send_keys(_PASSWORD)
    time.sleep(5)
    driver.find_element(By.CLASS_NAME, 'W_btn_a').click()
    time.sleep(5)
    # NOTE(review): dmCheck / send_dm_btn look like a verification step that
    # needs manual action during the long sleep -- confirm.
    _xpath(driver, '//*[@id="dmCheck"]').click()
    time.sleep(1)
    driver.find_element(By.ID, 'send_dm_btn').click()
    time.sleep(20)


def _open_searched_profile(driver):
    """Search for _SEARCH_KEYWORD and open the first entry of the user list."""
    _xpath(driver, '//*[@id="plc_top"]/div/div/div[2]/input').click()
    time.sleep(20)
    # Looked up again on purpose: 20 seconds have passed since the click.
    _xpath(driver, '//*[@id="plc_top"]/div/div/div[2]/input').send_keys(_SEARCH_KEYWORD)
    time.sleep(10)
    _xpath(driver, '//*[@id="plc_top"]/div/div/div[2]/a').click()
    time.sleep(2)
    _xpath(driver, '/html/body/div[1]/div[2]/ul/li[2]/a').click()
    _xpath(driver, '//*[@id="pl_user_feedList"]/div[2]/div[2]/div/a[1]').click()
    time.sleep(2)
    _switch_to_other_window(driver)
    time.sleep(10)


def _print_inline_comments(driver, post, count):
    """Print text, time and like count of the first *count* inline comments.

    *post* is the XPath suffix of the post inside the feed container.
    """
    for i in range(1, count + 1):
        comment = f'{post}/div[3]/div/div/div[3]/div/div/div[{i}]/div[2]'
        print(_find_first(driver, _feed_xpaths(f'{comment}/div[1]')).text)
        print(_find_first(driver, _feed_xpaths(f'{comment}/div[3]/div[2]')).text)
        print(_find_first(driver, _feed_xpaths(
            f'{comment}/div[3]/div[1]/ul/li[4]/span/a/span/em[2]')).text)


def _print_detail_comments(driver, post):
    """Follow the post's "more comments" link and print up to ten comments.

    Returns quietly when the link is missing or cannot be clicked. The window
    that was current on entry is always current again on exit, and any window
    opened by the link is closed, even when scraping stops early.
    """
    main_handle = driver.current_window_handle
    try:
        # The original used a hard-coded div[2] here, which always addressed
        # the first post; every sibling XPath is keyed on the current post.
        more_link = _find_first(
            driver, _feed_xpaths(f'{post}/div[3]/div/div/div[3]/div[2]/div/a'))
        more_link.click()
    except WebDriverException:
        return
    driver.switch_to.window(driver.window_handles[-1])
    try:
        time.sleep(5)
        for i in range(1, 11):
            comment = f'{_DETAIL_COMMENTS}/div[{i}]/div[2]'
            print(_xpath(driver, f'{comment}/div[1]').text)
            # Time and like count sit under div[3] or div[4] of the comment.
            print(_find_first(driver, [
                f'{comment}/div[3]/div[2]',
                f'{comment}/div[4]/div[2]',
            ]).text)
            print(_find_first(driver, [
                f'{comment}/div[3]/div[1]/ul/li[4]/span/a/span/em[2]',
                f'{comment}/div[4]/div[1]/ul/li[4]/span/a/span/em[2]',
            ]).text)
            time.sleep(1)
    except NoSuchElementException:
        # A missing element (e.g. fewer than ten comments) ends the listing.
        pass
    finally:
        # Only close a window the link actually opened, never the main one.
        if driver.current_window_handle != main_handle:
            driver.close()
        driver.switch_to.window(main_handle)
        time.sleep(2)


def _print_posts(driver):
    """Print text, counters and comments for up to ten posts of the feed.

    Raises NoSuchElementException when a post or one of its counters cannot
    be found; the caller treats that as the end of the feed.
    """
    for k in range(1, 11):
        driver.execute_script("window.scrollBy(0,200)")
        post = f'/div/div[{k + 1}]'
        actions = f'{post}/div[2]/div/ul'
        print(_find_first(driver, _feed_xpaths(f'{post}/div[1]')).text)
        print(_find_first(driver, _feed_xpaths(
            f'{actions}/li[2]/a/span/span/span/em[2]')).text)
        comment_label = _find_first(driver, _feed_xpaths(
            f'{actions}/li[3]/a/span/span/span/em[2]')).text
        print(comment_label)
        print(_find_first(driver, _feed_xpaths(
            f'{actions}/li[4]/a/span/span/span/em[2]')).text)
        time.sleep(5)
        comment_button = _find_first(driver, _feed_xpaths(f'{actions}/li[3]/a'))
        # Clicked through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", comment_button)
        time.sleep(5)
        if comment_label == _NO_COMMENTS_LABEL:
            continue
        # A label that is not a plain integer skips the inline listing and
        # goes straight to the detail page instead of raising ValueError.
        count = _parse_count(comment_label)
        if count is not None and count <= 7:
            _print_inline_comments(driver, post, count)
        _print_detail_comments(driver, post)


def _open_listed_profiles(driver):
    """Click the first ten entries of the relation list, skipping failures."""
    for i in range(1, 11):
        try:
            _xpath(
                driver,
                f'//*[@id="Pl_Official_HisRelation__56"]/div/div/div/div[2]/div[1]/ul/li[{i}]/dl/dd[1]/div[1]/a[1]',
            ).click()
            time.sleep(1)
        except WebDriverException:
            continue


def _scrape_opened_profiles(driver, label):
    """Print counters and save the info page of every window after the first.

    Windows are visited newest-first and each one is closed afterwards. The
    page source goes to "<label>_<n>.txt", where n counts visited windows
    from 0.
    """
    handles = driver.window_handles[1:]
    for i, handle in enumerate(reversed(handles)):
        time.sleep(1)
        driver.switch_to.window(handle)
        try:
            time.sleep(3)
            for column in (1, 2, 3):
                # The counter is either a bare <strong> or wrapped in a link.
                print(_find_first(driver, [
                    f'{_STATS_ROW}/td[{column}]/strong',
                    f'{_STATS_ROW}/td[{column}]/a/strong',
                ]).text)
            _xpath(driver, '//*[@id="Pl_Core_UserInfo__6"]/div[2]/div[1]/div/a').click()
            time.sleep(3)
            _save_page_source(driver, f"{label}_{i}.txt")
            time.sleep(1)
        except (WebDriverException, OSError):
            print("no info")
        finally:
            driver.close()


def main():
    """Run the whole scrape in one Chrome session and always quit the browser.

    NOTE: ``executable_path`` is the Selenium 3 style of passing the driver
    location; recent Selenium 4 releases expect a ``Service`` object instead.
    """
    driver = webdriver.Chrome(executable_path=_CHROME_DRIVER_PATH)
    try:
        driver.get('https://www.baidu.com')
        _open_login_page(driver)
        _log_in(driver)
        _open_searched_profile(driver)

        try:
            _print_posts(driver)
        except WebDriverException:
            print("finish blog")

        time.sleep(3)
        _xpath(driver, '//*[@id="Pl_Core_UserInfo__6"]/div[2]/div[1]/div/a/span').click()
        time.sleep(5)
        _switch_to_other_window(driver)
        time.sleep(10)
        _save_page_source(driver, "information1.txt")

        _xpath(
            driver,
            '//*[@id="Pl_Core_T8CustomTriColumn__50"]/div/div/div/table/tbody/tr/td[1]/a/strong',
        ).click()
        time.sleep(3)
        _open_listed_profiles(driver)
        _scrape_opened_profiles(driver, "关注")
        time.sleep(2)

        driver.switch_to.window(driver.window_handles[0])
        _xpath(
            driver,
            '//*[@id="Pl_Official_HisRelationNav__55"]/div/div[2]/div[1]/div/div/div/div/ul/li[2]/a',
        ).click()
        time.sleep(5)
        _open_listed_profiles(driver)
        _scrape_opened_profiles(driver, "粉丝")
        time.sleep(2)
    finally:
        # Leave the browser visible for a while before shutting it down.
        time.sleep(30)
        driver.quit()


if __name__ == '__main__':
    main()