首页 > 解决方案 > 如何使用 Beautiful Soup 抓取 Twitter 上的转发者?

问题描述

from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import lxml
import openpyxl as op

# from lxml

# Despite its name, this holds the URL of the tweet's retweets page (it is
# passed to driver.get() further down), not HTML text.
html_text = 'https://twitter.com/videogamedeals/status/1352325118261948418/retweets'


#

# Start a Chrome session driven by the chromedriver binary at this local path.
# (The call was previously split in two by a stray markdown rule and heading
# placeholder, which made it a syntax error.)
driver = webdriver.Chrome(
    executable_path='C:/Users/atif/Downloads/chromedriver.exe')
# driver.implicitly_wait(30)
# Load the retweets page in the browser and take a snapshot of its HTML.
driver.get(html_text)
html = driver.page_source
# Parse the snapshot with BeautifulSoup using the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# body = soup.body

# Earlier experiments, left disabled:
# titles = headers.find_all('a', class_='title fw500 ellipsis')
# for h in headers:
#     # title = h.find('a', class_='title fw500 ellipsis').text
#     print(h.a['href'])

# a_links = body.find_all("a")
# NOTE(review): find_all() takes a tag name as its first argument, so this
# whole string is presumably matched as a tag name rather than interpreted as
# a CSS selector (soup.select() is the CSS-selector API) — confirm against the
# BeautifulSoup docs. page_source is also read right after get() with no wait,
# so script-rendered content may not be in the snapshot yet.
names = soup.find_all(
    "a.css-4rbku5 css-18t94o4 css-1dbjc4n r-1loqt21 r-1wbh5a2 r-dnmrzs r-1ny4l3l")

# Report how many matching elements were found.
print(len(names))

**> 显示此错误 =>

[17548:22900:0415/160654.715:ERROR:device_event_log_impl.cc(214)] [16:06:54.715] Bluetooth: bluetooth_adapter_winrt.cc:1162 RequestRadioAccessAsync failed: RadioAccessStatus::DeniedByUserWill not be able to change radio power. 0 <- 结果为零**

标签: python selenium web-scraping beautifulsoup

解决方案


实际上,您可以仅使用 selenium 而不使用 BeautifulSoup 来获取名称,这是代码:

from seleniumwire import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from bs4 import BeautifulSoup
import requests
import lxml
import openpyxl as op





from selenium.common.exceptions import TimeoutException

# Tweet "retweets" page whose retweeter display names are printed below.
html_text = 'https://twitter.com/videogamedeals/status/1352325118261948418/retweets'

# XPath of the nested <span> elements whose text is printed as a name.
# NOTE(review): these class names look auto-generated and may change whenever
# the site is redeployed — confirm against the live page before relying on them.
NAME_XPATH = '//span[@class="css-901oao css-16my406 r-poiln3 r-bcqeeo r-qvutc0"]//span[@class="css-901oao css-16my406 r-poiln3 r-bcqeeo r-qvutc0"]'

# Upper bound, in seconds, on how long to wait for the names to render.
MAX_WAIT_SECONDS = 20

# NOTE(review): newer Selenium releases expect the driver path to be supplied
# through a Service object rather than positionally — confirm against the
# installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get(html_text)

    # Poll until at least one name element exists instead of always sleeping
    # for the full 20 seconds; the page is rendered by scripts after load.
    try:
        WebDriverWait(driver, MAX_WAIT_SECONDS).until(
            EC.presence_of_element_located((By.XPATH, NAME_XPATH)))
    except TimeoutException:
        # Nothing matched in time. Fall through so the script prints nothing,
        # which is what the fixed sleep produced for an empty page.
        pass

    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath(), which
    # was removed from Selenium in 4.3.
    names = driver.find_elements(By.XPATH, NAME_XPATH)

    for name in names:
        print(name.text)
finally:
    # Always shut the browser down, even if loading or scraping fails.
    driver.quit()

推荐阅读