python - 使用 Selenium 分页抓取 eBay 已售出的物品
问题描述
我能够抓取 eBay 销售商品的第一页,所以我尝试了分页,这就是我所拥有的:
# Scrape eBay "sold & completed" search results across all result pages,
# collecting each item's sold date, title and price, then write them to CSV.
ebay_url = 'https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+sunglasses&_sacat=0&Brand=Oakley&rt=nc&LH_Sold=1&LH_Complete=1&_ipg=200&_oaa=1&_fsrp=1&_dcat=79720'

# NOTE: the original script also did an unused `requests.get(ebay_url)` here;
# Selenium loads the page itself, so that dead HTTP request has been removed.
driver = wd.Chrome(executable_path=r'/Users/mburley/Downloads/chromedriver')
driver.maximize_window()          # maximize the browser window
driver.implicitly_wait(30)        # implicit wait (seconds) for element lookups
driver.get(ebay_url)
wait = WebDriverWait(driver, 20)  # explicit-wait helper (up to 20 seconds)

sold_date = []
title = []
price = []
i = 1  # 1-based XPath result index, running continuously across pages

next_page = True
while next_page:
    try:
        # One match per sold item: the "Sold <date>" banner span.
        for item in driver.find_elements(
                By.XPATH,
                "//div[contains(@class,'title--tagblock')]/span[@class='POSITIVE']"):
            try:
                # Sold date of this item.
                sold_date.append(item.text)
            except NoSuchElementException:
                sold_date.append(None)
            try:
                # Title of the i-th sold item (document-wide indexed XPath).
                title.append(driver.find_element(
                    By.XPATH,
                    f"(//div[contains(@class,'title--tagblock')]/span[@class='POSITIVE']/ancestor::div[contains(@class,'tag')]/following-sibling::a/h3)[{i}]").text)
            except NoSuchElementException:
                title.append(None)
            try:
                # Price of the i-th sold item.
                price.append(driver.find_element(
                    By.XPATH,
                    f"(//div[contains(@class,'title--tagblock')]/span[@class='POSITIVE']/ancestor::div[contains(@class,'tag')]/following-sibling::div[contains(@class,'details')]/descendant::span[@class='POSITIVE'])[{i}]").text)
            except NoSuchElementException:
                price.append(None)
            i += 1
        # Progress dump of everything scraped so far.
        print(sold_date)
        print(title)
        print(price)
        # BUG FIX: 'pagination__next icon-link' is a CSS *class* list, not a
        # name attribute, so find_element_by_name() never matched and the
        # loop always stopped after page 1. Locate the link by class instead.
        button = driver.find_element(By.CSS_SELECTOR, "a.pagination__next")
        button.click()
        print("Clicked on Next Page!")
        time.sleep(1)  # brief pause to let the next page start loading
    except NoSuchElementException:
        # No next-page link: we are on the last results page.
        print("Done!")
        next_page = False
    except Exception as exc:
        # Any other failure (stale element, click intercepted, ...) also ends
        # the crawl, but is reported instead of being silently swallowed.
        print(f"Done! (stopped by {type(exc).__name__}: {exc})")
        next_page = False

# Build the DataFrame AFTER the loop so `data` always exists, even if the
# very first page failed (the original defined it inside the try block and
# would hit a NameError here in that case).
data = {
    'Sold_date': sold_date,
    'title': title,
    'price': price,
}
df = pd.DataFrame.from_dict(data)
df.to_csv('out_two.csv', index=False)
在我获得第 1 页的代码后,我添加了:
... code ...
## Loop here to get multiple pages
next_page = True
while next_page:
try:
... code to scrape page 1 ...
# Load Next Page by clicking button
button = driver.find_element_by_name('pagination__next icon-link')
button.click()
print("Clicked on Next Page!")
time.sleep(1)
except:
print("Done!")
next_page = False
不幸的是,它只运行了抓取第一页的代码,然后查找下一页,但找不到“按钮”,因此退出并打印 Done。我对网页抓取不太了解,所以我尝试按照网上的示例进行操作。有人能帮忙吗?谢谢!
解决方案
推荐阅读
- ubuntu - 为什么浏览器无法在 Nginx 上打开文件?
- reactjs - 是否可以在生产构建(webpack / craco)上更改样式表 URI?
- javascript - 更换减速机正确打字
- javascript - 使用我想知道这张照片是浏览过的还是用相机拍的
- javascript - 如何使用 Google App Scripts 从工作表名称创建字符串?
- excel - 如何从版本数组中返回最新文档版本的值?
- ios - 无法在 iOS 14 内联 DatePicker 中点击天数
- css - vh 和 vw 单位的缺点是什么?
- azure-sql-database - 如何在远程 Azure SQL Server 的视图中创建数据所在的视图
- html - Img 在 HTML 5 中未加载