web-scraping - Beautiful Soup 属性错误(AttributeError)
问题描述
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape Flipkart's book search results (pages 1-15), visit every product
# link found on each page, and write name / price / ratings to a CSV file.
#
# This is the failing version from the question; its behaviour is kept as-is.
# NOTE(review): the pasted code had lost all indentation. The nesting below is
# reconstructed -- in particular the DataFrame/CSV step is presumed to sit
# inside the page loop. TODO confirm against the original post.
for n in range(1, 16):
    response = requests.get(
        'https://www.flipkart.com/search?q=books&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as'
        '=off '
        '&page=' + str(n))
    soup = BeautifulSoup(response.text, 'html.parser')
    # print(soup.prettify())

    # Absolute URLs of the product pages linked from this search page.
    urls = list()
    for a in soup.find_all('a', {'class': '_2cLu-l'}):
        urls.append('https://www.flipkart.com' + a['href'])

    # for a in soup.find_all('a', {'class': '_31qSD5'}):
    # urls.append('https://www.flipkart.com' + a['href'])
    #
    # for a in soup.find_all('a', {'class': '_3dqZjq'}):
    # urls.append('https://www.flipkart.com' + a['href'])

    # With this nesting the list is re-created for every search page.
    products = list()
    for url in urls:
        product = dict()
        page_soup = BeautifulSoup(requests.get(url).text, 'html.parser')

        name = page_soup.find('h1', {'class': '_9E25nV'})
        # find() returns None when no matching tag exists; the next line then
        # raises the AttributeError reported in the question.
        product['name'] = name.text

        price = page_soup.find('div', {'class': '_1vC4OE _3qQ9m1'})
        # Same unguarded access as for the name above.
        product['price'] = price.text

        # Only this field is guarded against a missing tag.
        ratingsAndReviews = page_soup.find('span', {'class': '_38sUEc'})
        if ratingsAndReviews is None:
            product['ratingsAndReviews'] = '0 ratings & 0 reviews'
        else:
            product['ratingsAndReviews'] = ratingsAndReviews.text

        products.append(product)

    # With this nesting the CSV is rewritten once per search page.
    df = pd.DataFrame(products)
    print(df)
    df.to_csv(r'C:\Users\shiva\Desktop\Damn\Output_flipkart.csv', index=True)
product['name'] = name.text | AttributeError: 'NoneType' object has no attribute 'text'
解决方案
有时该网站会返回非预期的 HTML 来阻止抓取,此时 find() 找不到目标标签并返回 None。因此,请在内层 for 循环中放置一个 try/except 块。
这样,即使某些 URL 无法正常解析,程序也不会中止。
另外,请把转换为 DataFrame 并保存的代码移到 for 循环之外。
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape Flipkart's book search results (pages 1-15), visit every product
# link, and save name / price / ratings of all products to data.csv.
#
# A product page that cannot be fetched or parsed is reported and skipped
# instead of aborting the whole run.

# Accumulates rows across ALL search pages, so it is created before the loop.
products = list()
for n in range(1, 16):
    try:
        # The "as=off" fragment originally carried a stray trailing space,
        # which ended up inside the query string; it is removed here.
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(
            'https://www.flipkart.com/search?q=books&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as'
            '=off'
            '&page=' + str(n),
            timeout=30)
    except requests.RequestException as e:
        # One unreachable search page should not discard what was collected.
        print(e)
        continue
    soup = BeautifulSoup(response.text, 'html.parser')

    # Absolute URLs of the product pages linked from this search page.
    urls = list()
    for a in soup.find_all('a', {'class': '_2cLu-l'}):
        urls.append('https://www.flipkart.com' + a['href'])

    for url in urls:
        try:
            product = dict()
            res = requests.get(url, timeout=30)
            page_soup = BeautifulSoup(res.text, 'html.parser')

            # find() returns None when the tag is missing (e.g. the site
            # served a different page); .text then raises AttributeError,
            # which is handled below.
            name = page_soup.find('h1', {'class': '_9E25nV'})
            product['name'] = name.text
            price = page_soup.find('div', {'class': '_1vC4OE _3qQ9m1'})
            product['price'] = price.text

            # A missing ratings tag gets a default instead of an error.
            ratingsAndReviews = page_soup.find('span', {'class': '_38sUEc'})
            if ratingsAndReviews is None:
                product['ratingsAndReviews'] = '0 ratings & 0 reviews'
            else:
                product['ratingsAndReviews'] = ratingsAndReviews.text

            # Only reached when name and price were both found.
            products.append(product)
        except (requests.RequestException, AttributeError) as e:
            # Network failure or missing name/price tag: report and move on.
            print(e)

# Build the table and write the file once, after every page was processed.
df = pd.DataFrame(products)
df.to_csv("data.csv", index=False)
推荐阅读
- angular - 角度测试:使用特定类型的参数测试方法
- android - 使用时库未添加其依赖项
- python-3.x - 创建 onefile 应用程序时如何修复 Pyinstaller Mac Os 错误
- c++ - 反转数字 (C++)
- android - 未指定 buildToolsVersion
- performance - 系统鼠标的移动速度有多快?
- android - Meteor 构建失败:“Android 目标:android:命令失败,退出代码 ENOENT”
- css - 在 react-native 中将阴影应用于容器
- xcode - iOS 12 iPad 拒绝启动请求 - Xcode
- git - 如何获取 ./configure 以生成默认值