首页 > 解决方案 > 美汤属性错误

问题描述

import requests
from bs4 import BeautifulSoup
import pandas as pd

for n in range(1, 16):
    response = requests.get(
        'https://www.flipkart.com/search?q=books&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as'
        '=off '
        '&page=' + str(n))

    soup = BeautifulSoup(response.text, 'html.parser')

    # print(soup.prettify())

    urls = list()

    for a in soup.find_all('a', {'class': '_2cLu-l'}):
        urls.append('https://www.flipkart.com' + a['href'])

    # for a in soup.find_all('a', {'class': '_31qSD5'}):
    #     urls.append('https://www.flipkart.com' + a['href'])
    #
    # for a in soup.find_all('a', {'class': '_3dqZjq'}):
    #     urls.append('https://www.flipkart.com' + a['href'])

    products = list()

    for url in urls:
        product = dict()
        page_soup = BeautifulSoup(requests.get(url).text, 'html.parser')

        name = page_soup.find('h1', {'class': '_9E25nV'})
        product['name'] = name.text

        price = page_soup.find('div', {'class': '_1vC4OE _3qQ9m1'})
        product['price'] = price.text

        ratingsAndReviews = page_soup.find('span', {'class': '_38sUEc'})

        if ratingsAndReviews is None:
            product['ratingsAndReviews'] = '0 ratings & 0 reviews'
        else:
            product['ratingsAndReviews'] = ratingsAndReviews.text

        products.append(product)

        df = pd.DataFrame(products)
        print(df)
        df.to_csv(r'C:\Users\shiva\Desktop\Damn\Output_flipkart.csv', index=True)

产品['name'] = name.text | AttributeError:“NoneType”对象没有属性“文本”

标签: web-scrapingbeautifulsoup

解决方案


有时该网站会抛出不需要的 html 以阻止您。因此,try/except在内部 for 循环中放置一个块。这样,即使您的某些 url 不起作用,它也不会停止程序。

并将代码转换为数据框并保存在 for 循环之外

import requests
from bs4 import BeautifulSoup
import pandas as pd

products = list()

for n in range(1, 16):
    response = requests.get(
        'https://www.flipkart.com/search?q=books&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as'
        '=off '
        '&page=' + str(n))

    soup = BeautifulSoup(response.text, 'html.parser')

    urls = list()
    
    for a in soup.find_all('a', {'class': '_2cLu-l'}):
        urls.append('https://www.flipkart.com' + a['href'])

    for url in urls:
        try:
            product = dict()
            res = requests.get(url)

            page_soup = BeautifulSoup(res.text, 'html.parser')
            name = page_soup.find('h1', {'class': '_9E25nV'})
            product['name'] = name.text

            price = page_soup.find('div', {'class': '_1vC4OE _3qQ9m1'})
            product['price'] = price.text

            ratingsAndReviews = page_soup.find('span', {'class': '_38sUEc'})

            if ratingsAndReviews is None:
                product['ratingsAndReviews'] = '0 ratings & 0 reviews'
            else:
                product['ratingsAndReviews'] = ratingsAndReviews.text

            products.append(product)
        except Exception as e:
            print(e)

df = pd.DataFrame(products)
df.to_csv("data.csv", index=False)

推荐阅读