首页 > 解决方案 > 如何使用 python 显示嵌套循环

问题描述

我是 Python 初学者，刚开始学习网络抓取。我想从一个旅游网站提取数据：酒店名称、每家酒店可选的安排（arrangement）以及价格。但我卡在了安排列表上——每家酒店可以有多个安排，而我的嵌套循环无法正确取到它们，我不知道原因。下面附上我的代码和得到的输出供参考，如果有人能帮忙，先在此致谢。

from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

# Location of the chromedriver executable; creating the driver starts Chrome.
PATH = "C:\\Users\\marketing2\\Documents\\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get('https://tn.tunisiebooking.com/')
# NOTE(review): `wait` is created here but never used below; the script
# relies on fixed sleep() calls instead.
wait = WebDriverWait(driver, 20)

# JavaScript that assigns the search criteria directly to the form fields
# 'ville_des', 'depart', 'checkin' and 'select_ch'.
script = "document.getElementById('ville_des').value ='Sousse';document.getElementById('depart').value ='05/08/2021';document.getElementById('checkin').value ='05/08/2021';document.getElementById('select_ch').value = '1';"

# Run the JavaScript in the page to fill in the form.
driver.execute_script(script)

# Click the search button, then pause 10 seconds.
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
sleep(10)

# Click the 'plus_res' button, then pause another 10 seconds.
# NOTE(review): presumably this loads more results or details - confirm.
btn_plus = driver.find_element_by_id('plus_res')
btn_plus.click()
sleep(10)

# Collect (hotel name, pensions) tuples by indexing absolute XPaths.
# v runs over 1..4 and j over 1..2, so at most four hotels and two
# arrangement slots per hotel are visited.
hotels=[]
# NOTE(review): this single list is created once, outside the loop, so every
# tuple appended to `hotels` refers to the same list object.
pensions=[]
for v in range(1, 5):
        # The hotel name is read from the v-th result card (div[v]).
        hotel = driver.find_element_by_xpath('/html/body/div[6]/div[2]/div[1]/div/div[2]/div/div[4]/div[' + str(v) + ']/div/div[3]/div[1]/div[1]/span/a/h3').get_attribute('innerHTML')
        for j in range (1,3):
            # NOTE(review): this XPath uses a fixed div[1] where the hotel
            # XPath uses div[v], so every outer iteration reads the same
            # first card.
            pension= driver.find_element_by_xpath('/html/body/div[6]/div[2]/div[1]/div/div[2]/div/div[4]/div[1]/div/div[3]/div[3]/div[1]/div[1]/form/div[1]/div[' + str(j) + ']/u').get_attribute('innerHTML')
        # NOTE(review): this append sits at the outer-loop level, so only the
        # last `pension` assigned by the inner loop is kept for each hotel.
        pensions.append((pension))
        hotels.append((hotel,pensions))
        

# Print the collected list of (hotel, pensions) tuples.
print(hotels)

标签: python selenium loops web-scraping nested-loops

解决方案


你可以试试

#!/usr/bin/env python
# coding: utf-8
import json
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select


# Path to the chromedriver binary; creating the driver starts a browser.
PATH = "/mnt/sdc/Work/scripts/Test/chromedriver"
driver = webdriver.Chrome(PATH)

# One dict per hotel: {'name': str, 'arrangements': [str, ...], 'price': str}
hotels_list = []

try:
    # Open the site whose search form is filled in below.
    driver.get('https://tn.tunisiebooking.com/')

    # Search criteria applied to the form.
    params = {
        'destination': 'Sousse',
        'date_from': '05/08/2021',
        'date_to': '05/08/2021',
        'bedroom': '1',
    }

    # Choose the destination by option value.
    destination_select = Select(driver.find_element_by_id('ville_des'))
    destination_select.select_by_value(params['destination'])

    # Choose the number of bedrooms by option value.
    bedroom_select = Select(driver.find_element_by_id('select_ch'))
    bedroom_select.select_by_value(params['bedroom'])

    # Assign the dates to the inputs via JavaScript. The values are passed as
    # script arguments rather than spliced into the JavaScript source, so
    # quotes in a value cannot break the script.
    # NOTE(review): 'depart' receives date_from and 'checkin' receives
    # date_to - the field names look swapped; confirm against the form.
    driver.execute_script(
        "document.getElementById('depart').value = arguments[0];"
        "document.getElementById('checkin').value = arguments[1];",
        params['date_from'],
        params['date_to'],
    )

    # Submit the search form and pause before reading the results.
    # NOTE(review): a fixed pause is kept because the element that signals
    # "results ready" is not known here; an explicit WebDriverWait on the
    # hotel cards would be more robust once that is confirmed.
    form = driver.find_element_by_id('hotel_recherch_moteur')
    form.submit()
    sleep(5)

    # ------------------------------------------------------------------------
    # Each element whose class contains "enveloppe_produit" is treated as one
    # hotel; all lookups below are relative to that element (XPath './/'),
    # so values from different hotels cannot be mixed up.
    hotel_cards = driver.find_elements_by_xpath(
        '//div[contains(@class, "enveloppe_produit")]'
    )
    for card in hotel_cards:
        # Price: text of the first <div> inside the "monaieprix" block,
        # with line breaks removed.
        price_block = card.find_element_by_xpath(
            './/div[@class="monaieprix"]'
        )
        price_value = price_block.find_element_by_xpath(
            './/div[1]'
        ).text.replace('\n', '')

        # Name: the <h3> inside the link of the title span.
        title_span = card.find_element_by_xpath(
            './/span[contains(@class, "tittre_hotel")]'
        )
        hotel_name = title_span.find_element_by_xpath('.//a//h3').text

        # Arrangements: the text of every <u> under an "angle" block; a card
        # may yield any number of them, including none.
        arrangement_nodes = card.find_elements_by_xpath(
            './/div[contains(@class, "angle")]//u'
        )
        arrangements = [node.text for node in arrangement_nodes]

        hotels_list.append({
            'name': hotel_name,
            'arrangements': arrangements,
            'price': price_value,
        })
finally:
    # Always end the browser session, even when a lookup above fails, so no
    # browser/driver process is left running.
    driver.quit()

# ----------------------------------------------------------------------------
# Print one JSON document per hotel; ensure_ascii=False keeps accented
# characters readable instead of emitting \uXXXX escapes.
for hotel in hotels_list:
    print(json.dumps(hotel, indent=4, ensure_ascii=False))
{
    "name": "El Mouradi Palace",
    "arrangements": [
        "Petit dejeuner",
        "Demi pension plus",
        "All inclusive soft"
    ],
    "price": "67"
}
{
    "name": "KANTA",
    "arrangements": [
        "Petit dejeuner",
        "Demi pension",
        "All inclusive soft"
    ],
    "price": "43"
}
...

如果我的回答对你有帮助，请将其标记为正确答案。


推荐阅读