python - 如何使用 Selenium 遍历 web 表?
问题描述
我一直在尝试遍历 Crunchbase 上的这个 EV 公司表,但由于某种原因,代码只是拉起第一行。知道为什么吗?谢谢 !:)
#imports
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
#paths
PATH = "C:/Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://www.crunchbase.com/search/organizations/field/organization.companies/categories/electric-vehicle")
driver.maximize_window()
time.sleep(5)
print(driver.title)
WebDriverWait(driver, 20).until(
EC.visibility_of_element_located(
(By.XPATH, ('/html/body/chrome/div/mat-sidenav-container/mat-sidenav-content/div/search/page-layout/div/div/form/div[2]/results/div/div/div[3]/sheet-grid/div/div/grid-body/div/grid-row[1]/grid-cell[2]/div/field-formatter/identifier-formatter/a/div/div')
)))
companies = driver.find_elements_by_css_selector("div.identifier-label")
#create company dictionary and iterate through Crunchbase EV company table
company_list = []
for company in companies:
name = company.find_element_by_xpath('/html/body/chrome/div/mat-sidenav-container/mat-sidenav-content/div/search/page-layout/div/div/form/div[2]/results/div/div/div[3]/sheet-grid/div/div/grid-body/div/grid-row[1]/grid-cell[2]/div/field-formatter/identifier-formatter/a/div/div').text
industry = company.find_element_by_xpath('/html/body/chrome/div/mat-sidenav-container/mat-sidenav-content/div/search/page-layout/div/div/form/div[2]/results/div/div/div[3]/sheet-grid/div/div/grid-body/div/grid-row[1]/grid-cell[3]/div/field-formatter/identifier-multi-formatter/span').text
hq = company.find_element_by_xpath('/html/body/chrome/div/mat-sidenav-container/mat-sidenav-content/div/search/page-layout/div/div/form/div[2]/results/div/div/div[3]/sheet-grid/div/div/grid-body/div/grid-row[1]/grid-cell[4]/div/field-formatter/identifier-multi-formatter/span').text
cblist = {
'name': name,
'industry': industry,
'hq': hq
}
company_list.append(cblist)
#create dataframe
df = pd.DataFrame(company_list)
print(df)
解决方案
在所有标识符中为 for 循环中的每次迭代增加网格行索引,例如..
row_index = row_index + 1
name = company.find_element_by_xpath(
'/html/body/chrome/div/mat-sidenav-container/mat-sidenav-content/div/search/page-layout/div/div/form/div[2]/results/div/div/div[3]/sheet-grid/div/div/grid-body/div/grid-row['+str(row_index)+']/grid-cell[2]/div/field-formatter/identifier-formatter/a/div/div').text
推荐阅读
- uikit - iPad:状态栏一侧显示白色文本,另一侧显示黑色文本
- nested - 嵌套常量循环时间复杂度 O(n) 还是 O(n^2)?
- javascript - 在 JS 引擎中,IIFE 的执行上下文是在变量声明之前还是之后创建和退出?
- c++ - 为什么这个解决方案在给定的测试用例上提供 TLE?
- java - 如何在Java中为枚举分配类型
- ios - 为什么 CollectionView 单元格内部其他 collectionView 的大小错误?
- python-3.x - python中带有asyncio的继承和初始化模式
- sql - 从表中删除重复值并保留最新日期的 ID
- android-studio - 在物理设备上禁用的android studio上的热重载和热重启
- python - 当我尝试在 Jupyter 笔记本上计算归一化时出错(使用“-”和 df)