python - selenium + Python csv 文件输出搞砸了
问题描述
我正在尝试抓取一个 js(基于 AngularJS0 的网站,而我希望脚本执行的操作是:
- 登录
- 点击 div 元素
- 收集资料
- 回去
- 再次单击第二个 div 并循环相同的内容
- 单击下一步按钮并执行步骤(2-5)
- 将抓取的数据输出到 csv 文件
这是我的部分代码:
def main():
i = 1
while i < 13:
if i in range(1,11):
myxpath = "/html/body/div[1]/div[3]/div/div[1]/leads-list/div/ul/li["+str(i)+"]"
print(myxpath)
item = (By.XPATH, '//*[@id="description"]')
WebDriverWait(driver, 50).until(EC.element_to_be_clickable(item))
Itemlinks = driver.find_elements_by_xpath(myxpath)
print('total items displayed in current page', len(Itemlinks))
for items in Itemlinks:
time.sleep(10)
items.click()
print("item clicked")
time.sleep(10)
sections = driver.find_elements_by_xpath('//*[@id="longDescriptionHTML"]')
for section in sections:
try:
name = section.find_element_by_xpath(
"/html/body/div[1]/div[3]/div/div[2]/lead-detail/div[4]/div[1]/div[2]/h1").text
print(name)
titles.append(name)
except NoSuchElementException:
pass
try:
email = section.find_element_by_xpath(
"/html/body/div[1]/div[3]/div/div[2]/lead-detail/div[4]/div[1]/ul[1]/li[1]/div[1]/ul/li[2]").text
print(email)
emails.append(email)
except NoSuchElementException:
pass
try:
phone = section.find_element_by_xpath('//*[@id="longDescriptionHTML"]/ul[1]/li[1]/a[1]/div/ul/li[2]').text
print(phone)
phone_numbers.append(phone)
except NoSuchElementException:
pass
try:
address = section.find_element_by_xpath('//*[@id="longDescriptionHTML"]/ul[1]/li[1]/div[4]/ul/li[2]').text
print(address)
addresses.append(address)
except NoSuchElementException:
pass
try:
infos = section.find_element_by_xpath('//*[@id="longDescriptionHTML"]/ul[2]')
all_info = infos.find_elements_by_tag_name('li')
for li_info in all_info:
info = li_info.text
print (info)
information.append(info)
except NoSuchElementException:
pass
try:
description = section.find_element_by_xpath('//*[@id="longDescriptionHTML"]/div[3]/p').text
print(description)
lead_description.append(description)
except NoSuchElementException:
pass
try:
additional = section.find_element_by_xpath('//*[@id="longDescriptionHTML"]/div[4]/p').text
print(additional)
additional_data.append(additional)
except NoSuchElementException:
pass
try:
public_data1 = section.find_element_by_xpath('//*[@id="longDescriptionHTML"]/div[5]/ul')
all_public = public_data1.find_elements_by_tag_name('li')
for li_data in all_public:
public = li_data.text
print(public)
public_data.append(public)
except NoSuchElementException:
pass
driver.find_element_by_class_name("back-link.pull-left.col-md-3").click()
driver.execute_script("window.scrollTo(0, 50)")
time.sleep(5)
i += 1
continue
elif i in range(12):
link = wait.until(
EC.visibility_of_element_located((By.XPATH, "//a[contains(text(),'Next')]")))
driver.execute_script('arguments[0].scrollIntoView();', link)
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, "//a[contains(text(),'Next')]"))).click()
time.sleep(10)
df = pd.DataFrame(list(zip(titles, emails, phone_numbers, addresses, information, lead_description, additional_data, public_data)), columns=['Name', 'Email', 'Phone Number', 'Address', 'Information', 'Lead Description', 'Additional Data', 'Public Data'])
df.to_csv('zbuyer00.csv', index=False)
main()
if __name__ == '__main__':
main()
代码正在做应该做的事情(从第 1 步到第 6 步)。但是当我尝试输出 CSV 文件时;它在不同的行上打印“信息”和“公共”。对于“公共”和“信息”;我想在同一个单元格(但不同的列)上写入 csv 文件。当我尝试输出上述代码时,这里是我的 CSV 文件:
解决方案
推荐阅读
- multilingual - 支持文章的多语言无头 cms
- angular - WebAPI 第一次以 POST 的形式重定向,然后以 GET 的形式重定向到 Angular 应用程序
- android - 如何通过捆绑传递字体?
- loops - VBScript 无限循环冻结 PC
- user-interface - 颤振中的自定义结构
- python-3.x - 根据鼠标位置,pygame 显示器上的 Blit 表面
- asp.net - 如何使用Repeater使用sql server在表中插入,更新,删除,选择
- r - 将时差拆分为各个小时
- python - 尝试安装 Conda 环境时出错
- django - 如何在 Django-ORM 中强制子查询?