首页 > 解决方案 > 领英自动化

问题描述

我正在尝试使用 Python 的 Selenium 抓取 LinkedIn 联系人的姓名。但它只滚动了一页，也只加载了一页的结果。有什么方法可以让我用 Selenium 获取整个页面的全部结果？下面附上代码以供参考。

import time
import requests
from bs4 import BeautifulSoup as bt
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from parsel import Selector
import urllib3
# Chrome launch configuration: open maximized and suppress notification prompts.
options = webdriver.ChromeOptions()

# A content-setting value of 2 is passed for notifications in the profile prefs.
prefs = {"profile.default_content_setting_values.notifications": 2}
options.add_experimental_option("prefs", prefs)

for chrome_flag in ("start-maximized", "--disable-notifications"):
    options.add_argument(chrome_flag)
from webdriver_manager.chrome import ChromeDriverManager

# Credentials are read interactively instead of being hard-coded in the script.
# NOTE(review): input() echoes the password to the terminal; consider
# getpass.getpass() if that matters in your environment.
email = input("Please enter your linkedin email id: ")
password = input("Please enter your linkedin password: ")

# Raw string: "\c" is not a valid escape sequence in a regular string literal.
driver = webdriver.Chrome(options=options, executable_path=r"D:\chromedriver.exe")
driver.get("http://www.linkedin.com/login/")
time.sleep(4)  # fixed pause so the login form has time to render

# Logging in: fill the e-mail and password fields, then submit the form.
# NOTE(review): the find_element_by_* helpers and the executable_path argument
# are Selenium 3 style APIs -- confirm the installed Selenium version still
# provides them before running.
ele = driver.find_element_by_name("session_key")
pwd = driver.find_element_by_name("session_password")

ele.send_keys(email)
pwd.send_keys(password)
driver.find_element_by_xpath("/html/body/div/main/div[2]/form/div[3]/button").click()
time.sleep(3)  # fixed pause so the post-login redirect can complete

# Navigate to the network page, then to the full connections list.
driver.get("https://www.linkedin.com/mynetwork/")
driver.get("https://www.linkedin.com/mynetwork/invite-connect/connections/")

def scroll(driver, timeout):
    """Scroll to the bottom of the page until its height stops growing.

    After each scroll the function pauses, then re-reads
    ``document.body.scrollHeight``; it returns as soon as two consecutive
    readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)`` (the Selenium
            WebDriver in this script).
        timeout: Seconds to sleep after each scroll so new content can load.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to the bottom of the currently loaded content.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait for the page to load more content.
        time.sleep(timeout)

        # An unchanged height means nothing new was appended: stop scrolling.
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

 scroll(driver, 2)

标签: python selenium

解决方案


推荐阅读