首页 > 解决方案 > 这段代码无法通过 Selenium 提取链接,但也没有报任何错误,我做错了什么?

问题描述

第一部分代码全部运行正常,但第二部分代码什么都没做——尽管这段代码确实被执行到了。我最终想做的是让代码找到前 6 个 Instagram 帖子的链接,并存储它们的指标(目前只是观看次数)。而且我想对不同的帐户执行此操作,也就是说会有不同的链接路径。目前,这段代码应该获取类名为“v1Nh3 kIKUG _bz0w”的元素的“href”属性,并将其存入列表。为了更好地说明我们要处理的内容,我附上了 HTML 以供参考。请帮帮我!!

from selenium import webdriver
from time import sleep
import random
import re

# Bot-detection evasion: candidate delays (seconds) to wait between browser actions.
waiting_time = [
    4, 4, 8.2, 4, 6,
    13, 7.2, 3.4, 6.5, 9,
]
# Delay passed to sleep() between steps; currently fixed.
# To randomise it, use: random.choice(waiting_time)
number = 3
# Very long delay used for the final sleep() at the end of the constructor.
bigger_number = 4 * 10 ** 6

class seeder_gram():
    """Log in to Instagram with Selenium and collect post links from a profile.

    All of the work happens in ``__init__``: it starts Chrome, fills in the
    login form, dismisses the notification pop-up if one is found, opens a
    randomly chosen target profile and gathers the ``href`` of every post
    tile it can find.  The links are stored on ``self.list_of_links`` and
    printed.  The constructor then sleeps for ``bigger_number`` seconds.

    Relies on the module-level names ``webdriver``, ``sleep``, ``random``,
    ``number`` and ``bigger_number``.
    """

    # ``find_elements_by_class_name`` accepts ONE class name only; passing
    # "v1Nh3 kIKUG  _bz0w" matches nothing, which is why the original loop
    # silently never ran.  A CSS selector can require all three classes.
    # The page HTML is not visible from this file, so both layouts are covered:
    # an <a href> nested inside the tile, or the tile itself being the <a href>.
    POST_LINK_SELECTOR = ".v1Nh3.kIKUG._bz0w a[href], a.v1Nh3.kIKUG._bz0w[href]"

    def __init__(self, username, phone, email, password):
        """Run the whole login-and-scrape flow.

        Args:
            username: account user name (stored on the instance).
            phone: account phone number (stored on the instance).
            email: account e-mail address (stored on the instance).
            password: account password (stored on the instance).
        """
        # Imported locally so the block does not depend on a new file-level import.
        from selenium.common.exceptions import WebDriverException

        self.username = username
        self.phone = phone
        self.email = email
        self.password = password
        self.driver = webdriver.Chrome("/Users/apple/Downloads/chromedriver")
        self.driver.get('https://instagram.com')

        sleep(number)

        # NOTE(review): the login form is filled with hard-coded placeholder
        # strings, not self.username / self.password -- confirm that is intended.

        #username insert
        username_form = self.driver.find_element_by_xpath('/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[1]/div/label/input')
        username_form.clear()
        username_form.send_keys("nothingtoseehere")

        #password insert
        password_form = self.driver.find_element_by_xpath('/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[2]/div/label/input')
        password_form.clear()
        password_form.send_keys("nothingtoseehere")

        #clicks button
        button_click = self.driver.find_element_by_xpath('/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[3]/button')
        button_click.click()

        sleep(number)

        #close noti bar
        # Best effort: the pop-up may not be present.  Only WebDriver errors
        # are ignored, so KeyboardInterrupt and programming errors still surface.
        try:
            noti_pop_up = self.driver.find_element_by_xpath('/html/body/div[4]/div/div/div/div[3]/button[2]')
            noti_pop_up.click()
        except WebDriverException:
            pass

        #list of targets
        account_list = ["nothingtoseehere", "nothingtoseehere", "nothingtoseehere", "nothingtoseehere", "nothingtoseehere", "nothingtoseehere"]
        account_picker = random.choice(account_list)

        #searches for target
        self.driver.get('https://instagram.com/' + account_picker + '/')

        sleep(number)

        #post list
        post_id_finder = self.driver.find_elements_by_css_selector(self.POST_LINK_SELECTOR)
        # Read the href from each element (not from the list) and append the
        # link to the list (not the list to the link).
        self.list_of_links = self._collect_hrefs(post_id_finder)
        print(self.list_of_links)

        # ------------------------------------------------------------------
        # Work in progress (disabled), kept for reference:
        #
        # #gives unique post directory
        # #eventually this will be a list that appends links depending on post
        # post_id = "/p/CLWlksPHsvN/"
        #
        # #searches for the first grid
        # grid_search = self.driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div[4]/article/div[1]/div/div[1]')
        #
        # #goes to post
        # post_viewer = self.driver.get('https://www.instagram.com' + post_id)
        #
        # #checks view
        # video_views = self.driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/span')
        # views = re.findall(r'\d', video_views.text)
        # print(views)
        #
        # #combines view value
        #
        # post_index = 1
        # while post_index != 6:
        #     #finds videos that show views
        #     #what if the video isnt based on views??
        #     #logs views in list
        #     #goes back one page
        #     #moves on to next post
        #     post_index += 1
        #
        # Back-up search feature in case the direct URL doesn't work; needs
        # more work before it is functional:
        #
        # search_up = self.driver.find_element_by_xpath('/html/body/div[1]/section/nav/div[2]/div/div/div[2]/input')
        # search_up.clear()
        # search_up.send_keys(account_picker)
        # ------------------------------------------------------------------

        sleep(bigger_number)

    @staticmethod
    def _collect_hrefs(elements, limit=None):
        """Return the non-empty ``href`` attribute of each element, in order.

        Args:
            elements: iterable of objects exposing ``get_attribute(name)``.
            limit: optional maximum number of links to return; ``None``
                means no limit.

        Returns:
            A list of href strings; elements without an href are skipped.
        """
        links = []
        for element in elements:
            if limit is not None and len(links) >= limit:
                break
            href = element.get_attribute('href')
            if href:
                links.append(href)
        return links

#registered users
# Each call runs the whole browser session inside the constructor, which ends
# with sleep(bigger_number); the second call therefore only starts after the
# first one's sleep has finished.
user1 = seeder_gram("nothingtoseehere", "nothingtoseehere", "nothingtoseehere", "nothingtoseeheres")
user2 = seeder_gram("nothingtoseehere", "nothingtoseehere", "nothingtoseehere", "nothingtoseehere")

# The former trailing bare `seeder_gram()` call was removed: the constructor
# requires username, phone, email and password, so calling it without
# arguments raises TypeError.

这是我遇到问题的代码:

#post list
# Excerpt of the failing part of seeder_gram.__init__, reproduced as posted.
        # NOTE(review): find_elements_by_class_name takes a single class name;
        # a space-separated list of three classes matches no element, so the
        # loop below never runs and no error is raised.
        post_id_finder = self.driver.find_elements_by_class_name("v1Nh3 kIKUG  _bz0w")
        list_of_links = []
        
        def get_href(post_id_finder):
            for post in post_id_finder:
                # NOTE(review): get_attribute is called on the whole list
                # instead of on the loop variable `post`.
                link = post_id_finder.get_attribute('href')
                # NOTE(review): receiver and argument are swapped; the intent
                # appears to be list_of_links.append(link).
                link.append(list_of_links)
                print(list_of_links)

        get_href(post_id_finder)

这是 HTML 的样子,如果有更好的方法来做到这一点: 在此处输入图像描述

标签: python, python-3.x, selenium, selenium-webdriver, selenium-chromedriver

解决方案


您不能在 find_elements_by_class_name 中使用多个类名(复合类名),最好改用 CSS 选择器之类的方式,例如 find_elements_by_css_selector(".v1Nh3.kIKUG._bz0w")。


推荐阅读