python - 使用 selenium 时,如何避免请求被 Instagram 临时封锁?
问题描述
我正在尝试自己编写一个 python 脚本,用来找出某个帐户的关注者中粉丝数最多的那些人。脚本看起来工作正常,但运行一段时间后,或者运行超过 1-2 次之后,Instagram 就会返回一个“请稍后重试”的错误。我搜索后发现,这是因为我在短时间内发出了太多请求,Instagram 暂时封锁了我的 IP。
有谁知道解决这个问题的方法?
我的代码
"""
WHAT DOES THIS SCRIPT ACTUALLY DO?:
This script enables you to scrape all your followers and then find X top followed followers.
--------------------------------------------------------------------------------------------
NOTICE:
Unfortunately it is very hard nowadays to scrape social media sites, due to
timeout issues, too many pings in a set time and other request restrictions.
So this script can only be run 1-3 times a day.
I've also tried using existing APIs, but all of these are either too slow, or simply
show a '429' Too Many Requests error.
"""
import instaloader
from selenium import webdriver
import time
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from rich.console import Console
from rich.table import Column, Table
# Global vars
# Shared Instaloader session: get_followers() logs in through it and passes
# its context to Profile.from_username().
L = instaloader.Instaloader()
# Profile URL template; scrape_data() fills in the username with str.format().
URL = "https://www.instagram.com/{}/"
# Credentials entered in get_followers() and reused by driver_login() to sign
# in to the Selenium-controlled browser.
usernameGlobal = None
passwordGlobal = None
# rich console that output_result() uses to print the results table.
console = Console()
def get_followers():
    """Prompt for Instagram credentials, log in, and return follower usernames.

    The entered credentials are stored in the module globals
    ``usernameGlobal`` and ``passwordGlobal`` so that ``driver_login()`` can
    reuse them. The prompt repeats until ``L.login`` succeeds.

    Returns:
        list[str]: the ``username`` of every profile yielded by
        ``profile.get_followers()`` for the logged-in account.
    """
    # Local import keeps this block self-contained; getpass hides the password
    # instead of echoing it to the terminal like input() does.
    import getpass

    global usernameGlobal, passwordGlobal

    # Login
    while True:  # Keep prompting until the login succeeds
        print("\n"+"*-=-*"*5)
        usernameGlobal = input("> Enter your username: ")
        passwordGlobal = getpass.getpass("> Enter your password: ")
        try:
            L.login(usernameGlobal, passwordGlobal)
        except Exception as err:
            # `except Exception` (not a bare except) lets Ctrl-C / EOF abort
            # the prompt instead of looping forever.
            print("\n"+"-"*28+"\n> Wrong Username / Password"+"\n"+"-"*28)
            # Login can also fail for other reasons (e.g. connection errors),
            # so show what the library actually reported.
            print("> Reason: {}".format(err))
            continue
        print("\n"+"-"*28+"\n> Successfully Logged In!")
        print("> Please leave this program running in the background")
        print("> Until you see the 'FINISHED' message'"+"\n"+"-"*28)
        break

    # Obtain profile metadata
    profile = instaloader.Profile.from_username(L.context, usernameGlobal)
    # Collect the username of each follower
    return [follower.username for follower in profile.get_followers()]
def _parse_follower_count(text):
    """Convert a displayed follower count such as '1,234' or '12.3k' to an int."""
    cleaned = text.strip().lower()
    multipliers = {"k": 1000, "m": 1000000, "b": 1000000000}
    suffix = cleaned[-1:]
    if suffix in multipliers:
        # Abbreviated form: the part before the suffix is a decimal number.
        # A comma is treated as a decimal separator here ("1,2k" == "1.2k").
        number = cleaned[:-1].replace(" ", "").replace(",", ".")
        try:
            return int(round(float(number) * multipliers[suffix]))
        except ValueError:
            pass  # unexpected format: fall back to plain digit extraction
    # Plain form: drop thousands separators and any other non-digit characters.
    digits = "".join(ch for ch in text if ch.isdigit())
    return int(digits) if digits else 0


def scrape_data(username):
    """Open a user's profile page in the browser and return their follower count.

    Args:
        username: Instagram username substituted into ``URL``.

    Returns:
        int: the follower count parsed from the profile header, or 0 when the
        count element cannot be found (a "try again later" notice is printed
        in that case).
    """
    driver.get(URL.format(username))
    candidate_xpaths = (
        # Regular layout: the follower count is wrapped in a link.
        '/html/body/div[1]/section/main/div/header/section/ul/li[2]/a/span',
        # For people who you don't follow but follow you and have private accounts
        '/html/body/div[1]/section/main/div/header/section/ul/li[2]/span/span',
    )
    for xpath in candidate_xpaths:
        try:
            count_text = driver.find_element_by_xpath(xpath).text
        except Exception:
            continue  # element missing in this layout; try the next one
        return _parse_follower_count(count_text)

    # Neither layout matched. Return 0 instead of crashing so the caller can
    # keep going with the remaining followers.
    print("\n"+"-"*28+"\n> Please try this script again later!"+"\n"+"-"*28)
    return 0
def driver_login():
    """Sign in to Instagram in the Selenium browser using the stored credentials.

    Loads the Instagram home page, types ``usernameGlobal`` and
    ``passwordGlobal`` into the login form and submits it with RETURN.
    Fixed three-second sleeps follow the page load and the submit.
    """
    driver.get("https://www.instagram.com")
    time.sleep(3)

    credentials = (("username", usernameGlobal), ("password", passwordGlobal))
    for field_name, value in credentials:
        field = driver.find_element_by_xpath("//input[@name='{}']".format(field_name))
        field.send_keys(value)
    # `field` is now the password input; pressing RETURN there submits the form.
    field.send_keys(Keys.RETURN)
    time.sleep(3)
    # -- This is for if you have two factor authentication enabled --
    # element = driver.find_element_by_xpath("//input[@name='verificationCode']")
    # key = input("Enter Activation key: ")
    # element.send_keys(key)
    # element.send_keys(Keys.RETURN)
    # time.sleep(3)
def output_result(size, result):
    """Ask how many top entries to show and print them as a rich table.

    Args:
        size: upper bound accepted for the requested number of rows (the
            caller passes the number of followers).
        result: mapping of follower username -> follower count, already
            ordered from most to least followed.

    Returns:
        None. The table is printed through the module-level ``console``.
    """
    from itertools import islice

    invalid_input_msg = "\n"+"-"*28+"\n> Invalid input. (Must be a number & less then your follower count)"+"\n"+"-"*28

    # Get user to select how many of the top followed followers they want
    while True:
        print("\n"+"*-=-*"*10)
        try:
            n_input = int(input("> How many of your top followed followers do you want to see?\n> E.g 5 for top 5.\n> "))
        except ValueError:
            print(invalid_input_msg)
            continue
        if 0 <= n_input <= size:
            break
        # Out-of-range values used to re-prompt silently; say why instead.
        print(invalid_input_msg)

    # Make the table for a clean user friendly output and print it out
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Your Followers", style="dim", width=12)
    table.add_column("There Follower Count")
    # Take the first n_input entries in order. Indexing with [x-1] started at
    # -1, which showed the least-followed entry first and dropped the n-th.
    for follower, follower_count in islice(result.items(), n_input):
        # rich only accepts renderables (e.g. str) as cells, so convert the int.
        table.add_row(follower, str(follower_count))
    console.print(table)
if __name__ == "__main__":
    # Script entry point: collect followers, scrape each follower's own
    # follower count in a browser, then show the top N.
    list_of_followers = get_followers()
    # Initialize the selenium driver. `driver` stays a module-level global
    # because driver_login() and scrape_data() look it up by name.
    driver = webdriver.Chrome(ChromeDriverManager().install())
    try:
        driver_login()
        result = {}
        for follower in list_of_followers:
            result[follower] = scrape_data(follower)
    finally:
        # Always close the browser, even if login or scraping raises.
        driver.quit()
    # Sort the dictionary by descending order
    result = dict(sorted(result.items(), key=lambda x: x[1], reverse=True))
    print("\n> FINISHED")
    output_result(len(list_of_followers), result)
    # exit() is a convenience injected by the `site` module; raise SystemExit
    # directly so the script does not depend on it.
    raise SystemExit(0)
解决方案
如果您使用代理,就可以发出几乎不受限制的请求。您可以从各种网站购买成千上万个代理,并通过字典轮换使用它们。
只需把代理字典传给您的 GET 请求即可:
# Map each URL scheme to the proxy that requests of that scheme should use.
# http_proxy, https_proxy and ftp_proxy are not defined in this snippet --
# presumably proxy URL strings; supply your own.
proxyDict = {
"http" : http_proxy,
"https" : https_proxy,
"ftp" : ftp_proxy
}
# url and headers are likewise expected to be defined by the caller.
r = requests.get(url, headers=headers, proxies=proxyDict)
对于 Selenium 也可以采用类似的做法(摘自这个答案):
PROXY = "1.111.111.1:8080"  # your proxy, as host:port
# ChromeOptions is provided by the webdriver module (WebDriverWait has no such attribute).
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)
# `options=` replaces the deprecated `chrome_options=` keyword.
chrome = webdriver.Chrome(options=chrome_options)
# WebDriver.get() needs a full URL including the scheme.
chrome.get("https://instagram.com")
推荐阅读
- php - laravel 搜索belongsToMany 关系
- apache-kafka - KeyValueStore.get() 返回不一致的结果
- javascript - 如何添加功能以确保特定用户已经失聪
- javascript - PIXI.js 精灵在应用过滤器后失去旋转
- python - 如果安装文件未命名为 setup.py,为什么包安装失败?
- java - 为什么我无法从我的文件中读取 List 对象(Java IO)
- bash - 如何在 Bash 中将子字符串与字符串匹配
- excel - 声明对象变量以保存工作表引用时 VBA 运行时错误“9”
- r - 在 ggplot 中使用 R 中的 x 和 y 变量创建直方图
- flutter - 切换到其他屏幕时颤动黑屏