首页 > 解决方案 > 如何从群组中只抓取在线的 Telegram 成员?

问题描述

我在 Stack Overflow 上找到了下面这段代码:


from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty, InputPeerChannel, InputPeerUser
from telethon.errors.rpcerrorlist import PeerFloodError, UserPrivacyRestrictedError
from telethon.tl.functions.channels import InviteToChannelRequest
import sys
import csv
import traceback
import time
from datetime import datetime
# Telegram API credentials and login phone number. The values below are
# placeholders and must be replaced with your own.
api_id =  123456789 # Enter your numeric Telegram API ID.
api_hash = '123456789'   # Enter your 32-character API hash.
phone = '123456789'
# `phone` is also passed as the first argument to TelegramClient
# (presumably the session name -- confirm against the Telethon docs).
client = TelegramClient(phone, api_id, api_hash)

client.connect()
# Only when the session is not yet authorized: request a login code and
# sign in with the code the user types at the prompt.
if not client.is_user_authorized():
    client.send_code_request(phone)
    client.sign_in(phone, input('Enter the code recieved to your Telegram messenger: '))


# Fetch the account's dialogs and keep only the chats flagged as megagroups.
chats = []
last_date = None
chunk_size = 200

# A single request with limit=chunk_size; no pagination is performed here.
result = client(GetDialogsRequest(
    offset_date=last_date,
    offset_id=0,
    offset_peer=InputPeerEmpty(),
    limit=chunk_size,
    hash=0,
))
chats.extend(result.chats)

# Some chat objects have no `megagroup` attribute at all. getattr() with a
# False default skips them explicitly instead of relying on a bare `except`,
# which would also hide unrelated errors and Ctrl+C.
groups = [chat for chat in chats if getattr(chat, 'megagroup', False)]

# Show a numbered menu of the collected groups and let the user pick one.
print('Choose a group to scrape members from:')
for i, g in enumerate(groups):
    print(f'{i}- {g.title}')

# The typed number is used directly as an index into `groups`.
g_index = input("Enter a Number: ")
target_group = groups[int(g_index)]

# Download the participant list of the selected group.
print('Fetching Members...')
all_participants = client.get_participants(target_group, aggressive=True)

# Export participants to Scraped.csv.
#
# A row is written only for users whose status exposes `was_online` and whose
# last-seen date is at most one calendar month old. Users whose status is
# None, or whose status object has no `was_online` attribute, are skipped.
print('Saving In file...')

# One reference time for every row instead of calling datetime.now() twice
# per user. Only .year and .month are read, so this is a coarse
# calendar-month difference, not an exact duration.
now = datetime.now()

with open("Scraped.csv", "w", encoding='UTF-8') as f:
    writer = csv.writer(f, delimiter=",", lineterminator="\n")
    writer.writerow(['username', 'user id', 'access hash', 'name', 'group', 'group id', 'last seen'])
    for user in all_participants:
        try:
            last_seen = user.status.was_online
            num_months = (now.year - last_seen.year) * 12 + (now.month - last_seen.month)
        except AttributeError:
            # No usable last-seen timestamp: skip this user. Catching only
            # AttributeError keeps the original skip behaviour without
            # swallowing KeyboardInterrupt or unrelated bugs.
            continue

        if num_months > 1:
            continue

        # Missing name parts are written as empty strings.
        username = user.username or ""
        first_name = user.first_name or ""
        last_name = user.last_name or ""
        name = (first_name + ' ' + last_name).strip()
        writer.writerow([username, user.id, user.access_hash, name, target_group.title, target_group.id, user.status])

print('Members scraped successfully.')

这段代码会把在线以及最近活跃的成员几乎都抓取下来,我该如何修改它,使其只抓取在线成员?我尝试查看了 Telethon 文档,但没能看明白……我不确定该到哪里寻求这方面的帮助,所以来这里提问。

非常感谢任何形式的帮助!

谢谢你。

标签: python, telegram, telethon

解决方案


只抓取在线成员是做不到的。

尝试这样的事情:

import telethon
from telethon.sync import TelegramClient, events
from telethon.tl.functions.channels import GetParticipantsRequest
from telethon.tl.types import InputChannel
from telethon.tl.types import ChannelParticipantsSearch
from telethon.tl.functions.channels import GetFullChannelRequest

# Example credentials; substitute your own API ID and API hash.
api_id = 123567
api_hash = "dsaopdas2131"
# "RDJR" is the first argument to TelegramClient
# (presumably the session name -- confirm against the Telethon docs).
client = TelegramClient("RDJR", api_id, api_hash)

@client.on(events.NewMessage())
async def handler(event):
    """Print the status of participants of the channel a new message came from.

    Requests up to 200 participants (offset 0, empty search filter) of the
    channel identified by the message's peer and prints each user's
    ``status`` attribute. Messages whose peer has no ``channel_id`` are
    ignored.
    """
    # A peer without `channel_id` would raise AttributeError on every such
    # message, so those messages are skipped up front.
    chat_id = getattr(event.message.peer_id, 'channel_id', None)
    if chat_id is None:
        return

    # The previous GetFullChannelRequest call was dropped: its result was
    # never used, so it only cost an extra request per incoming message.
    participants = await client(GetParticipantsRequest(
        channel=chat_id,
        filter=ChannelParticipantsSearch(''),
        offset=0,
        limit=200,
        hash=0,
    ))
    for user in participants.users:
        print(user.status)

# Enter the client's context manager, announce start-up, then hand control to
# the client (presumably blocks until it disconnects -- confirm in Telethon docs).
# NOTE(review): `as client` rebinds the module-level name to whatever
# __enter__ returns.
with client as client:
    print("USER_BOT ONLINE!")
    client.run_until_disconnected()

您需要先获取群组/频道的所有参与者,然后遍历他们并打印各自的状态。

输出类似如下。用户在线时:

UserStatusOnline(expires=datetime.datetime(2021, 7, 6, 21, 6, 22, tzinfo=datetime.timezone.utc))

或者:

None

用户离线:

UserStatusOffline(was_online=datetime.datetime(2021, 7, 6, 18, 19, 35, tzinfo=datetime.timezone.utc))

推荐阅读