python - Python | 用 Excel 打开 CSV 文件时的 Unicode 问题
问题描述
我有一个 Python 脚本,可以从 Telegram 群组中提取用户数据。
这是代码:
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty, InputPeerChannel, InputPeerUser
from telethon.errors.rpcerrorlist import PeerFloodError, UserPrivacyRestrictedError
from telethon.tl.functions.channels import InviteToChannelRequest
import sys
import csv
import traceback
import time
import random
import re
# Telegram API credentials and the account phone number (placeholders to be
# replaced by the person running the script).
api_id = 000000 # YOUR API_ID
api_hash = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' # YOUR API_HASH
phone = '+34000000000' # YOUR PHONE NUMBER, INCLUDING COUNTRY CODE

# Module-level client shared by every function below.
client = TelegramClient(phone, api_id, api_hash)
client.connect()

# If this session is not authorized yet, request a login code for `phone`
# and sign in with the code typed at the prompt.
if not client.is_user_authorized():
    client.send_code_request(phone)
    client.sign_in(phone, input('Enter the code: '))
def add_users_to_group():
    """Invite the users listed in a CSV file into an interactively chosen megagroup.

    The CSV path is taken from ``sys.argv[1]``. The first row is skipped as a
    header; every following row supplies ``username``, ``id`` and
    ``access_hash`` in columns 0, 1 and 2. The target group and the lookup
    mode (1 = by username, 2 = by ID) are asked for on the console.

    Exits through ``sys.exit`` when the selected mode is invalid or after
    more than ten unexpected errors. Stops adding users as soon as Telegram
    reports a flood error.
    """
    input_file = sys.argv[1]
    users = []
    with open(input_file, encoding='UTF-8') as f:
        rows = csv.reader(f, delimiter=",", lineterminator="\n")
        next(rows, None)  # skip the header row
        for row in rows:
            if not row:
                # csv.reader yields [] for blank lines; there is no user here.
                continue
            user = {'username': row[0]}
            try:
                user['id'] = int(row[1])
                user['access_hash'] = int(row[2])
            except (IndexError, ValueError):
                # Keep the user anyway: it can still be added by username.
                print('users without id or access_hash')
            users.append(user)

    chunk_size = 10
    result = client(GetDialogsRequest(
        offset_date=None,
        offset_id=0,
        offset_peer=InputPeerEmpty(),
        limit=chunk_size,
        hash=0
    ))
    # Only megagroups are offered. Chat objects without a `megagroup`
    # attribute are skipped instead of relying on a bare `except`.
    groups = [chat for chat in result.chats if getattr(chat, 'megagroup', False)]

    print('Choose a group to add members:')
    for i, group in enumerate(groups):
        print(str(i) + '- ' + group.title)
    g_index = input("Enter a Number: ")
    target_group = groups[int(g_index)]
    print('\n\nGrupo elegido:\t' + target_group.title)
    target_group_entity = InputPeerChannel(target_group.id, target_group.access_hash)

    mode = int(input("Enter 1 to add by username or 2 to add by ID: "))
    # Validate once, outside the try block: a sys.exit() raised inside it
    # would be swallowed by a catch-all handler and the loop would go on.
    if mode not in (1, 2):
        sys.exit("Invalid Mode Selected. Please Try Again.")

    error_count = 0
    for user in users:
        print("Adding {}".format(user['username']))
        if mode == 1:
            if user['username'] == "":
                continue
        elif 'id' not in user or 'access_hash' not in user:
            # Nothing to build an InputPeerUser from; not an "unexpected" error.
            print("No id or access_hash for this user. Skipping.")
            continue
        try:
            if mode == 1:
                user_to_add = client.get_input_entity(user['username'])
            else:
                user_to_add = InputPeerUser(user['id'], user['access_hash'])
            client(InviteToChannelRequest(target_group_entity, [user_to_add]))
            print("Waiting 60 Seconds...")
            time.sleep(60)
        except PeerFloodError:
            print("Getting Flood Error from telegram. Script is stopping now. Please try again after some time.")
            # Actually stop, as the message promises.
            break
        except UserPrivacyRestrictedError:
            print("The user's privacy settings do not allow you to do this. Skipping.")
        except Exception:
            # `Exception` rather than a bare except, so SystemExit and
            # KeyboardInterrupt are not counted as ordinary errors.
            traceback.print_exc()
            print("Unexpected Error")
            error_count += 1
            if error_count > 10:
                sys.exit('too many errors')
def list_users_in_group():
    """Export the members of an interactively chosen chat to a CSV file.

    Requests the account's dialogs (limit 200), prints every returned chat,
    and asks for the index of the chat to scrape. One row per participant is
    then written to ``members-<title>.csv`` in the current directory, where
    ``<title>`` is the lower-cased chat title with every run of characters
    other than a-z collapsed into a single ``-``.
    """
    chunk_size = 200
    result = client(GetDialogsRequest(
        offset_date=None,
        offset_id=0,
        offset_peer=InputPeerEmpty(),
        limit=chunk_size,
        hash=0
    ))
    # Unlike add_users_to_group, no megagroup filter is applied here: every
    # returned chat is printed and offered for selection.
    groups = []
    for chat in result.chats:
        print(chat)
        groups.append(chat)

    print('Choose a group to scrape members from:')
    for i, g in enumerate(groups):
        print(str(i) + '- ' + g.title)
    g_index = input("Enter a Number: ")
    target_group = groups[int(g_index)]
    print('\n\nGrupo elegido:\t' + target_group.title)

    print('Fetching Members...')
    all_participants = client.get_participants(target_group, aggressive=True)

    print('Saving In file...')
    # Build a file-name-safe slug from the title: non a-z characters become
    # "-", then consecutive dashes are merged.
    title_slug = re.sub(r"[^a-zA-Z]", "-", target_group.title.lower())
    title_slug = re.sub(r"-+", "-", title_slug)
    with open("members-" + title_slug + ".csv", "w", encoding='UTF-8') as f:
        writer = csv.writer(f, delimiter=",", lineterminator="\n")
        writer.writerow(['username', 'user id', 'access hash', 'name', 'group', 'group id'])
        for user in all_participants:
            # Missing (falsy) username or name parts are written as empty strings.
            username = user.username or ""
            first_name = user.first_name or ""
            last_name = user.last_name or ""
            name = (first_name + ' ' + last_name).strip()
            writer.writerow([username, user.id, user.access_hash, name, target_group.title, target_group.id])
    print('Members scraped successfully.')
def printCSV():
    """Print every data row of the CSV given in ``sys.argv[1]``, then exit.

    The first row is skipped as a header. For each remaining non-empty row
    the raw row and the parsed ``username`` / ``id`` / ``access_hash`` dict
    are printed. The function always ends by calling ``sys.exit('FINITO')``.
    """
    input_file = sys.argv[1]
    users = []
    with open(input_file, encoding='UTF-8') as f:
        rows = csv.reader(f, delimiter=",", lineterminator="\n")
        next(rows, None)  # skip the header row
        for row in rows:
            if not row:
                # csv.reader yields [] for blank lines; nothing to show.
                continue
            user = {'username': row[0]}
            try:
                user['id'] = int(row[1])
                user['access_hash'] = int(row[2])
            except (IndexError, ValueError):
                # Same tolerance as add_users_to_group: report and carry on
                # instead of aborting the whole listing on one bad row.
                print('users without id or access_hash')
            users.append(user)
            print(row)
            print(user)
    sys.exit('FINITO')
# print('Fetching Members...')
# all_participants = []
# all_participants = client.get_participants(target_group, aggressive=True)
# Entry point: ask which action to run and dispatch to the matching function.
print('What do you want to do:')
mode = int(input("Enter \n1-List users in a group\n2-Add users from CSV to Group (CSV must be passed as a parameter to the script\n3-Show CSV\n\nYour option: "))
if mode == 1:
    list_users_in_group()
elif mode == 2:
    add_users_to_group()
elif mode == 3:
    printCSV()
else:
    # An unknown option used to fall through without any feedback.
    sys.exit("Invalid option selected.")
提取成员后,当我打开 members--.csv 文件时,发现 Unicode 字符显示有问题。
我该如何解决这个问题?
我使用的是 Excel 2016。
解决方案
问题不在于您的代码,而在于 Excel。当 Excel 打开文件时,它使用您的 Windows 版本的默认编码,而该编码从来不是 UTF-8,而是 Unicode 出现之前发明的众多代码页之一。
如果您使用文本导入向导,则可以选择文本编码,如果需要,您可以在那里选择 UTF-8。但每次您需要打开 CSV 时,这都是一件很痛苦的事情。
有一种方法可以让 Excel 识别文件是 UTF-8 编码并自动使用它,许多 Microsoft 产品都使用相同的技巧。如果文件以 UTF-8 编码的 Unicode 字节顺序标记 (BOM) U+FEFF(3 字节序列 0xEF、0xBB、0xBF)开头,Excel 将识别该文件为 UTF-8 编码并覆盖其默认值。如果您使用特殊编码 'utf_8_sig',Python 会自动在文件开头写入此 BOM 序列。
with open("members-" + re.sub("-+","-",re.sub("[^a-zA-Z]","-",str.lower(target_group.title))) + ".csv","w",encoding='utf_8_sig') as f:
不建议在每个文件的开头都加上此特殊签名;只有在确定文件会被需要它的应用程序读取时才应使用。
推荐阅读
- javascript - 视频播放完毕后如何添加自定义按钮
- ios - Xcode:ipatool 因异常而失败
- c++ - 如何使用“现代 CMake”设置编译器标志?
- python - 从 MySQL 数据库中递归获取所有子项
- jquery - Any way to automatically close the hamburger menu on :focus with jQuery?
- c++ - 如何在 C++ 中实现神经网络
- django - 无法在 DRF 中捕获 31 日至 2 月的无效日期
- javascript - Vue 不会在“间接”更改表达式的值时更新 DOM
- python - 使用带有无头 chrome 的 webdriver 下载文件时出错
- node.js - mongoDb 数据库表单数据空响应