python - 如何解决“charmap”编解码器无法编码字符错误
问题描述
我正在尝试从 YouTube 中提取评论,我得到:
Traceback (most recent call last):
File "<ipython-input-4-7667a2287691>", line 1, in <module>
runfile('C:/Users/ACER/Desktop/fyp/comment_extractor.py', wdir='C:/Users/ACER/Desktop/fyp')
File "C:\Users\ACER\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\ACER\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/ACER/Desktop/fyp/comment_extractor.py", line 120, in <module>
search_videos_by_keyword(service, q=keyword, part='id,snippet', eventType='completed', type='video')
File "C:/Users/ACER/Desktop/fyp/comment_extractor.py", line 105, in search_videos_by_keyword
write_to_csv(final_result)
File "C:/Users/ACER/Desktop/fyp/comment_extractor.py", line 55, in write_to_csv
comments_writer.writerow(list(row))
File "C:\Users\ACER\Anaconda3\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 845-846: character maps to `<undefined>`
我正在尝试将这些评论提取到 Excel 表中。你能告诉我为什么会出现这些错误吗?
CLIENT_SECRETS_FILE = "client_secret.json"
SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'
import os
import pickle
import google.oauth2.credentials
import csv
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import google.oauth2.credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
def get_authenticated_service():
credentials = None
if os.path.exists('token.pickle'):
with open('token.pickle', 'rb') as token:
credentials = pickle.load(token)
# Check if the credentials are invalid or do not exist
if not credentials or not credentials.valid:
# Check if the credentials have expired
if credentials and credentials.expired and credentials.refresh_token:
credentials.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
CLIENT_SECRETS_FILE, SCOPES)
credentials = flow.run_console()
# Save the credentials for the next run
with open('token.pickle', 'wb') as token:
pickle.dump(credentials, token)
return build(API_SERVICE_NAME, API_VERSION, credentials = credentials)
def write_to_csv(comments):
with open('comments.csv', 'w') as comments_file:
comments_writer = csv.writer(comments_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
comments_writer.writerow(['Video ID', 'Title', 'Comment'])
for row in comments:
comments_writer.writerow(list(row))
def get_videos(service, **kwargs):
final_results = []
results = service.search().list(**kwargs).execute()
i = 0
max_pages = 3
while results and i < max_pages:
final_results.extend(results['items'])
# Check if another page exists
if 'nextPageToken' in results:
kwargs['pageToken'] = results['nextPageToken']
results = service.search().list(**kwargs).execute()
i += 1
else:
break
return final_results
def get_video_comments(service, **kwargs):
comments = []
results = service.commentThreads().list(**kwargs).execute()
while results:
for item in results['items']:
comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
comments.append(comment)
if 'nextPageToken' in results:
kwargs['pageToken'] = results['nextPageToken']
results = service.commentThreads().list(**kwargs).execute()
else:
break
return comments
def search_videos_by_keyword(service, **kwargs):
results = get_videos(service, **kwargs)
final_result = []
for item in results:
title = item['snippet']['title']
video_id = item['id']['videoId']
comments = get_video_comments(service, part='snippet', videoId=video_id, textFormat='plainText')
final_result.extend([(video_id, title, comment) for comment in comments])
write_to_csv(final_result)
keyword = input('Enter a keyword: ')
search_videos_by_keyword(service, q=keyword, part='id,snippet', eventType='completed', type='video')
if __name__ == '__main__':
# When running locally, disable OAuthlib's HTTPs verification. When
# running in production *do not* leave this option enabled.
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
service = get_authenticated_service()
这是代码;你能建议改变吗?
解决方案
推荐阅读
- android - 根据用户意愿更改背景图像
- php - 无法使用 PHP 创建文件夹
- c# - 从设计器中创建的 DataSet 填充 DataGridView
- python - 子串搜索从随机二进制序列中获得近似匹配
- android - 适用于 Android 的动作电缆客户端
- selenium-webdriver - 如何检查硒中切换按钮的状态(开或关)?
- flutter - 强制容器内的小部件将自身限制为一行
- javascript - 使用按钮分别遍历arraylist和显示元素
- powershell - 通过电子邮件确认将内容从一个外部存储位置移动到网络
- javascript - 如何在threejs中使用buffergeometry在两点之间绘制一条动态线