首页 > 解决方案 > TypeError: list indices must be integers or slices, not str 为什么列表索引不是数字

问题描述

import requests
from bs4 import BeautifulSoup
import urllib.request
import re

with open('crawlingweb.csv')as f:
    content=f.readlines()
    content=[x.strip() for x in content]

for i in content:
    content[i].replace('[', '').replace(']', '')
    req = requests.get(content[i])
    html = req.text
    data = re.sub('[^0-9a-zA-Z\\s\\.\\,]', '', string=html).lower()
    data = re.sub('<[^>]*>', '', string=html)
    data = re.sub('[^ ㄱ-ㅣ가-힣]+', '', string=html)
    print(data)

content[i].replace('[', '').replace(']', '') 这一行报错了。我想要的是循环执行这段代码,依次爬取 content 中的每一个地址,并把结果打印出来。

import requests
from bs4 import BeautifulSoup
import urllib.request
import re

with open('crawlingweb.csv')as f:
    content=f.readlines()
    content=[x.strip() for x in content]
    content[183].replace('[','').replace(']','')

req = requests.get(content[183])
html = req.text

data = re.sub('[^0-9a-zA-Z\\s\\.\\,]', '', string=html).lower()
data = re.sub('<[^>]*>','',string=html)
data = re.sub('[^ ㄱ-ㅣ가-힣]+','',string=html)
print(data)

上面这段直接使用固定索引(content[183])的代码可以正常运行。

谢谢你的阅读

标签: python-3.x, web-crawler

解决方案


因为 for i in content 遍历的是列表中的元素本身,所以 i 是一个字符串而不是整数索引,content[i] 就会抛出 TypeError:

with open('crawlingweb.csv')as f:               # this is a file
    content=f.readlines()                       # this is a list of strings
    content=[x.strip() for x in content]        # this is still a list of strings

for i in content:                               # i is a string from your list

你想要的是用 enumerate 同时取得索引和元素(下面两种写法任选其一):

for index,line  in enumerate(content):
    # to replace the modified contentitem in content:
    content[index] = line.replace('[', '').replace(']', '')
    req = requests.get(content[index])

    # or - to simply fix 'line' before doing your request:
    line = line.replace('[', '').replace(']', '')
    req = requests.get(line)

或者更进一步,在读取文件、构建列表时就直接完成替换:

with open('crawlingweb.csv')as f:               
    content=f.readlines()                       
    content=[x.strip().replace('[', '').replace(']', '') for x in content]

推荐阅读