首页 > 技术文章 > Python抓取网页小例子

wumac 2016-09-01 08:33 原文

import urllib.request
import re
from tkinter import *
# Small demo: download the Sohu stock news index page, pull the headline
# link texts out of the HTML with a regular expression, and list them in a
# Tk text widget, one headline per line.

# Main window: 500x300 pixels, placed at screen offset (400, 300).
win = Tk()
win.geometry('500x300+400+300')

# Text widget that receives the extracted headlines.
t = Text(win)
t.pack()

url = 'http://stock.sohu.com/news/'

# Use the response as a context manager so the underlying connection is
# closed as soon as the body has been read (the original never closed it).
with urllib.request.urlopen(url) as response:
    html = response.read()

# Turn the raw bytes into str using the GBK codec.
html = html.decode('GBK')

# Anchor tags of the form
#   <a test=a href='http://stock.sohu.com/<dir>/<name>.shtml' target='_blank'>text</a>
# Groups: (1) path part before the last slash, (2) file name, (3) link text.
# The literal dots are escaped so they match only '.', and a raw string keeps
# the backslashes intact. re.S lets '.' span newlines inside a match.
pattern = re.compile(
    r"<a test=a href='http://stock\.sohu\.com/(.*?)/(.*?)\.shtml'"
    r" target='_blank'>(.*?)</a>",
    re.S,
)
items = pattern.findall(html)
for item in items:
    # item[2] is the link text; append it followed by a line break.
    t.insert(END, item[2] + '\n')

# Enter the Tk event loop; without it the window is never displayed or
# serviced when this file is run as a plain script.
win.mainloop()
    

 

推荐阅读