首页 > 技术文章 > 如何实时监控库存变动

turingbrain 2015-12-23 09:01 原文

 1 import urllib
 2 import os
 3 from bs4 import BeautifulSoup
 4 import difflib
 5 import time
 6 
 7 while(1):
 8     if os.path.exists('d:/new.txt') and os.path.exists('d:/old.txt'):
 9        os.remove('d:/old.txt')
10     if os.path.exists('d:/new.txt'):
11        os.rename('d:/new.txt','d:/old.txt')
12     url = 'http://www.zhenxin520.com/trends.asp?id=95'
13     response = urllib.urlopen(url)
14     soup =BeautifulSoup(response)
15     text = soup.find_all("td", attrs={"class": "text3"})
16     f1=open('d:/new.txt','w+')
17     f1.write(str(text))
18     f1.close()
19     if not os.path.exists('d:/old.txt'):
20        continue
21     f1=open('d:/new.txt','r')
22     f2=open('d:/old.txt','r')
23     f3=open('d:/diff.txt','w+')
24     d=difflib.Differ()
25     f1_lines=f1.readlines()
26     f2_lines=f2.readlines()
27     diff=list(d.compare(f1_lines,f2_lines))
28     diff2=list(d.compare(f2_lines,f1_lines))
29     i = -1
30     for line in diff:
31       i+=1
32       line2 = diff2[i]
33       if line[0]=='-':
34          try:
35             start = line.index('D')
36             start2 = line2.index('D')
37             substr = line[start:]
38             substr2 = line2[start2:]
39             rmstr = ['<span>','</span>','<br/>','</p>']
40             for j in rmstr:
41               substr = substr.replace(j,'')
42               substr2 = substr2.replace(j,'')
43             f3.write('new---'+substr+'\nold---'+substr2+'\n')
44          except:
45             continue
46     f3.close()
47     f2.close()
48     f1.close()
49     time.sleep(600)

diff.txt 输出结果:

new---D012红色:男L*7 XL*3 XXL*22 女M*2 童4*7 

old---D012红色:男L*7 XL*4 XXL*22 女 S M L 童4*9 

推荐阅读