python - 如何打印文本文件的某些部分?Python
问题描述
import os
import re
from collections import Counter
from collections import OrderedDict
# Script setup: report the working directory, build the access-log pattern,
# then switch to the directory that holds the log files.
# NOTE(review): fileNames, textInfo and fileDict are not read again in the visible script.
fileNames = []
textInfo = []
fileDict = {}
currentDirectoryPath = os.getcwd()
print(currentDirectoryPath)
# Pattern for one access-log line with four named groups: clientIP (dotted
# quad), timestamp (dd/Mon/yyyy date part only), action (3-4 capital letters)
# and statuscode (three digits, first digit 1-5).
# NOTE(review): only the first fragment is a raw string; the other fragments
# contain backslash sequences (\d, \s, \") inside ordinary string literals.
regexp = re.compile(
r'(?P<clientIP>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\['
+ '(?P<timestamp>\d{2}/[A-Z][a-z]{2}/\d\d\d\d).+\"'
+ '(?P<action>[A-Z]{3,4}).+\"'
+ '\s*(?P<statuscode>[1-5][0-9][0-9])'
)
# Change into the log directory so the relative file name opened later resolves there.
os.chdir("/content/drive/log")
currentDirectoryPath = os.getcwd()
listOfFileNames = os.listdir(currentDirectoryPath)
#for files in listOfFileNames :
#print(files)
# Read the log once: tally client IP, action and status code for each line
# that matches regexp, and count matched versus failed lines.
# NOTE(review): the file is opened without a with-block and no close() call
# appears in the visible code.
f = open('access_1.log', 'r')
matched = 0
failed = 0
cnt_clientIPs = Counter()
cnt_clientAction = Counter()
# NOTE(review): cnt_clientTimeStamp is created here but never updated in the visible code.
cnt_clientTimeStamp = Counter()
cnt_clientStatusCode = Counter()
for line in f:
m = re.match(regexp, line)
if m:
cnt_clientIPs.update([m.group('clientIP')])
cnt_clientAction.update([m.group('action')])
cnt_clientStatusCode.update([m.group('statuscode')])
matched += 1
else:
failed += 1
continue
# Print the four captured fields of the current match.
# NOTE(review): the literal opens with five double quotes, so the printed text
# begins with two literal quote characters — confirm that is intended.
print("""""\
client .........: %s
timestamp ......: %s
action .........: %s
statuscode.........: %s
""" % ( m.group('clientIP'),
m.group('timestamp'),
m.group('action'),
m.group('statuscode'),
))
# Ask how many clients to list, then print the match statistics followed by
# the most frequent client IPs and their hit counts.
userInputIP = input("Enter how many of the top clients you want to see. ")
print('[*] %d lines matched the regular expression' % (matched))
print('[*] %d lines failed to match the regular expression' % (failed), end='\n\n')
print('[*] ============================================')
print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
print('[*] ============================================')
# userInputIP is still a string here; int() raises ValueError if it is not a whole number.
for clientIP, count in cnt_clientIPs.most_common(int(userInputIP)):
print('[*] %30s: %d' % (clientIP, count))
print('[*] ============================================')
# Optionally restrict the client report to a date range typed in by the user;
# any answer other than "yes" (case-insensitive) repeats the top-N report.
userInput=input("Would you like to see all clients from a certian date? (Yes or no)")
if userInput.lower() == "yes":
startdateInput = input("What date would you like to start at? ")
enddateInput= input("What date would you like to end at? ")
# Dates are expected as day/abbreviated-month/year, e.g. 05/Feb/2016.
# NOTE(review): the imports visible at the top of the script do not include datetime.
date_start = datetime.strptime(startdateInput, '%d/%b/%Y')
date_end = datetime.strptime(enddateInput, '%d/%b/%Y')
print('[*] ============================================')
print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
print('[*] ============================================')
# NOTE(review): f was already read to the end by the counting loop earlier in
# the script, so this loop yields no items; in addition, each item of f would
# be one line of text rather than a (clientIP, count) pair.
for clientIP, count in f:
# NOTE(review): l is not assigned anywhere in the visible code.
log_date = l.split()[3].lstrip("[").split(":")[0]
log_date = datetime.strptime(log_date, '%d/%b/%Y')
if date_start <= log_date <= date_end:
print('[*] %30s: %d' % (clientIP, count))
else:
print('[*] ============================================')
print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
print('[*] ============================================')
for clientIP, count in cnt_clientIPs.most_common(int(userInputIP)):
print('[*] %30s: %d' % (clientIP, count))
print('[*] ============================================')
Enter how many of the top clients you want to see. 10
[*] 49997 lines matched the regular expression
[*] 3 lines failed to match the regular expression
[*] ============================================
[*] 10 Most Frequently Occurring Clients Queried
[*] ============================================
[*] 205.167.170.15: 15695
[*] 79.142.95.122: 3207
[*] 52.22.118.215: 734
[*] 84.112.161.41: 712
[*] 37.1.206.196: 371
[*] 91.200.12.22: 287
[*] 178.191.155.244: 284
[*] 198.50.160.104: 249
[*] 84.115.10.14: 234
[*] 93.83.250.186: 219
[*] ============================================
Would you like to see all clients from all time or a certian date? (Yes or no)yes
What date would you like to start at? 05/feb/2016
What date would you like to end at? 10/feb/2016
[*] ============================================
[*] 10 Most Frequently Occurring Clients Queried
[*] ============================================
[*] ============================================
我正在读取一个日志文件,想根据用户的输入只打印其中的一部分:用户给出开始日期和结束日期(这些日期都出现在文件中),我想打印这两个日期之间每一行的 ClientIP,而不是整行内容。
# Excerpt of the date-range branch from the full script: read a start and end
# date, then try to print the clients whose log date falls inside that range.
userInput=input("Would you like to see all clients from a certian date? (Yes or no)")
if userInput.lower() == "yes":
startdateInput = input("What date would you like to start at? ")
enddateInput= input("What date would you like to end at? ")
# Dates are expected as day/abbreviated-month/year, e.g. 05/Feb/2016.
date_start = datetime.strptime(startdateInput, '%d/%b/%Y')
date_end = datetime.strptime(enddateInput, '%d/%b/%Y')
print('[*] ============================================')
print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
print('[*] ============================================')
# NOTE(review): in the full script f has already been iterated to the end
# before this point, so this loop yields no items; each item of f would also
# be one line of text rather than a (clientIP, count) pair.
for clientIP, count in f:
# NOTE(review): l is not assigned anywhere in the visible code.
log_date = l.split()[3].lstrip("[").split(":")[0]
log_date = datetime.strptime(log_date, '%d/%b/%Y')
if date_start <= log_date <= date_end:
print('[*] %30s: %d' % (clientIP, count))
如您所见,我可以打印全部时间范围内的客户端,因为那就是文件中的所有内容;但我希望程序只取文件中位于指定日期范围内的那一部分。下面是文件中的文本示例。例如我想要 2016 年 1 月 27 日到 2016 年 2 月 10 日之间的记录,那么对于下面给出的示例,它应当打印这 4 行的客户端 IP:188.23.144.118 192.164.248.191 192.164.248.191 205.167.170.15
80.110.186.51 - - [21/Dec/2015:17:20:12 +0100] "GET /images/stories/raith/oststeiermark.png HTTP/1.1" 200 65225 "http://www.almhuette-raith.at/" "Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1" "-"
188.23.144.118 - - [27/Jan/2016:20:50:42 +0100] "GET /images/phocagallery/Ferienwohnung_2/thumbs/phoca_thumb_m_2_wohnkche.jpg HTTP/1.1" 200 3141 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=4:ferienwohnung2&Itemid=53" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586" "-"
192.164.248.191 - - [05/Feb/2016:23:53:03 +0100] "GET /images/phocagallery/almhuette/thumbs/phoca_thumb_m_jaegerzaun_gr.jpg HTTP/1.1" 200 5649 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=1&Itemid=53" "Mozilla/5.0 (Linux; Android 5.0.2; SAMSUNG SM-T530 Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.2 Chrome/38.0.2125.102 Safari/537.36" "-"
192.164.248.191 - - [05/Feb/2016:23:53:03 +0100] "GET /components/com_phocagallery/assets/js/shadowbox/src/skin/classic/skin.css HTTP/1.1" 200 5236 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=1&Itemid=53" "Mozilla/5.0 (Linux; Android 5.0.2; SAMSUNG SM-T530 Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.2 Chrome/38.0.2125.102 Safari/537.36" "-"
205.167.170.15 - - [10/Feb/2016:17:40:23 +0100] "GET /images/phocagallery/thumbs/phoca_thumb_l_winterfoto%209.jpg HTTP/1.1" 200 56144 "-" "Go-http-client/1.1" "-"
194.44.123.118 - - [18/Feb/2016:07:34:54 +0100] "GET /administrator/ HTTP/1.1" 200 4263 "-" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36" "-"
解决方案
您可以使用:
from datetime import datetime
# Print every access-log line whose date lies in the inclusive range
# [date_start, date_end].
date_start = datetime.strptime("27/jan/2016", '%d/%b/%Y')
date_end = datetime.strptime("10/feb/2016", '%d/%b/%Y')
with open("access.log") as f:
    for l in f:
        try:
            # The 4th whitespace-separated field looks like
            # "[27/Jan/2016:20:50:42"; keep only the date before the first ":".
            log_date = l.split()[3].lstrip("[").split(":")[0]
            log_date = datetime.strptime(log_date, '%d/%b/%Y')
        except (IndexError, ValueError):
            # Blank or malformed line without a parsable date: skip it
            # instead of aborting the whole scan.
            continue
        if date_start <= log_date <= date_end:
            # l still carries its line ending; strip it so print() does not
            # emit an empty line after every record.
            print(l.rstrip("\n"))
推荐阅读
- ios - 动画多个 UIViews alpha
- timestamp - 在 AutoHotkey 中获取毫秒
- java - JavaFX:TableView Cell 像素化 ImageView - 如何撤消转换?
- c - C:我是不是传错了字符串?
- algorithm - 2个序列之间的最佳映射
- python-2.7 - Issue with duplicate values generated when using factory_boy and randint
- sql - oracle sql中星号的使用
- ace-editor - Ace Editor (1.4.3) 在按下 [enter] 时在错误的列上断线
- python-3.x - 发送带有内嵌图像的电子邮件 Flask-Mail?
- python - 使用 Pandas 返回单日总和