首页 > 解决方案 > 我的代码失败并显示错误消息 json_line = json.loads(line) 和 cert_check(TOPCERTPATH, TOPCSVPATH)

问题描述

我有一个 python 脚本,它应该接受一些输入并将输出作为 csv 文件生成。任何帮助将不胜感激!此代码应该读取已处理的 Censys X.509 证书数据和 VirusTotal 查询的 JSON 结果。然后它应该获取颁发者通用名称、主题域名的输出 CSV,以及关于是否在 VT 上为域找到任何积极结果的真假。

我按照@Joel 的建议注释掉了 try 和 pass 语句。这是完整的追溯

Traceback (most recent call last):
  File "C:/Users/bwerner/Documents/reporter1.py", line 126, in <module>
    cert_check(TOPCERTPATH, TOPCSVPATH)
  File "C:/Users/bwerner/Documents/reporter1.py", line 112, in cert_check
    json_line = json.loads(line)
  File "C:\Users\bwerner\AppData\Local\Continuum\anaconda3\Lib\json\__init__.py", line 354, in loads
    return _default_decoder.decode(s)
  File "C:\Users\bwerner\AppData\Local\Continuum\anaconda3\Lib\json\decoder.py", line 339, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "C:\Users\bwerner\AppData\Local\Continuum\anaconda3\Lib\json\decoder.py", line 357, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

import csv
import json
import pprint
import sys


TOPCERTPATH = 'TopScoringCERTS.txt'
BOTTOMCERTPATH = 'BottomScoringCERTS.txt'
TOPCSVPATH = 'TopScoringResults.csv'
BOTTOMCSVPATH = 'BottomScoringResults.csv'
VTOUTPUTPATH = './output/'
VTOUTPUTEXT = '.txt'


pp = pprint.PrettyPrinter(indent=4)


# Check files from VirusTotal queries for any positive results
# Result is false unless any nonzero positive result is true
def vt_result_check(vt_result_path):
    vt_result = None
    #try:
    vt_result = False
    with open(vt_result_path) as vt_result_file:
            vt_data = json.load(vt_result_file)

            # Look for any positive detected referrer samples
            #try:
            for sample in (vt_data['detected_referrer_samples']):
                    if (sample['positives'] > 0):
                        vt_result = True
           # except:
              #  pass

            # Look for any positive detected communicating samples
            #try:
            for sample in (vt_data['detected_communicating_samples']):
                    if (sample['positives'] > 0):
                        vt_result = True
            #except:
               # pass

            # Look for any positive detected downloaded samples
            #try:
            for sample in (vt_data['detected_downloaded_samples']):
                    if (sample['positives'] > 0):
                        vt_result = True
            #except:
                #pass

            # Look for any positive detected URLs
            #try:
            for sample in (vt_data['detected_urls']):
                    if (sample['positives'] > 0):
                        vt_result = True
            #except:
               # pass

            # Look for a Dr. Web category of known infection source
            #try:
            if (vt_data['Dr.Web category'] == "known infection source"):
                    vt_result = True
            #except:
                #pass

            # Look for a Forecepoint ThreatSeeker category of elevated exposure
            #try:
            if (vt_data['Forcepoint ThreatSeeker category'] == "elevated exposure"):
                    vt_result = True
            #except:
                #pass

            # Look for a Forecepoint ThreatSeeker category of phishing and other frauds
            #try:
            if (vt_data['Forcepoint ThreatSeeker category'] == "phishing and other frauds"):
                    vt_result = True
            #except:
               # pass

            # Look for a Forecepoint ThreatSeeker category of suspicious content
            #try:
            if (vt_data['Forcepoint ThreatSeeker category'] == "suspicious content"):
                    vt_result = True
            #except:
                #pass

            #pp.pprint(vt_data)
    #except:
        #pass
            return vt_result


# Read the processed Censys data and outputs results for the Issuer and Subject
def cert_check(certpath, csvpath):
    with open(csvpath, 'w') as csvfile:
        fieldnames = ['issuer', 'subject_dom', 'vt_result']
        writer = csv.writer(csvfile)
        writer.writerow(['Issuer Common Name', 'Subject Common Name', 'VirusTotal Results'])
        with open(certpath) as certfile:
            for line in certfile:
                issuer_cn = None
                subject_cn = None
                subject_dom = None
                #try:
                json_line = json.loads(line)
                issuer_cn = json_line['parsed']['issuer']['common_name'][0]
                subject_cn = json_line['parsed']['subject']['common_name'][0]
                subject_dom = subject_cn.encode('utf8')
                subject_dom = subject_dom.replace('*.','')
                vt_result_path = VTOUTPUTPATH + subject_dom + VTOUTPUTEXT
                vt_result = vt_result_check(vt_result_path)
                writer.writerow([issuer_cn, subject_dom, vt_result])
                    #pp.pprint(json_line['parsed']['subject']['common_name'][0])
                    #print(issuer_cn, subject_dom,vt_result)
                #except:
                    #pass


cert_check(TOPCERTPATH, TOPCSVPATH)
cert_check(BOTTOMCERTPATH, BOTTOMCSVPATH)

标签: pythonpython-3.xpython-3.6

解决方案


好吧,例外是 JSON 解码错误,所以我怀疑您没有正确解析文件内容。

假设您的文件内容是这样的

Not json
Not json
Still not json because beginning of line {"valid": "json"}
{ "response_code": 0, "verbose_msg": "Domain not found" }

您可以扩充您当前的功能以提供更多数据

def cert_check(certpath, csvpath):
    with open(csvpath, 'r') as csvfile:
        # fieldnames = ['issuer', 'subject_dom', 'vt_result']
        # writer = csv.writer(csvfile)
        # writer.writerow(['Issuer Common Name', 'Subject Common Name', 'VirusTotal Results'])
        with open(certpath) as certfile:
            for line in certfile.readlines():
                try:
                    json_line = json.loads(line)
                    print("line is JSON!", line)
                    pprint(json_line)
                    print("Breaking loop now")
                    break
                except:
                    print("Line is NOT JSON ", line)
                    # print(traceback.format_exc())
            else:
                print("File contains no JSON!")

if __name__ == "__main__":
    with open("test_data", "w") as test_file:
        test_file.write("Not JSON \n")
        test_file.write("Not JSON \n")
        test_file.write('Not JSON even though there is json later { "response_code": 0, "verbose_msg": "Domain not found" } \n')
        test_file.write('{ "response_code": 0, "verbose_msg": "Domain not found" }')

    cert_check("test_data", "test_data")

输出

Line is NOT JSON  Not JSON 

Line is NOT JSON  Not JSON 

Line is NOT JSON  Not JSON even though there is json later { "response_code": 0, "verbose_msg": "Domain not found" } 

line is JSON! { "response_code": 0, "verbose_msg": "Domain not found" }
{'response_code': 0, 'verbose_msg': 'Domain not found'}
Breaking loop now

推荐阅读