首页 > 解决方案 > 如何将大 JSON 文件导入 MYSQL

问题描述

我必须从给定的 JSON 文件(50 MB 大小)读取数据并将其导入 MYSQL。我尝试使用 Python 并失败(出现错误)。我不知道如何以最有效的方式将 JSON 文件导入 MYSQL。请帮帮我。

到目前为止我所做的是:

import os
import json
import MySQLdb

# read JSON file which is in the next parent folder
file = 'vehicle.json'
json_data = open(file).read()
json_obj = json.loads(json_data)


#print(json_obj)

# do validation and checks before insert
def validate_string(val):
    if val is not None:
        if type(val) is int:
            # for x in val:
            #    print(x)
            return str(val).encode('utf-8')
        else:
            return val


# connect to MySQL
con = MySQLdb.connect(host="127.0.0.1", user="root", passwd="PASSWORD", db="vehicle", port=3306)
cursor = con.cursor()

cursor.execute("DROP TABLE IF EXISTS vehicle")
sql1 = """ 
CREATE TABLE vehicle (
    CarYear int(4) ,
    make varchar(15),
    model varchar(30),
    cylinders varchar(2),
    VClass varchar(30),
    drive varchar(30),
    trany  varchar(30),
    displ varchar(20),
    eng_dscr varchar(20),
    trans_dscr varchar(30),
    mpgData varchar(5),
    evMotor varchar(10),
    youSaveSpend varchar(20),
    fuleType varchar(20),
    fuleType1 varchar(30),
    barrelsA08 varchar(2),
    charge120 varchar(2),
    charge240 varchar(2),
    city08 varchar(4),
    city08U varchar(2),
    cityA08 varchar(2),
    cityA08U varchar(2),
    cityCD varchar(2),
    cityE varchar(2),
    cityUF varchar(2),
    co2 varchar(2),
    coA2 varchar(2),
    co2TailpipeAGpm varchar(2),
    co2TailpipeGpm varchar(20),
    comb08 varchar(3),
    comb08U varchar(2),
    combA08 varchar(2),
    combA08U varchar(2),
    combE varchar(2),
    combinedCD varchar(2),
    combinedUF varchar(2),
    engld varchar(8),
    feScore varchar(3),
    fuelCost08 varchar(5),
    fuelCostA08 varchar(2),
    ghgScore varchar(2),
    ghgScoreA varchar(2),
    highway08 varchar(4),
    highway08U varchar(2),
    highwayA08 varchar(2),
    highwayA08U varchar(2),
    highwayCD varchar(2),
    highwayE varchar(2),
    highwayUF varchar(2),
    hlv varchar(2),
    hpv varchar(2),
    id varchar(8),
    lv2 varchar(2),
    lv4 varchar(2),
    phevBlended varchar(10),
    pv2 varchar(5),
    pv4 varchar(5),
    CarRange varchar(5),
    rangeCity varchar(5),
    rangeCityA varchar(5),
    rangeHwy varchar(5),
    rangeHwyA varchar(5),
    UCity varchar(5),
    UCityA varchar(5),
    UHighway varchar(5),
    UHighwayA varchar(5),
    guzzler varchar(10),
    tCharger varchar(10),
    sCharger varchar(10),
    atvType varchar(10),
    fuelType2 varchar(10),
    rangeA varchar(10),
    mfrCode varchar(10),
    c240Dscr varchar(10),
    charge240b varchar(5),
    c240bDscr varchar(10),
    createdOn varchar(30),
    modifiedOn varchar(30),
    startStop varchar(10),
    phevCity varchar(5),
    phevHwy varchar(5),
    phevComb varchar(5)   
)"""

cursor.execute(sql1)

# parse json data to SQL insert
for i, item in enumerate(json_obj):
    CarYear = validate_string(item.get("year", None))
    make = validate_string(item.get("make", None))
    model = validate_string(item.get("model", None))
    cylinders = validate_string(item.get("cylinders", None))
    VClass = validate_string(item.get("VClass", None))
    drive = validate_string(item.get("drive", None))
    trany = validate_string(item.get("trany", None))
    displ = validate_string(item.get("displ", None))
    eng_dscr = validate_string(item.get("eng_dscr", None))
    trans_dscr = validate_string(item.get("trans_dscr", None))
    mpgData = validate_string(item.get("mpgData", None))
    evMotor = validate_string(item.get("evMotor", None))
    youSaveSpend = validate_string(item.get("youSaveSpend", None))
    fuleType = validate_string(item.get("fuleType", None))
    fuleType1 = validate_string(item.get("fuleType1", None))
    barrelsA08 = validate_string(item.get("barrelsA08", None))
    charge120 = validate_string(item.get("charge120", None))
    charge240 = validate_string(item.get("charge240", None))
    city08 = validate_string(item.get("city08", None))
    city08U = validate_string(item.get("city08U", None))
    cityA08 = validate_string(item.get("cityA08", None))
    cityA08U = validate_string(item.get("cityA08U", None))
    cityCD = validate_string(item.get("cityCD", None))
    cityE = validate_string(item.get("cityE", None))
    cityUF = validate_string(item.get("cityUF", None))
    co2 = validate_string(item.get("co2", None))
    coA2 = validate_string(item.get("coA2", None))
    co2TailpipeAGpm = validate_string(item.get("co2TailpipeAGpm", None))
    co2TailpipeGpm = validate_string(item.get("co2TailpipeGpm", None))
    comb08 = validate_string(item.get("comb08", None))
    comb08U = validate_string(item.get("comb08U", None))
    combA08 = validate_string(item.get("combA08", None))
    combA08U = validate_string(item.get("combA08U", None))
    combE = validate_string(item.get("combE", None))
    combinedCD = validate_string(item.get("combinedCD", None))
    combinedUF = validate_string(item.get("combinedUF", None))
    engld = validate_string(item.get("engld", None))
    feScore = validate_string(item.get("feScore", None))
    fuelCost08 = validate_string(item.get("fuelCost08", None))
    fuelCostA08 = validate_string(item.get("fuelCostA08", None))
    ghgScore = validate_string(item.get("ghgScore", None))
    ghgScoreA = validate_string(item.get("ghgScoreA", None))
    highway08 = validate_string(item.get("highway08", None))
    highway08U = validate_string(item.get("highway08U", None))
    highwayA08 = validate_string(item.get("highwayA08", None))
    highwayA08U = validate_string(item.get("highwayA08U", None))
    highwayCD = validate_string(item.get("highwayCD", None))
    highwayE = validate_string(item.get("highwayE", None))
    highwayUF = validate_string(item.get("highwayUF", None))
    hlv = validate_string(item.get("hlv", None))
    hpv = validate_string(item.get("hpv", None))
    lv2 = validate_string(item.get("lv2", None))
    lv4 = validate_string(item.get("lv4", None))
    phevBlended = validate_string(item.get("phevBlended", None))
    pv2 = validate_string(item.get("pv2", None))
    pv4 = validate_string(item.get("pv4", None))
    CarRange = validate_string(item.get("CarRange", None))
    rangeCity = validate_string(item.get("rangeCity", None))
    rangeCityA = validate_string(item.get("rangeCityA", None))
    rangeHwy = validate_string(item.get("rangeHwy", None))
    rangeHwyA = validate_string(item.get("rangeHwyA", None))
    UCity = validate_string(item.get("UCity", None))
    UCityA = validate_string(item.get("UCityA", None))
    UHighway = validate_string(item.get("UHighway", None))
    UHighwayA = validate_string(item.get("UHighwayA", None))
    guzzler = validate_string(item.get("guzzler", None))
    tCharger = validate_string(item.get("tCharger", None))
    sCharger = validate_string(item.get("sCharger", None))
    atvType = validate_string(item.get("atvType", None))
    fuelType2 = validate_string(item.get("fuelType2", None))
    rangeA = validate_string(item.get("rangeA", None))
    mfrCode = validate_string(item.get("mfrCode", None))
    c240Dscr = validate_string(item.get("c240Dscr", None))
    charge240b = validate_string(item.get("charge240b", None))
    c240bDscr = validate_string(item.get("c240bDscr", None))
    createdOn = validate_string(item.get("createdOn", None))
    modifiedOn = validate_string(item.get("modifiedOn", None))
    startStop = validate_string(item.get("startStop", None))
    phevCity = validate_string(item.get("phevCity", None))
    phevHwy = validate_string(item.get("phevHwy", None))
    phevComb = validate_string(item.get("phevComb", None))

    sql2 = """INSERT INTO vehicle (CarYear,make,model,cylinders,VClass,drive,trany,displ,eng_dscr,trans_dscr,mpgData,evMotor,
    youSaveSpend,fuleType,fuleType1,barrelsA08,charge120,charge240,city08,city08U,cityA08,cityA08U,cityCD,cityE,cityUF,co2,coA2,co2TailpipeAGpm,
    co2TailpipeGpm,comb08,comb08U,combA08,combA08U,combE,combinedCD,combinedUF,engld,feScore,fuelCost08,fuelCostA08,ghgScore,ghgScoreA,highway08,
    highway08U,highwayA08,highwayA08U,highwayCD,highwayE,highwayUF,hlv,hpv,id,lv2,lv4,phevBlended,pv2,pv4,CarRange,rangeCity,rangeCityA,rangeHwy,
    rangeHwyA,UCity,UCityA,UHighway,UHighwayA,guzzler,tCharger,sCharger,atvType,fuelType2,rangeA,mfrCode,c240Dscr,charge240b,c240bDscr,createdOn,
    modifiedOn,startStop,phevCity,phevHwy,phevComb)
    VALUES (%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s), (CarYear,make,model,cylinders,VClass,drive,trany,displ,eng_dscr,trans_dscr,mpgData,evMotor,
youSaveSpend,fuleType,fuleType1,barrelsA08,charge120,charge240,city08,city08U,cityA08,cityA08U,cityCD,cityE,cityUF,co2,coA2,co2TailpipeAGpm,
co2TailpipeGpm,comb08,comb08U,combA08,combA08U,combE,combinedCD,combinedUF,engld,feScore,fuelCost08,fuelCostA08,ghgScore,ghgScoreA,highway08,
highway08U,highwayA08,highwayA08U,highwayCD,highwayE,highwayUF,hlv,hpv,id,lv2,lv4,phevBlended,pv2,pv4,CarRange,rangeCity,rangeCityA,rangeHwy,
rangeHwyA,UCity,UCityA,UHighway,UHighwayA,guzzler,tCharger,sCharger,atvType,fuelType2,rangeA,mfrCode,c240Dscr,charge240b,c240bDscr,createdOn,
modifiedOn,startStop,phevCity,phevHwy,phevComb)"""

    cursor.execute(sql2)
    con.commit()
    # print(CarYear)

con.close()

我收到了这个错误:

回溯(最后一次调用):文件“C:/Users/risha/PycharmProjects/Test/JSON_TEST.py”,第 209 行,在 cursor.execute(sql2) 文件“C:\Users\risha\AppData\Local\Programs \Python\Python36\lib\site-packages\MySQLdb\cursors.py”,第 250 行,在执行 self.errorhandler(self, exc, value) 文件“C:\Users\risha\AppData\Local\Programs\Python\ Python36\lib\site-packages\MySQLdb\connections.py”,第 50 行,在默认错误处理程序中引发错误值文件“C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\cursors .py",第 247 行,在执行 res = self._query(query) 文件 "C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\cursors.py",行411,在_query rowcount = self._do_query(q) 文件“C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\cursors.py”,第 374 行,在 _do_query db.query(q) 文件“C:\Users\risha\AppData\ Local\Programs\Python\Python36\lib\site-packages\MySQLdb\connections.py”,第 277 行,查询 _mysql.connection.query(self, query) _mysql_exceptions.ProgrammingError: (1064, “You have an error in your SQL 语法;查看与您的 MySQL 服务器版本相对应的手册,了解在 '%d,%s,%s,%s,%s,%s,%s,%s,%s,%s 附近使用的正确语法,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,% s' 在第 7 行")第 277 行,在查询 _mysql.connection.query(self, query) _mysql_exceptions.ProgrammingError: (1064, "您的 SQL 语法中有错误;请查看与您的 MySQL 服务器版本相对应的手册,以了解在 ' 附近使用的正确语法%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s ,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' 在第 7 行")第 277 行,在查询 _mysql.connection.query(self, query) _mysql_exceptions.ProgrammingError: (1064, "您的 SQL 语法中有错误;请查看与您的 MySQL 服务器版本相对应的手册,以了解在 ' 附近使用的正确语法%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s ,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' 在第 7 行")

进程以退出代码 1 结束

您也可以建议任何其他方法。我还为此创建了数据库,并且通过硬编码它工作正常,但这里显示了上述错误。

任何帮助都是不言而喻的。

标签: pythonmysqljsonpython-3.x

解决方案


推荐阅读