首页 > 技术文章 > python字典生成嵌套文件夹

zhujunzoe 2018-04-24 12:36 原文

假定有这样一个场景:爬虫爬取的数据以json格式保存,每条记录都是一个形如python字典的字符串。

 1 # -*- coding=utf-8 -*-
 2 import json
 3 import re
 4 import os
 5 
 6 # 包含多个字典的列表
 7 movie_dict_list = [
 8     {"movie_rating": '8.6', "movie_column": '剧情/冒险/西部', "movie_title": "被解救的姜戈", "movie_director":'Quentin'},
 9     {"movie_rating": '8.4', "movie_column": '剧情/犯罪', "movie_title": "无耻混蛋", "movie_director":'Quentin'},
10     {"movie_rating": '8.8', "movie_column": '剧情/喜剧/犯罪', "movie_title": "低俗小说","movie_director":'Quentin'},
11     {"movie_rating": '9.1', "movie_column": '剧情/动作/科幻/犯罪/惊悚', "movie_title": "蝙蝠侠:黑暗骑士", "movie_director":'Nolan'},
12     {"movie_rating": '8.5', "movie_column": '剧情/悬疑/惊悚/犯罪', "movie_title": "记忆碎片","movie_director":'Nolan'},
13     {"movie_rating": '8.8', "movie_column": '剧情/悬疑/惊悚', "movie_title": "致命魔术","movie_director":'Nolan'}]
14 
15 # 将字典写入json格式的文件
16 with open('F:/movie.json','wb') as file:
17     for movie_dict in movie_dict_list:
18         file.write(json.dumps(movie_dict,ensure_ascii=False)  + '\n')
19 
20 # movie.json
21 '''{
22     "movie_rating": '8.6',
23     "movie_director": "Quentin",
24     "movie_title": "被解救的姜戈",
25     "movie_column": "剧情/冒险/西部"
26 } {
27     "movie_rating": '8.4',
28     "movie_director": "Quentin",
29     "movie_title": "无耻混蛋",
30     "movie_column": "剧情/犯罪"
31 } {
32     "movie_rating": '8.8',
33     "movie_director": "Quentin",
34     "movie_title": "低俗小说",
35     "movie_column": "剧情/喜剧/犯罪"
36 } {
37     "movie_rating": '9.1',
38     "movie_director": "Nolan",
39     "movie_title": "蝙蝠侠:黑暗骑士",
40     "movie_column": "剧情/动作/科幻/犯罪/惊悚"
41 } {
42     "movie_rating": '8.5',
43     "movie_director": "Nolan",
44     "movie_title": "记忆碎片",
45     "movie_column": "剧情/悬疑/惊悚/犯罪"
46 } {
47     "movie_rating": '8.8',
48     "movie_director": "Nolan",
49     "movie_title": "致命魔术",
50     "movie_column": "剧情/悬疑/惊悚"
51 }'''
52 
53 
54 
55 # 将json中的unicode字符串转换为str,(递归的把list和dict里的Unicode对象encode成str。)
56 # def byteify(input):
57 #     if isinstance(input, dict):
58 #         return {byteify(key):byteify(value) for key,value in input.iteritems()}
59 #     elif isinstance(input, list):
60 #         return [byteify(element) for element in input]
61 #     elif isinstance(input, unicode):
62 #         return input.encode('utf-8')
63 #     else:
64 #         return input
65 
66 
67 # 将json文件转为python字典,逐行读取,应对大文件
68 f = open('F:/movie.json','rb')
69 movie_dict_list = []
70 for line in f:
71     movie_a_line = json.loads(line)
72 
73     # 题外话,假如上面将字典写入json文件时未添加禁用ASCII码 ,那么json文件将以unicode字符串保存,
74     # 需要调用byteify函数,将json文件中的unicode字符串转换为python的str
75     # movie_a_line  = byteify(json.loads(line))
76     
77     movie_dict_list.append(movie_a_line)
78 f.close()
79 
80 # 提取字典中的某一个键作为文件名
81 for movie_dict in movie_dict_list:
82     folder = 'F:/test_dict_to_folder/' + movie_dict.get('movie_director')
83     if not os.path.exists(folder):
84         os.mkdir(folder)
85     # 正则用于剔除windows文件名中的非法字符
86     file = open(folder +  '/' + re.sub(r'[?\\*|“<>:/]','', movie_dict.get('movie_title')) + '.txt','w')
87     file.write(movie_dict.get('movie_column') + '\n' + movie_dict.get('movie_rating'))
88     file.close()
89 print 'Done!'

以上代码并不是一个完整的脚本,只是把Python相关的基础知识点糅合在一起,如有不足,欢迎指正。

 

推荐阅读