首页 > 解决方案 > 使用 jsonlite 从 json 文件中读取 n 行时出现“错误:解析错误:过早的 EOF”错误

问题描述

我有一个 3GB 的 json 文件。我想先读取前 1000 行,以便了解如何清理它。当我使用以下代码导入时:

# Attempt 1: hand the file connection straight to jsonlite::stream_in() with
# pagesize = 100. This is the call that raises the "premature EOF" error.
json_data <- jsonlite::stream_in(file("2020-04-05-20_cleaned.json"), pagesize = 100)

我得到的错误是:

opening file input connection.
Error: parse error: premature EOF
                                       [{"created_at":"Mon Apr 06 00:0
                     (right here) ------^
closing file input connection.

“2020-04-05-20_cleaned.json”文件的前几行如下所示:

[{"created_at":"Mon Apr 06 00:04:46 +0000 2020","id":1246951950769434624,"id_str":"1246951950769434624","text":"RT @PampichiNews: #Prevenci\u00f3n #Coronavirus En el municipio de San Mart\u00edn Jilotepeque, Chimaltenango, se observan las medias de distanciamie\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":872612162367361025,"id_str":"872612162367361025","name":"Lis R. SAre","screen_name":"lis_sare","location":"San Francisco Menendez, El Sal","url":null,"description":null,"translator_type":"none","protected":false,"verified":false,"followers_count":542,"friends_count":1975,"listed_count":1,"favourites_count":6364,"statuses_count":146437,"created_at":"Thu Jun 08 00:32:06 +0000 2017","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/873012325690822656\/W8H2C0xO_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/873012325690822656\/W8H2C0xO_normal.jpg","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sun Apr 05 22:49:13 +0000 2020","id":1246932937003749381,"id_str":"1246932937003749381","text":"#Prevenci\u00f3n #Coronavirus En el municipio de San Mart\u00edn Jilotepeque, 
Chimaltenango, se observan las medias de distan\u2026 https:\/\/t.co\/cm492OLpKZ","display_text_range":[0,140],"source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":751919185,"id_str":"751919185","name":"Pampich\u00ed News","screen_name":"PampichiNews","location":"Amatitlan, Guatemala","url":"https:\/\/www.facebook.com\/PampichiNews","description":"#Amatitlan: Agencia de noticias independiente. Nuestra mision es informar y generar opinion. WhastApp y Mercadeo \ud83d\udcde 4271 5444","translator_type":"none","protected":false,"verified":false,"followers_count":42298,"friends_count":4233,"listed_count":148,"favourites_count":39485,"statuses_count":87354,"created_at":"Sat Aug 11 21:15:57 +0000 2012","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"EBEBEB","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme7\/bg.gif","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme7\/bg.gif","profile_background_tile":false,"profile_link_color":"990000","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"F3F3F3","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1212032286478274565\/Gm6A4vdp_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1212032286478274565\/Gm6A4vdp_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/751919185\/1517624914","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":
false,"extended_tweet":{"full_text":"#Prevenci\u00f3n #Coronavirus En el municipio de San Mart\u00edn Jilotepeque, Chimaltenango, se observan las medias de distanciamiento entre vendedores y compradores. Buena medida, buen ejemplo \ud83d\udc4f\ud83c\udffc\ud83d\udc4f\ud83c\udffc\ud83d\udc4f\ud83c\udffc https:\/\/t.co\/LJrkYRpcqi","display_text_range":[0,190],"entities":{"hashtags":[{"text":"Prevenci\u00f3n","indices":[0,11]},{"text":"Coronavirus","indices":[12,24]}],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":1246932923753865216,"id_str":"1246932923753865216","indices":[191,214],"media_url":"http:\/\/pbs.twimg.com\/media\/EU3-4zsWoAAHJ5l.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU3-4zsWoAAHJ5l.jpg","url":"https:\/\/t.co\/LJrkYRpcqi","display_url":"pic.twitter.com\/LJrkYRpcqi","expanded_url":"https:\/\/twitter.com\/PampichiNews\/status\/1246932937003749381\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":720,"h":540,"resize":"fit"},"medium":{"w":720,"h":540,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}},{"id":1246932923762302983,"id_str":"1246932923762302983","indices":[191,214],"media_url":"http:\/\/pbs.twimg.com\/media\/EU3-4zuXYAcLzMb.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU3-4zuXYAcLzMb.jpg","url":"https:\/\/t.co\/LJrkYRpcqi","display_url":"pic.twitter.com\/LJrkYRpcqi","expanded_url":"https:\/\/twitter.com\/PampichiNews\/status\/1246932937003749381\/photo\/1","type":"photo","sizes":{"medium":{"w":308,"h":231,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":308,"h":231,"resize":"fit"},"large":{"w":308,"h":231,"resize":"fit"}}},{"id":1246932924055855110,"id_str":"1246932924055855110","indices":[191,214],"media_url":"http:\/\/pbs.twimg.com\/media\/EU3-400WoAYhMjP.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU3-400WoAYhMjP.jpg","url":"https:\/\/t.co\/LJrkYRpcqi","display_url":"pic.twitter.com\/LJrkYRpcqi","expanded_url":"https:\/\/twitt
er.com\/PampichiNews\/status\/1246932937003749381\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":720,"h":540,"resize":"fit"},"medium":{"w":720,"h":540,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1246932923753865216,"id_str":"1246932923753865216","indices":[191,214],"media_url":"http:\/\/pbs.twimg.com\/media\/EU3-4zsWoAAHJ5l.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU3-4zsWoAAHJ5l.jpg","url":"https:\/\/t.co\/LJrkYRpcqi","display_url":"pic.twitter.com\/LJrkYRpcqi","expanded_url":"https:\/\/twitter.com\/PampichiNews\/status\/1246932937003749381\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":720,"h":540,"resize":"fit"},"medium":{"w":720,"h":540,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}},{"id":1246932923762302983,"id_str":"1246932923762302983","indices":[191,214],"media_url":"http:\/\/pbs.twimg.com\/media\/EU3-4zuXYAcLzMb.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU3-4zuXYAcLzMb.jpg","url":"https:\/\/t.co\/LJrkYRpcqi","display_url":"pic.twitter.com\/LJrkYRpcqi","expanded_url":"https:\/\/twitter.com\/PampichiNews\/status\/1246932937003749381\/photo\/1","type":"photo","sizes":{"medium":{"w":308,"h":231,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":308,"h":231,"resize":"fit"},"large":{"w":308,"h":231,"resize":"fit"}}},{"id":1246932924055855110,"id_str":"1246932924055855110","indices":[191,214],"media_url":"http:\/\/pbs.twimg.com\/media\/EU3-400WoAYhMjP.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU3-400WoAYhMjP.jpg","url":"https:\/\/t.co\/LJrkYRpcqi","display_url":"pic.twitter.com\/LJrkYRpcqi","expanded_url":"https:\/\/twitter.com\/PampichiNews\/status\/1246932937003749381\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":720,"h":540,"resize":"fit"},"medium":{"w":720,"h":540,"resize":"fit"},"small":{"w":680,"h":510,"r
esize":"fit"}}}]}},"quote_count":17,"reply_count":19,"retweet_count":90,"favorite_count":318,"entities":{"hashtags":[{"text":"Prevenci\u00f3n","indices":[0,11]},{"text":"Coronavirus","indices":[12,24]}],"urls":[{"url":"https:\/\/t.co\/cm492OLpKZ","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1246932937003749381","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[117,140]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"es"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"Prevenci\u00f3n","indices":[18,29]},{"text":"Coronavirus","indices":[30,42]}],"urls":[],"user_mentions":[{"screen_name":"PampichiNews","name":"Pampich\u00ed News","id":751919185,"id_str":"751919185","indices":[3,16]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"es","timestamp_ms":"1586131486967"},
{"created_at":"Mon Apr 06 00:04:46 +0000 2020","id":1246951950752649217,"id_str":"1246951950752649217","text":"?? #UnidosPorArgentina #apocalipsis apocalipsis trompetas jesus biblia tengo miedo alemania y francia #covid\u2026 https:\/\/t.co\/3ZvFpRl6zh","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1246305369313476610,"id_str":"1246305369313476610","name":"RECUPERE LA CUENTA","screen_name":"hostageveIasco","location":null,"url":null,"description":"CUENTA SPAM","translator_type":"none","protected":false,"verified":false,"followers_count":183,"friends_count":4,"listed_count":0,"favourites_count":160,"statuses_count":435,"created_at":"Sat Apr 04 05:15:39 +0000 2020","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1246640405514518529\/jHjnfEjV_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1246640405514518529\/jHjnfEjV_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/1246305369313476610\/1585977520","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"?? 
#UnidosPorArgentina #apocalipsis apocalipsis trompetas jesus biblia tengo miedo alemania y francia #covid #coronavirus coronavirus cuarentena #Covid_19 #lacasadelpapel4 #QuedateEnCasa sellos alberto hoy vassoura teclado macaco renato #BuenDomingo https:\/\/t.co\/8UGf8txNQe","display_text_range":[0,273],"entities":{"hashtags":[{"text":"UnidosPorArgentina","indices":[3,22]},{"text":"apocalipsis","indices":[23,35]},{"text":"covid","indices":[102,108]},{"text":"coronavirus","indices":[109,121]},{"text":"Covid_19","indices":[145,154]},{"text":"lacasadelpapel4","indices":[155,171]},{"text":"QuedateEnCasa","indices":[172,186]},{"text":"BuenDomingo","indices":[237,249]}],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":1246639622505013249,"id_str":"1246639622505013249","indices":[250,273],"additional_media_info":{"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/ext_tw_video_thumb\/1246639622505013249\/pu\/img\/ACLlZJo2rwJkaB-F.jpg","media_url_https":"https:\/\/pbs.twimg.com\/ext_tw_video_thumb\/1246639622505013249\/pu\/img\/ACLlZJo2rwJkaB-F.jpg","url":"https:\/\/t.co\/8UGf8txNQe","display_url":"pic.twitter.com\/8UGf8txNQe","expanded_url":"https:\/\/twitter.com\/hostagevelasco\/status\/1246639766159929346\/video\/1","type":"video","video_info":{"aspect_ratio":[9,16],"duration_millis":89570,"variants":[{"bitrate":832000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/ext_tw_video\/1246639622505013249\/pu\/vid\/360x640\/jgUAHSWanHXSeGUp.mp4?tag=10"},{"bitrate":632000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/ext_tw_video\/1246639622505013249\/pu\/vid\/320x568\/nCKHMugpIVnT-DvU.mp4?tag=10"},{"content_type":"application\/x-mpegURL","url":"https:\/\/video.twimg.com\/ext_tw_video\/1246639622505013249\/pu\/pl\/t3VbCt07x8_5bfMh.m3u8?tag=10"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":360,"h":640,"resize":"fit"},"medium":{"w":360,"h":640,"resize":"fit"},"small":{"w":360,"h":640,"resize":"fit"}},"source_status_i
d":1246639766159929346,"source_status_id_str":"1246639766159929346","source_user_id":1214430662633172992,"source_user_id_str":"1214430662633172992"}]},"extended_entities":{"media":[{"id":1246639622505013249,"id_str":"1246639622505013249","indices":[250,273],"additional_media_info":{"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/ext_tw_video_thumb\/1246639622505013249\/pu\/img\/ACLlZJo2rwJkaB-F.jpg","media_url_https":"https:\/\/pbs.twimg.com\/ext_tw_video_thumb\/1246639622505013249\/pu\/img\/ACLlZJo2rwJkaB-F.jpg","url":"https:\/\/t.co\/8UGf8txNQe","display_url":"pic.twitter.com\/8UGf8txNQe","expanded_url":"https:\/\/twitter.com\/hostagevelasco\/status\/1246639766159929346\/video\/1","type":"video","video_info":{"aspect_ratio":[9,16],"duration_millis":89570,"variants":[{"bitrate":832000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/ext_tw_video\/1246639622505013249\/pu\/vid\/360x640\/jgUAHSWanHXSeGUp.mp4?tag=10"},{"bitrate":632000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/ext_tw_video\/1246639622505013249\/pu\/vid\/320x568\/nCKHMugpIVnT-DvU.mp4?tag=10"},{"content_type":"application\/x-mpegURL","url":"https:\/\/video.twimg.com\/ext_tw_video\/1246639622505013249\/pu\/pl\/t3VbCt07x8_5bfMh.m3u8?tag=10"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":360,"h":640,"resize":"fit"},"medium":{"w":360,"h":640,"resize":"fit"},"small":{"w":360,"h":640,"resize":"fit"}},"source_status_id":1246639766159929346,"source_status_id_str":"1246639766159929346","source_user_id":1214430662633172992,"source_user_id_str":"1214430662633172992"}]}},"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UnidosPorArgentina","indices":[3,22]},{"text":"apocalipsis","indices":[23,35]},{"text":"covid","indices":[102,108]}],"urls":[{"url":"https:\/\/t.co\/3ZvFpRl6zh","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1246951950752649217","display_url":"twitter.com\/i\/web\/status\/1\u2026
","indices":[110,133]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"es","timestamp_ms":"1586131486963"},
{"created_at":"Mon Apr 06 00:04:46 +0000 2020","id":1246951950819758082,"id_str":"1246951950819758082","text":"RT @AyshaRenna: There were sounds of crackers in Delhi. Has #COVID19 left #India? \nWhat an insult to the hapless health workers fighting Co\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":211953796,"id_str":"211953796","name":"Hindustani4Evr","screen_name":"Hindustani4Evr","location":null,"url":null,"description":null,"translator_type":"none","protected":false,"verified":false,"followers_count":16,"friends_count":45,"listed_count":0,"favourites_count":1143,"statuses_count":1570,"created_at":"Thu Nov 04 18:52:45 +0000 2010","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/abs.twimg.com\/sticky\/default_profile_images\/default_profile_normal.png","profile_image_url_https":"https:\/\/abs.twimg.com\/sticky\/default_profile_images\/default_profile_normal.png","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sun Apr 05 16:03:35 +0000 2020","id":1246830853629431809,"id_str":"1246830853629431809","text":"There were sounds of 
crackers in Delhi. Has #COVID19 left #India? \nWhat an insult to the hapless health workers fig\u2026 https:\/\/t.co\/53OeuGqCvn","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1206939287196823552,"id_str":"1206939287196823552","name":"Aysha Renna","screen_name":"AyshaRenna","location":"New Delhi, India","url":"https:\/\/www.instagram.com\/aysharenna\/","description":"Muslim Activist.\nStudent | Jamia Millia Islamia","translator_type":"none","protected":false,"verified":false,"followers_count":29334,"friends_count":159,"listed_count":15,"favourites_count":206,"statuses_count":339,"created_at":"Tue Dec 17 14:09:05 +0000 2019","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1212072907943206912\/OWFWTGHK_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1212072907943206912\/OWFWTGHK_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/1206939287196823552\/1577815803","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"There were sounds of crackers in Delhi. Has #COVID19 left #India? 
\nWhat an insult to the hapless health workers fighting Corona in adverse conditions even without necessary protective gears? \n #\u0905\u0902\u0927\u0947\u0930_\u0928\u0917\u0930\u0940_\u091a\u094c\u092a\u091f_\u0930\u093e\u091c\u093e","display_text_range":[0,214],"entities":{"hashtags":[{"text":"COVID19","indices":[44,52]},{"text":"India","indices":[58,64]},{"text":"\u0905\u0902\u0927\u0947\u0930_\u0928\u0917\u0930\u0940_\u091a\u094c\u092a\u091f_\u0930\u093e\u091c\u093e","indices":[193,214]}],"urls":[],"user_mentions":[],"symbols":[]}},"quote_count":6,"reply_count":78,"retweet_count":232,"favorite_count":574,"entities":{"hashtags":[{"text":"COVID19","indices":[44,52]},{"text":"India","indices":[58,64]}],"urls":[{"url":"https:\/\/t.co\/53OeuGqCvn","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1246830853629431809","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[117,140]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"COVID19","indices":[60,68]},{"text":"India","indices":[74,80]}],"urls":[],"user_mentions":[{"screen_name":"AyshaRenna","name":"Aysha Renna","id":1206939287196823552,"id_str":"1206939287196823552","indices":[3,14]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1586131486979"},
{"created_at":"Mon Apr 06 00:04:46 +0000 2020","id":1246951947627827201,"id_str":"1246951947627827201","text":"Definitive proof that God is real, and God is good:   \nwhen after three weeks of a quarantine, you find your mom\u2019s\u2026 https:\/\/t.co\/mq92jMk3sa","display_text_range":[0,140],"source":"\u003ca href=\"https:\/\/mobile.twitter.com\" rel=\"nofollow\"\u003eTwitter Web App\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":883619466,"id_str":"883619466","name":"omid safi","screen_name":"ostadjaan","location":"Duke University","url":"http:\/\/www.illuminatedtours.com","description":"Professor of Islamic studies. Jedi-wannabe. Leads educational tours to Turkey and Morocco, Illuminated Tours (https:\/\/t.co\/TgpwCFUf9L). Love & justice, Here & Now.","translator_type":"none","protected":false,"verified":false,"followers_count":15178,"friends_count":6134,"listed_count":318,"favourites_count":16229,"statuses_count":10385,"created_at":"Tue Oct 16 03:42:29 +0000 
2012","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"FFF04D","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme19\/bg.gif","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme19\/bg.gif","profile_background_tile":false,"profile_link_color":"0099CC","profile_sidebar_border_color":"FFF8AD","profile_sidebar_fill_color":"F6FFD1","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/455714981497929729\/GY5KFKW7_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/455714981497929729\/GY5KFKW7_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/883619466\/1572536506","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"Definitive proof that God is real, and God is good:   \nwhen after three weeks of a quarantine, you find your mom\u2019s qormeh-sabzi hidden in the back of the freezer!   masha\u2019allah!!! \u2764\ufe0f   \n\n#Qormehsabzi in a time of #Coronavirus.  
:-) https:\/\/t.co\/O0HjtMxEO8","display_text_range":[0,231],"entities":{"hashtags":[{"text":"Qormehsabzi","indices":[187,199]},{"text":"Coronavirus","indices":[213,225]}],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":1246951862315687937,"id_str":"1246951862315687937","indices":[232,255],"media_url":"http:\/\/pbs.twimg.com\/media\/EU4QHLVWoAE64OS.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU4QHLVWoAE64OS.jpg","url":"https:\/\/t.co\/O0HjtMxEO8","display_url":"pic.twitter.com\/O0HjtMxEO8","expanded_url":"https:\/\/twitter.com\/ostadjaan\/status\/1246951947627827201\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":2048,"h":1536,"resize":"fit"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}},{"id":1246951921413488641,"id_str":"1246951921413488641","indices":[232,255],"media_url":"http:\/\/pbs.twimg.com\/media\/EU4QKnfXgAEW_sQ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU4QKnfXgAEW_sQ.jpg","url":"https:\/\/t.co\/O0HjtMxEO8","display_url":"pic.twitter.com\/O0HjtMxEO8","expanded_url":"https:\/\/twitter.com\/ostadjaan\/status\/1246951947627827201\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":2048,"h":1536,"resize":"fit"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1246951862315687937,"id_str":"1246951862315687937","indices":[232,255],"media_url":"http:\/\/pbs.twimg.com\/media\/EU4QHLVWoAE64OS.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU4QHLVWoAE64OS.jpg","url":"https:\/\/t.co\/O0HjtMxEO8","display_url":"pic.twitter.com\/O0HjtMxEO8","expanded_url":"https:\/\/twitter.com\/ostadjaan\/status\/1246951947627827201\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":2048,"h":1536,"resize":"fit"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}},{"id":12469519
21413488641,"id_str":"1246951921413488641","indices":[232,255],"media_url":"http:\/\/pbs.twimg.com\/media\/EU4QKnfXgAEW_sQ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/EU4QKnfXgAEW_sQ.jpg","url":"https:\/\/t.co\/O0HjtMxEO8","display_url":"pic.twitter.com\/O0HjtMxEO8","expanded_url":"https:\/\/twitter.com\/ostadjaan\/status\/1246951947627827201\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":2048,"h":1536,"resize":"fit"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}}]}},"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/mq92jMk3sa","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1246951947627827201","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[116,139]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1586131486218"}]

由于 SO 字符限制,无法添加更多行

如果我能得到您的一些意见,将不胜感激。谢谢!

编辑:

# I have tried this too; it leads to the same error as above.
# Attempt 2: read only the first 100 lines, then feed them to stream_in()
# through a text connection.
dat <- readLines("2020-04-05-20_cleaned.json", n = 100 )
jsonlite::stream_in(textConnection(dat))

但是这个:

# Attempt 3: join the first 100 lines into one string, wrap it in "[...]" and
# parse the result as a single JSON document.
fromJSON(sprintf("[%s]", paste(readLines("2020-04-05-20_cleaned.json", n = 100), collapse="")))

给我:

Error: parse error: unallowed token at this point in JSON text
          mestamp_ms":"1586131489768"},]
                     (right here) ------^

我看过很多帖子,没有一个能帮助我。

标签: r json eof

解决方案


@MrFlick 提到:

stream_in 旨在读取 ndjson 格式的数据。似乎只出现在第一行开头的 [ 以及各行末尾的逗号,说明您的数据很可能不是这种格式。最好先对数据进行预处理,将其转换为正确的格式。

我意识到末尾多了一个逗号,所以我把它从字符串的末尾删掉,然后用 sprintf 把修复后的字符串重新包装成一个 json 数组:

# Read the first `pagesize` lines of the array-style JSON file and rebuild
# them into a valid JSON array that jsonlite::fromJSON() can parse.
pagesize <- 10
con <- "2020-04-05-20_cleaned.json"
initialJSON <- readLines(con, n = pagesize)
# Line 1 begins with the array's opening "[", so it is skipped here; note that
# this also drops the first record. tail() is used instead of 2:pagesize so a
# file shorter than `pagesize` does not index past the end and paste literal
# "NA" strings into the JSON text.
collapsedJSON <- paste(tail(initialJSON, -1), collapse = "")
# Drop the final character: the "," that follows the last record read (or the
# closing "]" when the end of the file was reached).
collapsedJSON <- substr(collapsedJSON, 1, nchar(collapsedJSON) - 1)
# Wrap the records in brackets. sprintf() has no `collapse` argument, so the
# stray one (which was only reported as an unused argument) is removed.
fixedJSON <- sprintf("[%s]", collapsedJSON)

readJSON <- jsonlite::fromJSON(fixedJSON)

给了我这样的输出:

# A tibble: 9 x 32
  created_at      id id_str text  source truncated in_reply_to_sta~
  <chr>        <dbl> <chr>  <chr> <chr>  <lgl>     <lgl>           
1 Mon Apr 0~ 1.25e18 12469~ "?? ~ "<a h~ TRUE      NA              
2 Mon Apr 0~ 1.25e18 12469~ "RT ~ "<a h~ FALSE     NA              
3 Mon Apr 0~ 1.25e18 12469~ "Def~ "<a h~ TRUE      NA              
4 Mon Apr 0~ 1.25e18 12469~ "RT ~ "<a h~ FALSE     NA              
5 Mon Apr 0~ 1.25e18 12469~ "RT ~ "<a h~ FALSE     NA              
6 Mon Apr 0~ 1.25e18 12469~ "RT ~ "<a h~ FALSE     NA              
7 Mon Apr 0~ 1.25e18 12469~ "RT ~ "<a h~ FALSE     NA              
8 Mon Apr 0~ 1.25e18 12469~ "RT ~ "<a h~ FALSE     NA              
9 Mon Apr 0~ 1.25e18 12469~ "RT ~ "<a h~ FALSE     NA              
# ... with 152 more variables: in_reply_to_status_id_str <lgl>,
#   in_reply_to_user_id <lgl>, in_reply_to_user_id_str <lgl>,
#   in_reply_to_screen_name <lgl>, user$id <dbl>, $id_str <chr>,
#   $name <chr>, $screen_name <chr>, $location <chr>, $url <chr>,
#   $description <chr>, $translator_type <chr>, $protected <lgl>,
#   $verified <lgl>, $followers_count <int>, $friends_count <int>,
#   $listed_count <int>, $favourites_count <int>, $statuses_count <int>,
#   $created_at <chr>, $utc_offset <lgl>, $time_zone <lgl>,
#   $geo_enabled <lgl>, $lang <lgl>, $contributors_enabled <lgl>,
#   $is_translator <lgl>, $profile_background_color <chr>,
#   $profile_background_image_url <chr>,
#   $profile_background_image_url_https <chr>,
#   $profile_background_tile <lgl>, $profile_link_color <chr>,
#   $profile_sidebar_border_color <chr>, $profile_sidebar_fill_color <chr>,
#   $profile_text_color <chr>, $profile_use_background_image <lgl>,
#   $profile_image_url <chr>, $profile_image_url_https <chr>,
#   $profile_banner_url <chr>, $default_profile <lgl>,
#   $default_profile_image <lgl>, $following <lgl>,
#   $follow_request_sent <lgl>, $notifications <lgl>, geo <lgl>,
#   coordinates <lgl>, place <lgl>, contributors <lgl>,
#   is_quote_status <lgl>, extended_tweet$full_text <chr>,
#   $display_text_range <list>, $entities$hashtags <list>, $$urls <list>,
#   $$user_mentions <list>, $$symbols <list>, $$media <list>,
#   $extended_entities$media <list>, quote_count <int>, reply_count <int>,
#   retweet_count <int>, favorite_count <int>, entities$hashtags <list>,
#   $urls <list>, $user_mentions <list>, $symbols <list>, $media <list>,
#   favorited <lgl>, retweeted <lgl>, possibly_sensitive <lgl>,
#   filter_level <chr>, lang <chr>, timestamp_ms <chr>,
#   retweeted_status$created_at <chr>, $id <dbl>, $id_str <chr>,
#   $text <chr>, $source <chr>, $truncated <lgl>,
#   $in_reply_to_status_id <lgl>, $in_reply_to_status_id_str <lgl>,
#   $in_reply_to_user_id <lgl>, $in_reply_to_user_id_str <lgl>,
#   $in_reply_to_screen_name <lgl>, $user$id <dbl>, $$id_str <chr>,
#   $$name <chr>, $$screen_name <chr>, $$location <chr>, $$url <chr>,
#   $$description <chr>, $$translator_type <chr>, $$protected <lgl>,
#   $$verified <lgl>, $$followers_count <int>, $$friends_count <int>,
#   $$listed_count <int>, $$favourites_count <int>, $$statuses_count <int>,
#   $$created_at <chr>, $$utc_offset <lgl>, $$time_zone <lgl>, ...

感谢 MrFlick 的帮助。


推荐阅读