首页 > 解决方案 > 使用 Java 将大型 JSON 文件拆分为较小的 JSON 文件

问题描述

我有一个 JSON 格式的大型数据集,为了便于使用,我想将其拆分为多个 JSON 文件,同时保持原有结构。例如:{"users": [ { "userId": 1, "firstName": "Krish", "lastName": "Lee", "phoneNumber": "123456", "emailAddress": "krish.lee@learningcontainer.com" }, { "userId": 2, "firstName": "racks", "lastName": "jacson", "phoneNumber": "123456", "emailAddress": "racks.jacson@learningcontainer.com" }, { "userId": 3, "firstName": "denial", "lastName": "roast", "phoneNumber": "33333333", "emailAddress": "denial.roast@learningcontainer.com" }, { "userId": 4, "firstName": "devid", "lastName": "neo", "phoneNumber": "222222222", "emailAddress": "devid.neo@learningcontainer.com" }, { "userId": 5, "firstName": "jone", "lastName": "mac", "phoneNumber": "111111111", "emailAddress": "jone.mac@learningcontainer.com" } ] } 我希望拆分后每个用户 ID 各自写入一个单独的文件。到目前为止,我尝试过把数据放进 Map 再拆分 Map,也尝试过将其转换为数组再拆分数组,但都没有成功:生成的文件里虽然包含用户 ID,却不再是合法的 JSON 格式。关于如何在 Java 中实现这一点,有什么建议吗?

预期结果:{"users": [ { "userId": 1, "firstName": "Krish", "lastName": "Lee", "phoneNumber": "123456", "emailAddress": "krish.lee@learningcontainer.com" } ] }

标签: java json file split

解决方案


要处理大文件,建议优先使用面向流/事件的解析方式。Gson 和 Jackson 都支持这种方式。下面是一个使用小型 JSON 解析器 green-jelly(https://github.com/anatolygudkov/green-jelly)的示例:

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

/**
 * Splits a JSON document shaped like {@code {"users": [ {...}, {...} ]}} into one file per
 * user object, using an event-based (streaming) parser so the input never has to be held in
 * memory as a tree.
 *
 * <p>The parser and generator types used here ({@code JsonParser}, {@code JsonGenerator},
 * {@code AppendableWriter}, {@code JsonParserListenerAdaptor}, {@code JsonNumber}) come from
 * the green-jelly library and must be imported by the enclosing file.
 */
public class SplitMyJson {
    /** Output directory used by {@link #main(String[])} when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_FOLDER = "/home/gudkov/mytest";

    /** Sample input: a root object whose {@code users} member is an array of five user objects. */
    private static final String JSON_TO_SPLIT = "{\"users\": [\n" +
            "    {\n" +
            "      \"userId\": 1,\n" +
            "      \"firstName\": \"Krish\",\n" +
            "      \"lastName\": \"Lee\",\n" +
            "      \"phoneNumber\": \"123456\",\n" +
            "      \"emailAddress\": \"krish.lee@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 2,\n" +
            "      \"firstName\": \"racks\",\n" +
            "      \"lastName\": \"jacson\",\n" +
            "      \"phoneNumber\": \"123456\",\n" +
            "      \"emailAddress\": \"racks.jacson@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 3,\n" +
            "      \"firstName\": \"denial\",\n" +
            "      \"lastName\": \"roast\",\n" +
            "      \"phoneNumber\": \"33333333\",\n" +
            "      \"emailAddress\": \"denial.roast@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 4,\n" +
            "      \"firstName\": \"devid\",\n" +
            "      \"lastName\": \"neo\",\n" +
            "      \"phoneNumber\": \"222222222\",\n" +
            "      \"emailAddress\": \"devid.neo@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 5,\n" +
            "      \"firstName\": \"jone\",\n" +
            "      \"lastName\": \"mac\",\n" +
            "      \"phoneNumber\": \"111111111\",\n" +
            "      \"emailAddress\": \"jone.mac@learningcontainer.com\"\n" +
            "    }\n" +
            "  ]\n" +
            "}";

    /**
     * Parses the embedded sample document and writes each user object to its own file.
     *
     * @param args optional; {@code args[0]} is the output directory. When absent, the
     *             built-in default directory is used.
     * @throws UncheckedIOException if the output directory cannot be created or a user file
     *                              cannot be opened or closed
     */
    public static void main(String[] args) {
        final boolean folderGiven = args != null && args.length > 0;
        final File outputFolder = new File(folderGiven ? args[0] : DEFAULT_OUTPUT_FOLDER);

        final JsonParser parser = new JsonParser();
        parser.setListener(new Splitter(outputFolder));
        parser.parse(JSON_TO_SPLIT); // if you read a file, call parse() several times part by part in a loop until EOF
        parser.eoj(); // and then call .eoj()
    }

    /**
     * Parser listener that re-emits every object found directly below the root object into
     * its own file, named {@code user-<index>.json} with a zero-based index.
     *
     * <p>Nesting is tracked by counting open JSON objects only: depth 1 is the root object,
     * depth 2 is a user object, and anything deeper is content nested inside a user. Events
     * seen while the depth is below 2 (the root object, its member names and the enclosing
     * array) are consumed without producing output.
     */
    static class Splitter extends JsonParserListenerAdaptor {
        private final JsonGenerator jsonGenerator = new JsonGenerator();
        // Indirection that lets the single generator be pointed at a new file for every user.
        private final AppendableWriter<Writer> appendableWriter = new AppendableWriter<>();

        private final File outputFolder;
        private int objectDepth; // number of JSON objects currently open
        private int userIndex;   // index used in the name of the next user file

        /**
         * @param outputFolder directory that receives the per-user files; created (including
         *                     missing parents) when it does not exist yet
         * @throws UncheckedIOException if the directory does not exist and cannot be created
         */
        Splitter(final File outputFolder) {
            this.outputFolder = outputFolder;
            // Fail fast here instead of surfacing a confusing error on the first file open.
            if (!outputFolder.isDirectory() && !outputFolder.mkdirs()) {
                throw new UncheckedIOException(
                        new IOException("Cannot create output folder: " + outputFolder));
            }

            jsonGenerator.setOutput(appendableWriter);
        }

        /** True right after the opening brace of a user object has been counted. */
        private boolean userJustStarted() {
            return objectDepth == 2;
        }

        /** True right after the closing brace of a user object has been un-counted. */
        private boolean userJustEnded() {
            return objectDepth == 1;
        }

        /** True while the current position is outside of any user object. */
        private boolean notInUser() {
            return objectDepth < 2;
        }

        /**
         * Counts the new object; when it is a user object, opens the next output file before
         * emitting the opening brace.
         *
         * @return always {@code true}
         * @throws UncheckedIOException if the user file cannot be opened
         */
        @Override
        public boolean onObjectStarted() {
            objectDepth++;

            if (notInUser()) {
                return true; // root object: nothing to emit
            }

            if (userJustStarted()) {
                final File userFile = new File(outputFolder, "user-" + userIndex + ".json");
                try {
                    // Write UTF-8 explicitly rather than relying on the platform default charset.
                    appendableWriter.set(
                            Files.newBufferedWriter(userFile.toPath(), StandardCharsets.UTF_8));
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
                userIndex++;
            }
            jsonGenerator.startObject();
            return true;
        }

        /**
         * Un-counts the closed object; when it was a user object, finishes the generated
         * document and closes the current output file.
         *
         * @return always {@code true}
         * @throws UncheckedIOException if closing the user file fails
         */
        @Override
        public boolean onObjectEnded() {
            // The decision must be taken before decrementing: at depth 1 the root is closing.
            final boolean closingRoot = notInUser();
            objectDepth--;
            if (closingRoot) {
                return true;
            }

            jsonGenerator.endObject();

            if (userJustEnded()) { // user object ended
                // try-with-resources closes the file even if eoj() throws.
                try (Writer finishedFile = appendableWriter.output()) {
                    jsonGenerator.eoj();
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            return true;
        }

        /** Forwards an array start that occurs inside a user object. */
        @Override
        public boolean onArrayStarted() {
            if (notInUser()) {
                return true; // e.g. the "users" array itself
            }
            jsonGenerator.startArray();
            return true;
        }

        /** Forwards an array end that occurs inside a user object. */
        @Override
        public boolean onArrayEnded() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.endArray();
            return true;
        }

        /** Forwards a member name that occurs inside a user object. */
        @Override
        public boolean onObjectMember(final CharSequence name) {
            if (notInUser()) {
                return true; // e.g. the "users" member name of the root object
            }
            jsonGenerator.objectMember(name);
            return true;
        }

        /** Forwards a string value that occurs inside a user object. */
        @Override
        public boolean onStringValue(final CharSequence data) {
            if (notInUser()) {
                return true;
            }
            // NOTE(review): the second argument presumably requests escaping of the value;
            // confirm against the green-jelly JsonGenerator documentation.
            jsonGenerator.stringValue(data, true);
            return true;
        }

        /** Forwards a number value that occurs inside a user object. */
        @Override
        public boolean onNumberValue(final JsonNumber number) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.numberValue(number);
            return true;
        }

        /** Forwards a {@code true} literal that occurs inside a user object. */
        @Override
        public boolean onTrueValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.trueValue();
            return true;
        }

        /** Forwards a {@code false} literal that occurs inside a user object. */
        @Override
        public boolean onFalseValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.falseValue();
            return true;
        }

        /** Forwards a {@code null} literal that occurs inside a user object. */
        @Override
        public boolean onNullValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.nullValue();
            return true;
        }
    }
}

通过这种方式,您可以轻松地为非常大的文件实现过滤、聚合等,并且在常规 Java 中具有最高的性能。


推荐阅读