首页 > 解决方案 > 将字节流解析成大对象

问题描述

我有一个字节流输入(大约 100MB)。我需要将这个字节流解析成一个大数据对象,其中包含两百万个数据项对象(每个大小约为 50 个字节)。

每个数据项都有 int、short 和其他对象等成员。我已经尝试用 DataInputStream 循环读取两百万次来解决这个问题,但这需要几秒钟。有可能在一秒钟内处理完吗?下面是示例:

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
/**
 * Holder for one data item: four nested part objects followed by two
 * {@code int} values. The fields are private and only setters are exposed.
 */
class DataItem {
    private Part0 member0;
    private Part1 member1;
    private Part3 member3;
    private Part4 member4;
    private int member5;
    private int member6;

    /** Stores the {@code Part0} section of this item. */
    public void setMember0(Part0 value) {
        member0 = value;
    }

    /** Stores the {@code Part1} section of this item. */
    public void setMember1(Part1 value) {
        member1 = value;
    }

    /** Stores the {@code Part3} section of this item. */
    public void setMember3(Part3 value) {
        member3 = value;
    }

    /** Stores the {@code Part4} section of this item. */
    public void setMember4(Part4 value) {
        member4 = value;
    }

    /** Stores the first trailing {@code int} value. */
    public void setMember5(int value) {
        member5 = value;
    }

    /** Stores the second trailing {@code int} value. */
    public void setMember6(int value) {
        member6 = value;
    }
}

/**
 * Section of a data item that pairs a {@code Part2} block with a text value.
 * Fields are package-private and written through the setters below.
 */
class Part0 {
    Part2 member1;
    String member2;

    /** Stores the nested {@code Part2} block. */
    public void setMember1(Part2 value) {
        member1 = value;
    }

    /** Stores the text value. */
    public void setMember2(String value) {
        member2 = value;
    }
}
/**
 * Section of a data item made of one {@code short} followed by six
 * {@code byte} values. Fields are package-private; each has a plain setter.
 */
class Part1 {
    short member1;
    byte  member2;
    byte  member3;
    byte  member4;
    byte  member5;
    byte  member6;
    byte  member7;

    /** Stores the leading {@code short} value. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores the first {@code byte} value. */
    public void setMember2(byte value) {
        member2 = value;
    }

    /** Stores the second {@code byte} value. */
    public void setMember3(byte value) {
        member3 = value;
    }

    /** Stores the third {@code byte} value. */
    public void setMember4(byte value) {
        member4 = value;
    }

    /** Stores the fourth {@code byte} value. */
    public void setMember5(byte value) {
        member5 = value;
    }

    /** Stores the fifth {@code byte} value. */
    public void setMember6(byte value) {
        member6 = value;
    }

    /** Stores the sixth {@code byte} value. */
    public void setMember7(byte value) {
        member7 = value;
    }
}
/**
 * Section of a data item holding three {@code short} values, one {@code int}
 * and two {@code byte} values. Fields are package-private; each has a plain
 * setter.
 */
class Part2 {
    short member1;
    short member2;
    int member3;
    byte member4;
    byte member5;
    short member6;

    /** Stores {@code member1}. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores {@code member2}. */
    public void setMember2(short value) {
        member2 = value;
    }

    /** Stores {@code member3}. */
    public void setMember3(int value) {
        member3 = value;
    }

    /** Stores {@code member4}. */
    public void setMember4(byte value) {
        member4 = value;
    }

    /** Stores {@code member5}. */
    public void setMember5(byte value) {
        member5 = value;
    }

    /** Stores {@code member6}. */
    public void setMember6(short value) {
        member6 = value;
    }
}
/**
 * Section of a data item made of two {@code short} values. Fields are
 * package-private; each has a plain setter.
 */
class Part3 {
    short member1;
    short member2;

    /** Stores the first {@code short} value. */
    public void setMember1(short value) {
        member1 = value;
    }

    /** Stores the second {@code short} value. */
    public void setMember2(short value) {
        member2 = value;
    }
}
/**
 * Section of a data item made of one {@code int} followed by two
 * {@code short} values. Fields are package-private; each has a plain setter.
 */
class Part4 {
    int member1;
    short member2;
    short member3;

    /** Stores {@code member1}. */
    public void setMember1(int member) {
        this.member1 = member;
    }

    /** Stores {@code member2}. */
    public void setMember2(short member) {
        this.member2 = member;
    }

    /**
     * Misspelled original name of {@link #setMember2(short)}. It is retained
     * so existing callers keep compiling; it simply delegates.
     */
    public void setzMember2(short member) {
        setMember2(member);
    }

    /** Stores {@code member3}. */
    public void setMember3(short member) {
        this.member3 = member;
    }
}
/**
 * Micro-benchmark that parses the same in-memory byte array into
 * {@code DataItem} objects twice: once through a {@link DataInputStream} and
 * once through a {@link BufferedInputStream} with hand-written big-endian
 * decoding, printing the elapsed time of each pass.
 *
 * <p>Each record read by the two {@code taskUse...} methods is 50 bytes long
 * (8 + 12 + 10 + 4 + 8 + 4 + 4), so 2,000,000 records consume exactly the
 * 100,000,000-byte scratch file.
 */
public class testForHugeData {

    /** Name of the scratch file that supplies the benchmark input. */
    private static final String TEST_FILE = "test.txt";

    /** Size of the scratch file in bytes; change it here to test other input sizes. */
    private static final long TEST_FILE_SIZE = 100000000L;

    /** Chunk size used while loading the scratch file into memory. */
    private static final int LOAD_CHUNK_SIZE = 64 * 1024;

    /** Number of bytes in the fixed-width text field of each record. */
    private static final int TEXT_FIELD_LENGTH = 10;

    /**
     * Creates the scratch file, loads it into memory and runs both parsers
     * over it.
     *
     * @param args ignored
     * @throws IOException if the scratch file cannot be created or read, or if
     *         the input ends before all records have been parsed
     */
    public static void main(String[] args) throws IOException {
        int runtimes = 2000000;
        createFile();

        byte[] arr = loadFile(TEST_FILE);
        System.out.println("a input byteStream sized "+arr.length +" is created");

        // Each parser gets its OWN stream over the shared array. Layering the
        // BufferedInputStream on top of the DataInputStream would make the
        // second pass start where the first one stopped, i.e. at end of input
        // for a 100,000,000-byte file, so the two timings would not be
        // comparable.
        DataInputStream ds = new DataInputStream(new ByteArrayInputStream(arr));
        BufferedInputStream fs = new BufferedInputStream(new ByteArrayInputStream(arr));

        runTaskForManyTimes(runtimes,ds,fs);
    }

    /**
     * Reads a whole file into a byte array, closing the file even on failure.
     *
     * @param path file to read
     * @return the complete file content
     * @throws IOException if the file cannot be opened or read
     */
    private static byte[] loadFile(String path) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (FileInputStream in = new FileInputStream(path)) {
            byte[] buffer = new byte[LOAD_CHUNK_SIZE];
            int len;
            while ((len = in.read(buffer, 0, buffer.length)) != -1) {
                bos.write(buffer, 0, len);
            }
        }
        return bos.toByteArray();
    }

    /**
     * Parses {@code runtimes} records from each stream and prints how long
     * each pass took.
     *
     * @param runtimes number of records to parse from each stream
     * @param ds stream parsed with the {@link DataInputStream} primitives
     * @param fs stream parsed with the hand-written decoders
     * @throws IOException if either stream fails or ends early
     */
    private static void runTaskForManyTimes(int runtimes, DataInputStream ds, BufferedInputStream fs) throws IOException {

        HageData hugeData = new HageData();
        // nanoTime is monotonic, so the measurement is not disturbed by
        // wall-clock adjustments.
        long start = System.nanoTime();
        for (int i = 0; i < runtimes; i++) {
            hugeData.addDataItems(taskUseDataInputStream(runtimes, ds));
        }
        System.out.println("use dataIuputStream to analyze byte stream:");
        System.out.println("  it takes "+elapsedMillis(start)+"ms to loop 2 million times");

        HageData hugeData1 = new HageData();
        start = System.nanoTime();
        for (int i = 0; i < runtimes; i++) {
            hugeData1.addDataItems(taskUseBufferedInputStream(runtimes, fs));
        }
        System.out.println("use bufferedIuputStream to analyze byte stream:");
        System.out.println("  it takes "+elapsedMillis(start)+"ms to loop 2 million times");
    }

    /** Returns the whole milliseconds elapsed since {@code startNanos}. */
    private static long elapsedMillis(long startNanos) {
        return (System.nanoTime() - startNanos) / 1000000L;
    }

    /**
     * Parses one 50-byte record using {@link DataInputStream} primitives.
     *
     * @param runtimes unused; kept so the signature stays unchanged
     * @param ds stream positioned at the start of a record
     * @return the populated item
     * @throws IOException if the stream fails or ends inside the record
     */
    private static DataItem taskUseDataInputStream(int runtimes, DataInputStream ds) throws IOException {
        DataItem item = new DataItem();

        Part1 part1 = new Part1();
        part1.setMember1(ds.readShort());
        part1.setMember2(ds.readByte());
        part1.setMember3(ds.readByte());
        part1.setMember4(ds.readByte());
        part1.setMember5(ds.readByte());
        part1.setMember6(ds.readByte());
        part1.setMember7(ds.readByte());
        item.setMember1(part1);

        // The setters are called in wire order, which differs from the
        // member numbering.
        Part2 part2 = new Part2();
        part2.setMember1(ds.readShort());
        part2.setMember3(ds.readInt());
        part2.setMember5(ds.readByte());
        part2.setMember2(ds.readShort());
        part2.setMember6(ds.readShort());
        part2.setMember4(ds.readByte());

        byte[] tmp = new byte[TEXT_FIELD_LENGTH];
        ds.readFully(tmp);

        Part0 part0 = new Part0();
        part0.setMember1(part2);
        // NOTE(review): decodes with the platform default charset, as before;
        // confirm the intended encoding of this field and pass it explicitly.
        part0.setMember2(new String(tmp));
        item.setMember0(part0);

        Part3 part3 = new Part3();
        part3.setMember1(ds.readShort());
        part3.setMember2(ds.readShort());
        item.setMember3(part3);

        Part4 part4 = new Part4();
        part4.setMember1(ds.readInt());
        part4.setzMember2(ds.readShort());
        part4.setMember3(ds.readShort());
        item.setMember4(part4);

        item.setMember5(ds.readInt());
        item.setMember6(ds.readInt());

        return item;
    }

    /**
     * Parses one 50-byte record from a {@link BufferedInputStream} using the
     * decoding helpers of this class. The field order matches
     * {@link #taskUseDataInputStream(int, DataInputStream)}.
     *
     * @param runtimes unused; kept so the signature stays unchanged
     * @param fs stream positioned at the start of a record
     * @return the populated item
     * @throws IOException if the stream fails or ends inside the record
     */
    private static DataItem taskUseBufferedInputStream(int runtimes, BufferedInputStream fs) throws IOException {
        DataItem item = new DataItem();

        Part1 part1 = new Part1();
        part1.setMember1(readShort(fs));
        part1.setMember2(readByte(fs));
        part1.setMember3(readByte(fs));
        part1.setMember4(readByte(fs));
        part1.setMember5(readByte(fs));
        part1.setMember6(readByte(fs));
        part1.setMember7(readByte(fs));
        item.setMember1(part1);

        // The setters are called in wire order, which differs from the
        // member numbering.
        Part2 part2 = new Part2();
        part2.setMember1(readShort(fs));
        part2.setMember3(readInt(fs));
        part2.setMember5(readByte(fs));
        part2.setMember2(readShort(fs));
        part2.setMember6(readShort(fs));
        part2.setMember4(readByte(fs));

        byte[] tmp = new byte[TEXT_FIELD_LENGTH];
        readFully(fs, tmp);

        Part0 part0 = new Part0();
        part0.setMember1(part2);
        // NOTE(review): decodes with the platform default charset, as before;
        // confirm the intended encoding of this field and pass it explicitly.
        part0.setMember2(new String(tmp));
        item.setMember0(part0);

        Part3 part3 = new Part3();
        part3.setMember1(readShort(fs));
        part3.setMember2(readShort(fs));
        item.setMember3(part3);

        Part4 part4 = new Part4();
        part4.setMember1(readInt(fs));
        part4.setzMember2(readShort(fs));
        part4.setMember3(readShort(fs));
        item.setMember4(part4);

        item.setMember5(readInt(fs));
        item.setMember6(readInt(fs));

        return item;
    }

    /**
     * Reads one byte as a value in the range 0..255.
     *
     * @throws EOFException if the stream is already at its end
     * @throws IOException if the underlying read fails
     */
    private static int readUnsignedByte(BufferedInputStream fs) throws IOException {
        int value = fs.read();
        if (value < 0) {
            // read() signals end of stream with -1; storing that as data
            // would silently corrupt the record.
            throw new EOFException("unexpected end of stream");
        }
        return value;
    }

    /**
     * Reads one signed byte.
     *
     * @throws EOFException if the stream is already at its end
     * @throws IOException if the underlying read fails
     */
    private static byte readByte(BufferedInputStream fs) throws IOException {
        return (byte) readUnsignedByte(fs);
    }

    /**
     * Fills {@code dest} completely from the stream.
     *
     * @throws EOFException if the stream ends before {@code dest} is full
     * @throws IOException if the underlying read fails
     */
    private static void readFully(BufferedInputStream fs, byte[] dest) throws IOException {
        int filled = 0;
        while (filled < dest.length) {
            int count = fs.read(dest, filled, dest.length - filled);
            if (count < 0) {
                throw new EOFException("unexpected end of stream");
            }
            filled += count;
        }
    }

    /**
     * Reads a big-endian signed 16-bit value from a BufferedInputStream.
     *
     * @throws EOFException if the stream ends before two bytes are read
     * @throws IOException if the underlying read fails
     */
    private static short readShort(BufferedInputStream fs) throws IOException {
        // Combine the bytes as unsigned values: OR-ing a sign-extended low
        // byte would overwrite the high byte whenever the low byte is >= 0x80.
        int high = readUnsignedByte(fs);
        int low = readUnsignedByte(fs);
        return (short) ((high << 8) | low);
    }

    /**
     * Reads a big-endian signed 32-bit value from a BufferedInputStream.
     *
     * @throws EOFException if the stream ends before four bytes are read
     * @throws IOException if the underlying read fails
     */
    private static int readInt(BufferedInputStream fs) throws IOException {
        // Unsigned bytes for the same reason as in readShort.
        int b0 = readUnsignedByte(fs);
        int b1 = readUnsignedByte(fs);
        int b2 = readUnsignedByte(fs);
        int b3 = readUnsignedByte(fs);
        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
    }

    /**
     * Creates the scratch file if necessary and sets its length to
     * {@link #TEST_FILE_SIZE} bytes.
     *
     * @throws IOException if the file cannot be opened or resized
     */
    private static void createFile() throws IOException {
        // Opening in "rw" mode creates the file when it does not exist, so no
        // separate createNewFile() call is needed.
        try (RandomAccessFile file = new RandomAccessFile(new File(TEST_FILE), "rw")) {
            file.setLength(TEST_FILE_SIZE);
        }
    }

}

这是结果:

a input byteStream sized 100000000 is created
use dataIuputStream to analyze byte stream:
  it takes 4489ms to loop 2 million times
use bufferedIuputStream to analyze byte stream:
  it takes 4686ms to loop 2 million times

所以看起来 BufferedInputStream 比较慢?但是当我将输入字节流的大小更改为 400MB(通过将测试文件大小更改为 400MB)时,结果是:

    a input byteStream sized 400000000 is created
use dataIuputStream to analyze byte stream:
  it takes 4740ms to loop 2 million times
use bufferedIuputStream to analyze byte stream:
  it takes 1384ms to loop 2 million times

因此,BufferedInputStream 的性能似乎取决于缓冲区大小。但无论如何,时间开销都太高了。

标签: java, loops, datainputstream, bufferedinputstream

解决方案


推荐阅读