首页 > 解决方案 > 读取 Iris 数据集并存储在二维数组中

问题描述

我想用Iris 数据集运行 kmeans 算法,我需要从文件中读取此数据集并将其存储在二维数组 ( double[][] data) 中。

下面是我目前使用的数据(Iris 鸢尾花数据集)的一部分,它以如下形式硬编码在代码中:

// Hard-coded sample rows (described by the surrounding text as part of the Iris data set)
// intended as input for a k-means implementation.
// Each row holds five values. In the rows shown the first value is either 0 or 1
// (presumably a class/cluster label — TODO confirm); the remaining four are
// presumably the four Iris measurements — verify against the source file.
private static final double[][] data = {
    {0, 5.1, 3.5, 1.4, 0.2},
    {0, 4.9, 3, 1.4, 0.2},
    {0, 4.7, 3.2, 1.3, 0.2},
    {0, 4.6, 3.1, 1.5, 0.2},
    {0, 5, 3.6, 1.4, 0.2},
    {0, 5.4, 3.9, 1.7, 0.4},
    {0, 4.6, 3.4, 1.4, 0.3},
    {0, 5, 3.4, 1.5, 0.2},
    {0, 4.4, 2.9, 1.4, 0.2},
    {0, 4.9, 3.1, 1.5, 0.1},
    {0, 5.4, 3.7, 1.5, 0.2},
    {0, 4.8, 3.4, 1.6, 0.2},
    {0, 4.8, 3, 1.4, 0.1},
    {0, 4.3, 3, 1.1, 0.1},
    {0, 5.8, 4, 1.2, 0.2},
    {0, 5.7, 4.4, 1.5, 0.4},
    {0, 5.4, 3.9, 1.3, 0.4},
    {0, 5.1, 3.5, 1.4, 0.3},
    {0, 5.7, 3.8, 1.7, 0.3},
    {0, 5.1, 3.8, 1.5, 0.3},
    {0, 5.4, 3.4, 1.7, 0.2},
    {0, 5.1, 3.7, 1.5, 0.4},
    {0, 4.6, 3.6, 1, 0.2},
    {0, 5.1, 3.3, 1.7, 0.5},
    {0, 4.8, 3.4, 1.9, 0.2},
    {0, 5, 3, 1.6, 0.2},
    {0, 5, 3.4, 1.6, 0.4},
    {0, 5.2, 3.5, 1.5, 0.2},
    {0, 5.2, 3.4, 1.4, 0.2},
    {0, 4.7, 3.2, 1.6, 0.2},
    {0, 4.8, 3.1, 1.6, 0.2},
    {0, 5.4, 3.4, 1.5, 0.4},
    {0, 5.2, 4.1, 1.5, 0.1},
    {0, 5.5, 4.2, 1.4, 0.2},
    {0, 4.9, 3.1, 1.5, 0.1},
    {0, 5, 3.2, 1.2, 0.2},
    {0, 5.5, 3.5, 1.3, 0.2},
    {0, 4.9, 3.1, 1.5, 0.1},
    {0, 4.4, 3, 1.3, 0.2},
    {0, 5.1, 3.4, 1.5, 0.2},
    {0, 5, 3.5, 1.3, 0.3},
    {0, 4.5, 2.3, 1.3, 0.3},
    {0, 4.4, 3.2, 1.3, 0.2},
    {0, 5, 3.5, 1.6, 0.6},
    {1, 5.7, 2.8, 4.5, 1.3},
    {1, 6.3, 3.3, 4.7, 1.6},
    {1, 4.9, 2.4, 3.3, 1},
    {1, 6.6, 2.9, 4.6, 1.3},
    {1, 5.2, 2.7, 3.9, 1.4},
    {1, 5, 2, 3.5, 1},
    {1, 5.9, 3, 4.2, 1.5},
    {1, 6, 2.2, 4, 1},
    {1, 6.1, 2.9, 4.7, 1.4},
    {1, 5.6, 2.9, 3.6, 1.3},
    {1, 6.7, 3.1, 4.4, 1.4},
    {1, 5.6, 3, 4.5, 1.5},
    {1, 5.8, 2.7, 4.1, 1},
    {1, 6.2, 2.2, 4.5, 1.5},
    {1, 5.6, 2.5, 3.9, 1.1},
    {1, 5.9, 3.2, 4.8, 1.8},
    {1, 6.1, 2.8, 4, 1.3},
    {1, 6.3, 2.5, 4.9, 1.5},
    {1, 6.1, 2.8, 4.7, 1.2},
    {1, 6.4, 2.9, 4.3, 1.3},
    {1, 6.6, 3, 4.4, 1.4},
    {1, 6.8, 2.8, 4.8, 1.4},
    {1, 6.7, 3, 5, 1.7},
    {1, 6, 2.9, 4.5, 1.5},
    {1, 5.7, 2.6, 3.5, 1},
    {1, 5.5, 2.4, 3.8, 1.1},
    {1, 5.5, 2.4, 3.7, 1},
    {1, 5.8, 2.7, 3.9, 1.2},
    {1, 6, 2.7, 5.1, 1.6},
    {1, 5.4, 3, 4.5, 1.5},
    {1, 6, 3.4, 4.5, 1.6},
    {1, 6.7, 3.1, 4.7, 1.5},
    {1, 6.3, 2.3, 4.4, 1.3},
    {1, 5.6, 3, 4.1, 1.3},
    {1, 5.5, 2.5, 4, 1.3},
    {1, 5.5, 2.6, 4.4, 1.2},
    {1, 6.1, 3, 4.6, 1.4},
    {1, 5.8, 2.6, 4, 1.2},
    {1, 5, 2.3, 3.3, 1},
    {1, 5.6, 2.7, 4.2, 1.3},
    {1, 5.7, 3, 4.2, 1.2},
    {1, 5.7, 2.9, 4.2, 1.3},
    {1, 6.2, 2.9, 4.3, 1.3},
    {1, 5.1, 2.5, 3, 1.1},
    {1, 5.7, 2.8, 4.1, 1.3},
};

但我想从文件中读取这些数据,然后将其存储在data[][]二维数组中,以便在我的 kmeans 算法中使用。

这是我文件的一部分:

0   5.1 3.5 1.4 0.2
0   4.9 3   1.4 0.2
0   4.7 3.2 1.3 0.2
1.0 5.1 3.5 1.4 0.2
1.0 4.9 3.0 1.4 0.2
1.0 4.7 3.2 1.3 0.2
1.0 4.6 3.1 1.5 0.2
1.0 5.0 3.6 1.4 0.2
1.0 5.4 3.9 1.7 0.4
1.0 4.6 3.4 1.4 0.3
1.0 5.0 3.4 1.5 0.2
1.0 4.4 2.9 1.4 0.2
1.0 4.9 3.1 1.5 0.1
1.0 5.4 3.7 1.5 0.2
1.0 4.8 3.4 1.6 0.2
1.0 4.8 3.0 1.4 0.1
1.0 4.3 3.0 1.1 0.1
1.0 5.8 4.0 1.2 0.2

标签: java

解决方案


您可以按如下方式读取文件,并将其内容存入二维数组:

// Reads a whitespace-separated numeric text file into a double[][]:
// one array row per non-blank line, one element per token on that line.
final String fileName = "someFileName";
// Start with an empty array so that `data` is definitely assigned after the
// try/catch; otherwise any later use of `data` is rejected by the compiler.
double[][] data = new double[0][];
// try-with-resources closes the underlying file handle held by the stream.
try (Stream<String> stream = Files.lines(Paths.get(fileName))) {
      data = stream.map(String::trim)                 // leading whitespace would produce an empty first token
                   .filter(line -> !line.isEmpty())   // blank lines would make parseDouble("") throw
                   .map(line -> line.split("\\s+"))   // columns are separated by one or more spaces/tabs
                   .map(a -> Arrays.stream(a).mapToDouble(Double::parseDouble).toArray())
                   .toArray(double[][]::new);
} catch (IOException e) {
      // Best-effort as in the original: report the failure and leave `data` empty.
      e.printStackTrace();
}

推荐阅读