首页 > 技术文章 > Online Judge(OJ)搭建——4、具体实现

Piers 2017-06-05 20:36 原文

代码编译、运行、保存:

本系统目前支持 Java、C++ 的编译。如有其他语言需要编译,扩展也很简单,因为这里使用了一个抽象类LanguageTest,处理好代码运行编译之前的文件保存,代码运行之中的测试用例读取,代码运行编译之后的数据保存。主要利用了面向对象的多态性。

package per.piers.onlineJudge.service;

import org.springframework.stereotype.Service;
import per.piers.onlineJudge.Exception.ExistenceException;
import per.piers.onlineJudge.controller.TestController;
import per.piers.onlineJudge.model.InputOutput;
import per.piers.onlineJudge.model.TestInfo;

import java.io.*;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Scanner;

/**
 * Base class for compiling and running one user submission in an external process.
 *
 * <p>Subclasses supply the compile command line, the execute command line and the name
 * of the source file; this class writes the submitted code to disk, runs the compile
 * command, feeds every test case to the execute command and collects the results.
 *
 * <p>The working directory of a submission is {@code <path>/<uid>/<qid>/<submitTime>/},
 * where {@code path} is read from {@code config/codeProcessor/codeProcessor.properties}.
 */
@Service
public abstract class LanguageTest {

    /** Classpath location of the configuration that holds the base working directory. */
    private static final String CONFIG_RESOURCE = "config/codeProcessor/codeProcessor.properties";

    /**
     * Charset used for the source file, for the program's stdin and for every captured
     * output, so that all three agree regardless of the host's default charset.
     */
    private static final String CHARSET = "UTF-8";

    private int uid;
    private int qid;
    private long submitTime;
    protected String code;
    protected String codeDir;
    protected String codeFile;
    private boolean isCompiled = false;
    private List<String> compileCommands = new ArrayList<>();
    private List<String> executeCommands = new ArrayList<>();

    /**
     * Prepares the working paths and command lines for one submission.
     *
     * <p>NOTE: this constructor calls the overridable {@code getCodeFileName()},
     * {@code getCompileCommands()} and {@code getExecuteCommands()}; they run before the
     * subclass constructor body, so implementations must only rely on {@code codeDir}
     * (which is already set at that point) and not on subclass fields.
     *
     * @param uid        id of the submitting user
     * @param qid        id of the question being answered
     * @param code       submitted source code
     * @param submitTime submission time in milliseconds; also used as a directory name
     * @throws UncheckedIOException  if the configuration cannot be found or read
     * @throws IllegalStateException if the configuration has no {@code path} entry
     */
    protected LanguageTest(int uid, int qid, String code, long submitTime) {
        this.uid = uid;
        this.qid = qid;
        this.code = code;
        this.submitTime = submitTime;

        Properties properties = new Properties();
        try (InputStream inputStream = TestController.class.getClassLoader().getResourceAsStream(CONFIG_RESOURCE)) {
            if (inputStream == null) {
                throw new FileNotFoundException("classpath resource not found: " + CONFIG_RESOURCE);
            }
            properties.load(inputStream);
        } catch (IOException e) {
            // Fail fast: without the base path every later step would operate on "null" paths.
            throw new UncheckedIOException("cannot load " + CONFIG_RESOURCE, e);
        }
        String tmpDir = properties.getProperty("path");
        if (tmpDir == null) {
            throw new IllegalStateException("missing property 'path' in " + CONFIG_RESOURCE);
        }
        this.codeDir = String.format("%s/%s/%s/%s/", tmpDir, uid, qid, submitTime);
        this.codeFile = String.format("%s/%s", codeDir, getCodeFileName());
        this.compileCommands = getCompileCommands();
        this.executeCommands = getExecuteCommands();
    }

    /** @return the command line that compiles the source file inside {@code codeDir} */
    protected abstract List<String> getCompileCommands();

    /** @return the command line that runs the compiled program inside {@code codeDir} */
    protected abstract List<String> getExecuteCommands();

    /** @return the file name (without directory) under which the source code is saved */
    protected abstract String getCodeFileName();

    /**
     * Writes the submitted code to {@code codeFile} and runs the compile command.
     *
     * @return everything the compile process printed (stdout and stderr merged, one
     *         {@code '\n'} after each line), or {@code null} if it printed nothing
     * @throws IOException        if the file cannot be written or the process cannot be started
     * @throws ExistenceException if the source file already exists
     */
    public String compile() throws IOException {
        File codeFile = new File(this.codeFile);
        if (codeFile.exists()) {
            throw new ExistenceException("temp code file");
        }
        codeFile.getParentFile().mkdirs();
        codeFile.createNewFile();
        try (Writer writer = new OutputStreamWriter(new FileOutputStream(codeFile), CHARSET)) {
            writer.write(code);
        }

        // TODO: Docker permission control
        ProcessBuilder processBuilder = new ProcessBuilder(compileCommands);
        processBuilder.directory(new File(codeDir));
        processBuilder.redirectErrorStream(true);
        Process process = processBuilder.start();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream(), CHARSET))) {
            StringBuilder output = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line).append('\n');
            }
            // NOTE(review): the flag is set once the output has been read, whatever the
            // compiler's exit status was; callers decide success from the returned text.
            isCompiled = true;
            return output.length() == 0 ? null : output.toString();
        }
    }

    /**
     * Runs the compiled program once per test case and compares its output with the
     * expected output using {@link String#equals(Object)}.
     *
     * @param inputOutputs test cases (input plus expected output)
     * @return a {@code TestInfo} holding the per-case results and the fraction of correct
     *         cases; the fraction is {@code 0.0} when there are no test cases
     * @throws IllegalStateException if {@link #compile()} has not completed beforehand
     * @throws IOException           if a test process cannot be started or talked to
     */
    public TestInfo execute(ArrayList<InputOutput> inputOutputs) throws IOException {
        if (!isCompiled) throw new IllegalStateException("not compiled");

        int correct = 0;
        ArrayList<InputOutput> results = new ArrayList<>();
        for (InputOutput inputOutput : inputOutputs) {
            String output = test(inputOutput.getInput());
            boolean matches = output.equals(inputOutput.getOutput());
            if (matches) {
                correct++;
            }
            InputOutput actualInputOutput = new InputOutput();
            actualInputOutput.setInput(inputOutput.getInput());
            actualInputOutput.setOutput(output);
            actualInputOutput.setCorrect(matches);
            results.add(actualInputOutput);
        }
        // Guard against 0/0, which would otherwise produce NaN as the score.
        double score = inputOutputs.isEmpty() ? 0.0 : (double) correct / (double) inputOutputs.size();
        TestInfo testInfo = new TestInfo(uid, qid, new Timestamp(submitTime), code, score);
        testInfo.setInputOutputs(results);
        return testInfo;
    }

    /**
     * Runs the execute command once, writing {@code input} to its stdin.
     *
     * <p>NOTE(review): the output lines are concatenated without any separator, so a
     * program printing "1" and "2" on separate lines yields "12". Expected outputs
     * presumably have to be stored in the same form; confirm before changing this.
     *
     * @param input text sent to the program's standard input
     * @return the program's output (stdout and stderr merged) with line breaks removed
     * @throws IOException if the process cannot be started or its stdin cannot be written
     */
    protected String test(String input) throws IOException {
        ProcessBuilder processBuilder = new ProcessBuilder(executeCommands);
        processBuilder.directory(new File(codeDir));
        processBuilder.redirectErrorStream(true);
        Process process = processBuilder.start();
        // Closing stdin signals end-of-input to the program.
        try (OutputStream outputStream = process.getOutputStream()) {
            outputStream.write(input.getBytes(CHARSET));
            outputStream.flush();
        }
        StringBuilder results = new StringBuilder();
        try (Scanner in = new Scanner(process.getInputStream(), CHARSET)) {
            while (in.hasNextLine()) {
                results.append(in.nextLine());
            }
        }
        return results.toString();
    }

}

在子类中,只需要设置一些参数即可扩展,比如Docker编译Java的命令、Docker运行Java的命令、代码文件名。 

package per.piers.onlineJudge.service;
import java.util.ArrayList;
import java.util.List;

/**
 * {@link LanguageTest} for Java submissions.
 *
 * <p>Both commands start a throw-away {@code openjdk:8} Docker container that mounts
 * {@code codeDir} at the same path inside the container and runs a shell one-liner there.
 */
public class JavaTest extends LanguageTest {

    public JavaTest(int uid, int qid, String code, long submitTime) {
        super(uid, qid, code, submitTime);
    }

    /** Builds the {@code docker run} command line that compiles {@code Main.java} with javac. */
    @Override
    protected List<String> getCompileCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&javac Main.java", codeDir);
        String[] argv = {
            "docker", "run", "--rm", "-u", "root", "-v", volume,
            "openjdk:8", "/bin/sh", "-c", script
        };
        List<String> command = new ArrayList<>();
        for (String arg : argv) {
            command.add(arg);
        }
        return command;
    }

    /**
     * Builds the {@code docker run -i} command line that runs class {@code Main} under
     * {@code timeout 3s}.
     */
    @Override
    protected List<String> getExecuteCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&timeout 3s java Main", codeDir);
        String[] argv = {
            "docker", "run", "-i", "--rm", "-u", "root", "-v", volume,
            "openjdk:8", "/bin/sh", "-c", script
        };
        List<String> command = new ArrayList<>();
        for (String arg : argv) {
            command.add(arg);
        }
        return command;
    }

    /** The submission is always saved as {@code Main.java}. */
    @Override
    protected String getCodeFileName() {
        return "Main.java";
    }

}
package per.piers.onlineJudge.service;

import per.piers.onlineJudge.model.InputOutput;
import per.piers.onlineJudge.model.TestInfo;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * {@link LanguageTest} for C++ submissions.
 *
 * <p>Both commands start a throw-away {@code gcc:7} Docker container that mounts
 * {@code codeDir} at the same path inside the container and runs a shell one-liner there.
 */
public class CppTest extends LanguageTest {

    public CppTest(int uid, int qid, String code, long submitTime) {
        super(uid, qid, code, submitTime);
    }

    /** Builds the {@code docker run} command line that compiles {@code Main.cpp} with g++. */
    @Override
    protected List<String> getCompileCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&g++ Main.cpp", codeDir);
        String[] argv = {
            "docker", "run", "--rm", "-u", "root", "-v", volume,
            "gcc:7", "/bin/sh", "-c", script
        };
        List<String> command = new ArrayList<>();
        for (String arg : argv) {
            command.add(arg);
        }
        return command;
    }

    /**
     * Builds the {@code docker run -i} command line that runs {@code ./a.out} under
     * {@code timeout 3s}.
     */
    @Override
    protected List<String> getExecuteCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&timeout 3s ./a.out", codeDir);
        String[] argv = {
            "docker", "run", "--rm", "-i", "-u", "root", "-v", volume,
            "gcc:7", "/bin/sh", "-c", script
        };
        List<String> command = new ArrayList<>();
        for (String arg : argv) {
            command.add(arg);
        }
        return command;
    }

    /** The submission is always saved as {@code Main.cpp}. */
    @Override
    protected String getCodeFileName() {
        return "Main.cpp";
    }

}

这里利用 Docker 进行代码编译。Docker 是一个虚拟容器,放在 Docker 中运行的程序不会影响操作系统,也不会影响 Docker 容器中其他的程序。恶意代码在 Docker 中被执行,容器只会被破坏,不会有别的影响,此时只需重启容器即可。

Docker 编译 Java 命令:docker run --rm -u root -v /onlineJudge:/onlineJudge openjdk:8 /bin/sh -c "cd /onlineJudge&&javac Main.java"

其中,--rm 是用完删除容器,-u root 是以 root 身份运行(此 root 不等于操作系统中 root,权限低了很多),-v /onlineJudge:/onlineJudge 是挂载卷,即存放代码的位置,openjdk:8 就是镜像名和版本,/bin/sh -c "cd /onlineJudge&&javac Main.java" 是容器启动之后运行的命令:利用 shell 进入 /onlineJudge 文件夹并执行 javac Main.java,&& 表示前一条命令执行成功之后才执行后一条命令。

Docker 运行 Java 命令:docker run --rm -i -u root -v /onlineJudge:/onlineJudge openjdk:8 /bin/sh -c "cd /onlineJudge&&timeout 3s java Main"

其中,-i 表示保持容器的标准输入打开,这样宿主程序才能通过输入输出流把测试用例传给容器中的进程并读取结果。timeout 为 Linux 的限时命令,这里限制程序最多运行 3 秒。

Docker 编译 C++ 命令:docker run --rm -u root -v /onlineJudge:/onlineJudge gcc:7 /bin/sh -c "cd /onlineJudge&&g++ Main.cpp"

Docker 运行 C++ 命令:docker run --rm -i -u root -v /onlineJudge:/onlineJudge gcc:7 /bin/sh -c "cd /onlineJudge&&timeout 3s ./a.out"

 

Token 生成:

token 是在用户注册或者忘记密码时生成的。此时要给用户一个根据一定条件生成的 token,这样黑客就无法利用 URL 进行信息窃取和破坏。比如,如果用户 Piers 忘记密码的链接不是用 token 生成的,那么黑客就可以访问特定的 URL 对 Piers 的信息进行篡改(形如 http://youWebsite.com/password/Piers);而生成的 token 可以防止这一点,URL 完全是随机的(形如 http://youWebsite.com/password/1042637985,http://youWebsite.com/password/3798510426),黑客除非黑进用户的邮箱,否则很难得知用户忘记密码的链接。此外,token 还是有时间限制的,过了时间的 token 会从服务器中删除。

这里 token 的算法比较简单:token = 系统时间字符串 + 三位数字后缀,其中后缀 = (用户 email 每个字符的 ASCII 值 × 一个 0~9 的随机数,再求和) % 100。本系统流量较小,出现 token 重复的概率很低。token 保存在 ConcurrentHashMap 中,防止由于多线程带来的异常。

其实更先进的 token 应该是用非对称加密的形式生成。

package per.piers.onlineJudge.util;

import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;

/**
 * In-memory store of short-lived URL tokens, each mapped to the e-mail address it was
 * issued for.
 *
 * <p>A token is the decimal creation time followed by a three-character suffix derived
 * from the e-mail address and a random factor. Tokens live for {@link #TIMEOUT}
 * milliseconds and are purged lazily on every lookup.
 *
 * <p>NOTE(review): the suffix has at most 100 distinct values and is produced with
 * {@link Random}, so a token is guessable by anyone who knows roughly when it was
 * issued. Consider an unpredictable token (e.g. from {@code java.security.SecureRandom})
 * if these links protect anything sensitive.
 */
public class TokenUtil {

    /** Token lifetime in milliseconds (five minutes). */
    private static final long TIMEOUT = 1000 * 60 * 5;

    /** Number of characters appended after the creation time in every token. */
    private static final int SUFFIX_LENGTH = 3;

    private static ConcurrentHashMap<String, String> tokenEmails = new ConcurrentHashMap<>();

    /**
     * Creates a token for {@code email} and remembers it.
     *
     * <p>Two calls with the same {@code time} that happen to produce the same suffix
     * yield the same token; the later call then replaces the earlier mapping.
     *
     * @param time  creation time in milliseconds; it becomes the leading part of the token
     * @param email address the token is issued for
     * @return the new token
     */
    public static synchronized String addURLToken(long time, String email) {
        Random random = new Random();
        int emailSum = 0;
        for (char c : email.toCharArray()) {
            emailSum += ((int) c) * random.nextInt(10);
        }
        String key = String.format("%d%03d", time, emailSum % 100);
        tokenEmails.put(key, email);
        return key;
    }

    /**
     * Looks up the e-mail address a token was issued for, after purging expired tokens.
     *
     * @param token token previously returned by {@link #addURLToken(long, String)}
     * @return the e-mail address, or {@code null} if the token is {@code null}, unknown
     *         or no longer valid
     * @throws IllegalStateException if any stored token carries a creation time that
     *                               lies in the future
     */
    public static synchronized String getEmailFromToken(String token) {
        if (token == null) {
            return null;
        }
        long now = System.currentTimeMillis();
        for (String storedToken : tokenEmails.keySet()) {
            // Each stored key is sliced by its OWN length; the looked-up token is
            // caller-supplied and may have any length.
            long created = creationTime(storedToken);
            if (now < created) throw new IllegalStateException("now < create");
            if (now - created > TIMEOUT) {
                tokenEmails.remove(storedToken);
            }
        }
        String email = tokenEmails.get(token);
        if (email == null) {
            return null;
        }
        // The purge keeps tokens whose age equals TIMEOUT exactly; those are not returned.
        return now - creationTime(token) < TIMEOUT ? email : null;
    }

    /** Extracts the creation time from a token produced by {@link #addURLToken(long, String)}. */
    private static long creationTime(String storedToken) {
        return Long.parseLong(storedToken.substring(0, storedToken.length() - SUFFIX_LENGTH));
    }

}

 

邮件发送:

邮件发送采用 javax.mail 包。首先设置邮件的域名、用户名、密码,再设置邮件的内容,包括主题、发件人等,最后发送邮件。

package per.piers.onlineJudge.util;

import javax.mail.*;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import java.io.IOException;
import java.io.InputStream;
import java.security.Security;
import java.util.Date;
import java.util.Properties;

public class MailUtil {

    private MailUtil() {
    }

    public static void sendEmail(String email, String subject, String content) throws MessagingException {
        Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider());

        final Properties properties = new Properties();
        try (InputStream inputStream = MailUtil.class.getClassLoader().getResourceAsStream("config/mail/mail.properties");) {
            properties.load(inputStream);
        } catch (IOException e) {
            e.printStackTrace();
        }

        String username = properties.getProperty("mail.username");
        String password = properties.getProperty("mail.password");
        String domain = properties.getProperty("mail.domain");
        Session session = Session.getDefaultInstance(properties, new Authenticator() {

            protected PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication(properties.getProperty("mail.username"), password);
            }

        });

        Message msg = new MimeMessage(session);
        msg.setFrom(new InternetAddress(username + "@" + domain));
        msg.setRecipients(Message.RecipientType.TO, InternetAddress.parse(email, false));
        msg.setSubject(subject);
        msg.setText(content);
        msg.setSentDate(new Date());
        Transport.send(msg);
    }

}

 

读取 Excel 文件:

主要是利用 POI 读取 Excel 文件,支持 xls、xlsx 格式。

其操作的顺序基本和 Excel 的结构一致:首先读取 Workbook,其次读取 Sheet,再次读取 Row,最后读取 Row 中的 Cell。Cell 的内容可以有很多类型,比如作为 String 读出。

package per.piers.onlineJudge.util;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashSet;

/**
 * Reads one named column from the first sheet of an Excel file ({@code xls} or {@code xlsx}).
 */
public class ExcelUtil {

    /** Accepts files whose name ends with {@code xls} or {@code xlsx}. */
    private boolean isValidExcelFile(File file) {
        return file.getName().endsWith("xls") || file.getName().endsWith("xlsx");
    }

    /**
     * Loads the workbook for {@code file}, choosing the implementation by file extension.
     * The file stream is closed before returning; this relies on the stream-based
     * workbook constructors reading the whole file up front.
     *
     * @return the workbook, or {@code null} if the extension is neither xls nor xlsx
     */
    private Workbook getWorkbook(File file) throws IOException {
        String name = file.getName();
        boolean legacy = name.endsWith("xls");   // Excel 2003
        boolean ooxml = name.endsWith("xlsx");   // Excel 2007/2010
        if (!legacy && !ooxml) {
            return null;
        }
        try (FileInputStream in = new FileInputStream(file)) {
            return legacy ? new HSSFWorkbook(in) : new XSSFWorkbook(in);
        }
    }

    /**
     * Returns the string value of the cell at {@code index}, or {@code null} when the
     * row or the cell does not exist.
     */
    private static String cellText(Row row, int index) {
        if (row == null || row.getCell(index) == null) {
            return null;
        }
        return row.getCell(index).getStringCellValue();
    }

    /**
     * Collects the distinct values of the column whose header (first row) equals
     * {@code columnName}.
     *
     * <p>Rows and cells that do not exist in the sheet are skipped. NOTE(review): cells
     * are read with {@code getStringCellValue()}, so the header and the column are
     * expected to hold text cells.
     *
     * @param excelFile  the xls/xlsx file to read
     * @param columnName header text of the wanted column
     * @return the distinct values below the header, or {@code null} if the sheet has no
     *         first row or no header cell equals {@code columnName}
     * @throws IllegalArgumentException if the file name does not end with xls or xlsx
     * @throws IOException              if the file cannot be read
     */
    public HashSet<String> readColumns(File excelFile, String columnName) throws IOException {
        if (!isValidExcelFile(excelFile)) throw new IllegalArgumentException("not a excel file");
        try (Workbook workbook = getWorkbook(excelFile)) {
            Sheet sheet = workbook.getSheetAt(0);
            Row header = sheet.getRow(0);
            if (header == null) return null;

            // getLastCellNum() is the last cell index plus one, so gaps in the header
            // row do not cut the search short.
            int index = -1;
            for (int i = 0; i < header.getLastCellNum(); i++) {
                String title = cellText(header, i);
                if (title != null && title.equals(columnName)) {
                    index = i;
                    break;
                }
            }
            if (index == -1) return null;

            HashSet<String> columns = new HashSet<>();
            // getLastRowNum() is the zero-based index of the last row, so blank rows
            // in between do not hide the rows after them.
            for (int i = 1; i <= sheet.getLastRowNum(); i++) {
                String value = cellText(sheet.getRow(i), index);
                if (value != null) {
                    columns.add(value);
                }
            }
            return columns;
        }
    }

}

 

抄袭作弊检测:

主要是利用了 K-means,K-means 具体原理网上有很多,这里就不多讲了。

具体实现选用的是 WEKA。WEKA 需要修改数据源,在 weka.jar/weka/experiment/DatabaseUtils.props 配置 MySQL 数据库连接:

# Database settings for MySQL 3.23.x, 4.x
#
# General information on database access can be found here:
# http://weka.wikispaces.com/Databases
#
# url:     http://www.mysql.com/
# jdbc:    http://www.mysql.com/products/connector/j/
# author:  Fracpete (fracpete at waikato dot ac dot nz)
# version: $Revision: 11885 $

# JDBC driver (comma-separated list)
jdbcDriver=com.mysql.cj.jdbc.Driver

# database URL
jdbcURL=jdbc:mysql://localhost:3306/online_judge?serverTimezone=UTC&useUnicode=true&characterEncoding=UTF-8&useSSL=true

# specific data types
string, getString() = 0;    --> nominal
boolean, getBoolean() = 1;  --> nominal
double, getDouble() = 2;    --> numeric
byte, getByte() = 3;        --> numeric
short, getByte()= 4;        --> numeric
int, getInteger() = 5;      --> numeric
long, getLong() = 6;        --> numeric
float, getFloat() = 7;      --> numeric
date, getDate() = 8;        --> date
text, getString() = 9;      --> string
time, getTime() = 10;       --> date
timestamp, getTime() = 11;  --> date

# other options
CREATE_DOUBLE=DOUBLE
CREATE_STRING=TEXT
CREATE_INT=INT
CREATE_DATE=DATETIME
DateFormat=yyyy-MM-dd HH:mm:ss
checkUpperCaseNames=false
checkLowerCaseNames=false
checkForTable=true

# All the reserved keywords for this database
# Based on the keywords listed at the following URL (2009-04-13):
# http://dev.mysql.com/doc/mysqld-version-reference/en/mysqld-version-reference-reservedwords-5-0.html
Keywords=\
  ADD,\
  ALL,\
  ALTER,\
  ANALYZE,\
  AND,\
  AS,\
  ASC,\
  ASENSITIVE,\
  BEFORE,\
  BETWEEN,\
  BIGINT,\
  BINARY,\
  BLOB,\
  BOTH,\
  BY,\
  CALL,\
  CASCADE,\
  CASE,\
  CHANGE,\
  CHAR,\
  CHARACTER,\
  CHECK,\
  COLLATE,\
  COLUMN,\
  COLUMNS,\
  CONDITION,\
  CONNECTION,\
  CONSTRAINT,\
  CONTINUE,\
  CONVERT,\
  CREATE,\
  CROSS,\
  CURRENT_DATE,\
  CURRENT_TIME,\
  CURRENT_TIMESTAMP,\
  CURRENT_USER,\
  CURSOR,\
  DATABASE,\
  DATABASES,\
  DAY_HOUR,\
  DAY_MICROSECOND,\
  DAY_MINUTE,\
  DAY_SECOND,\
  DEC,\
  DECIMAL,\
  DECLARE,\
  DEFAULT,\
  DELAYED,\
  DELETE,\
  DESC,\
  DESCRIBE,\
  DETERMINISTIC,\
  DISTINCT,\
  DISTINCTROW,\
  DIV,\
  DOUBLE,\
  DROP,\
  DUAL,\
  EACH,\
  ELSE,\
  ELSEIF,\
  ENCLOSED,\
  ESCAPED,\
  EXISTS,\
  EXIT,\
  EXPLAIN,\
  FALSE,\
  FETCH,\
  FIELDS,\
  FLOAT,\
  FLOAT4,\
  FLOAT8,\
  FOR,\
  FORCE,\
  FOREIGN,\
  FROM,\
  FULLTEXT,\
  GOTO,\
  GRANT,\
  GROUP,\
  HAVING,\
  HIGH_PRIORITY,\
  HOUR_MICROSECOND,\
  HOUR_MINUTE,\
  HOUR_SECOND,\
  IF,\
  IGNORE,\
  IN,\
  INDEX,\
  INFILE,\
  INNER,\
  INOUT,\
  INSENSITIVE,\
  INSERT,\
  INT,\
  INT1,\
  INT2,\
  INT3,\
  INT4,\
  INT8,\
  INTEGER,\
  INTERVAL,\
  INTO,\
  IS,\
  ITERATE,\
  JOIN,\
  KEY,\
  KEYS,\
  KILL,\
  LABEL,\
  LEADING,\
  LEAVE,\
  LEFT,\
  LIKE,\
  LIMIT,\
  LINES,\
  LOAD,\
  LOCALTIME,\
  LOCALTIMESTAMP,\
  LOCK,\
  LONG,\
  LONGBLOB,\
  LONGTEXT,\
  LOOP,\
  LOW_PRIORITY,\
  MATCH,\
  MEDIUMBLOB,\
  MEDIUMINT,\
  MEDIUMTEXT,\
  MIDDLEINT,\
  MINUTE_MICROSECOND,\
  MINUTE_SECOND,\
  MOD,\
  MODIFIES,\
  NATURAL,\
  NOT,\
  NO_WRITE_TO_BINLOG,\
  NULL,\
  NUMERIC,\
  ON,\
  OPTIMIZE,\
  OPTION,\
  OPTIONALLY,\
  OR,\
  ORDER,\
  OUT,\
  OUTER,\
  OUTFILE,\
  PRECISION,\
  PRIMARY,\
  PRIVILEGES,\
  PROCEDURE,\
  PURGE,\
  READ,\
  READS,\
  REAL,\
  REFERENCES,\
  REGEXP,\
  RELEASE,\
  RENAME,\
  REPEAT,\
  REPLACE,\
  REQUIRE,\
  RESTRICT,\
  RETURN,\
  REVOKE,\
  RIGHT,\
  RLIKE,\
  SCHEMA,\
  SCHEMAS,\
  SECOND_MICROSECOND,\
  SELECT,\
  SENSITIVE,\
  SEPARATOR,\
  SET,\
  SHOW,\
  SMALLINT,\
  SONAME,\
  SPATIAL,\
  SPECIFIC,\
  SQL,\
  SQLEXCEPTION,\
  SQLSTATE,\
  SQLWARNING,\
  SQL_BIG_RESULT,\
  SQL_CALC_FOUND_ROWS,\
  SQL_SMALL_RESULT,\
  SSL,\
  STARTING,\
  STRAIGHT_JOIN,\
  TABLE,\
  TABLES,\
  TERMINATED,\
  THEN,\
  TINYBLOB,\
  TINYINT,\
  TINYTEXT,\
  TO,\
  TRAILING,\
  TRIGGER,\
  TRUE,\
  UNDO,\
  UNION,\
  UNIQUE,\
  UNLOCK,\
  UNSIGNED,\
  UPDATE,\
  UPGRADE,\
  USAGE,\
  USE,\
  USING,\
  UTC_DATE,\
  UTC_TIME,\
  UTC_TIMESTAMP,\
  VALUES,\
  VARBINARY,\
  VARCHAR,\
  VARCHARACTER,\
  VARYING,\
  WHEN,\
  WHERE,\
  WHILE,\
  WITH,\
  WRITE,\
  XOR,\
  YEAR_MONTH,\
  ZEROFILL

# The character to append to attribute names to avoid exceptions due to
# clashes between keywords and attribute names
KeywordsMaskChar=_

#flags for loading and saving instances using DatabaseLoader/Saver
nominalToStringLimit=50
idColumn=auto_generated_id

VARCHAR = 0
TEXT = 0

之后根据K-means的流程,设置相关工作条件,执行算法。

package per.piers.onlineJudge.util;

import per.piers.onlineJudge.model.TestInfo;
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.core.EuclideanDistance;
import weka.core.Instances;
import weka.experiment.InstanceQuery;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Groups the submissions to one question with k-means so that suspiciously similar
 * code ends up in the same cluster.
 */
public class FindPlagiarismAlgorithm {

    /** Classpath location of the properties holding the JDBC user name and password. */
    private static final String CONFIG_RESOURCE = "config/mybatis/applications.properties";

    /**
     * Loads the {@code code} column of every row in {@code tests} for the question,
     * turns it into a word vector and assigns each row to one of three clusters.
     *
     * <p>NOTE(review): row {@code i} of the query result is reported with
     * {@code testInfos[i]}; the query has no {@code ORDER BY}, so the caller's array is
     * assumed to be in the same order as the rows come back from the database.
     *
     * @param qid       id of the question whose submissions are clustered
     * @param testInfos submission metadata, one entry per row returned by the query
     * @return one line per submission naming its user id, submit time and cluster number
     * @throws IllegalArgumentException if {@code testInfos} has fewer entries than rows
     * @throws Exception                if the configuration, the database query or the
     *                                  clustering fails
     */
    public String cluster(int qid, TestInfo[] testInfos) throws Exception {
        final Properties properties = new Properties();
        try (InputStream inputStream = FindPlagiarismAlgorithm.class.getClassLoader().getResourceAsStream(CONFIG_RESOURCE)) {
            if (inputStream == null) {
                throw new IOException("classpath resource not found: " + CONFIG_RESOURCE);
            }
            properties.load(inputStream);
        }

        InstanceQuery query = new InstanceQuery();
        Instances data;
        try {
            query.setUsername(properties.getProperty("jdbc.username"));
            query.setPassword(properties.getProperty("jdbc.password"));
            query.setQuery("SELECT code FROM tests WHERE qid = " + qid + ";");
            data = query.retrieveInstances();
        } finally {
            // Release the JDBC connection opened by the query.
            query.disconnectFromDatabase();
        }

        // Configure the filter first; setInputFormat must be the last call before use.
        StringToWordVector filter = new StringToWordVector();
        filter.setWordsToKeep(1000);
        filter.setIDFTransform(true);
        filter.setOutputWordCounts(true);
        filter.setInputFormat(data);
        Instances dataFiltered = Filter.useFilter(data, filter);

        SimpleKMeans skm = new SimpleKMeans();
        skm.setDisplayStdDevs(false);
        skm.setDistanceFunction(new EuclideanDistance());
        skm.setMaxIterations(500);
        skm.setDontReplaceMissingValues(true);
        skm.setNumClusters(3);
        skm.setPreserveInstancesOrder(false);
        skm.setSeed(100);
        skm.buildClusterer(dataFiltered);

        int count = dataFiltered.numInstances();
        if (testInfos.length < count) {
            throw new IllegalArgumentException(
                    "expected at least " + count + " testInfos for qid " + qid + " but got " + testInfos.length);
        }

        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < count; i++) {
            builder.append("用户ID:").append(testInfos[i].getUid())
                    .append(",提交时间:").append(testInfos[i].getSubmitTime())
                    .append(",在聚类编号 ").append(skm.clusterInstance(dataFiltered.instance(i)))
                    .append(" 中。\n");
        }
        return builder.toString();
    }

}

 

推荐阅读