首页 > 解决方案 > 如何修复:“唯一错误 >= 10:比较 (5) 仅适用于原子类型和列表类型”

问题描述

我想在训练和测试上划分我的数据。

我计算了百分位数,我想为数值变量选择唯一值

column <- c("NAME", "COUNTRY_CODE", "REGION_COUNTRY", "NACE_REV_2", 
            "CATEGORY_COMPANY", "TOTAL_ASSETS", "NET_CURRENT_ASSETS", 
            "FIXED_ASSETS", "NET_INCOME", "CAPITAL",  "WORKING_CAPITAL", 
            "LOANS", "CURRENT_RATIO", "ROA", "EBIT")

data <- PL_ALL[, column]

nums <- sapply(data, is.numeric) 

data.n <- data[, nums]

fact <- sapply(data, is.factor)

data.f <- data[, fact]

percentile <- apply(X=data.n, MARGIN=2, FUN=function(x) 
  round(quantile(x, seq(0.1, 1, 0.1), na.rm=TRUE), 2))

#There is ok

numeric <- colnames(data.n[which(unique >= 10)])

num_as_fact <- colnames(data.n[which(unique < 10 & unique > 1)])

排队: numeric<-colnames(data.n[which(unique>=10)]) 我有一个问题:

Error in unique >= 10 :comparison (5) is possible only for atomic and list types

并且类似:

num_as_fact<-colnames(data.n[which(unique<10 & unique>1 )])我有个问题:

Error in unique < 10 : comparison (3) is possible only for atomic and list types

我的数据:

dput(head(baza.n,15))
    structure(list(TOTAL_ASSETS = c(8L, 11L, 11L, 15L, 16L, 17L, 
17L, 18L, 21L, 22L, 25L, 28L, 28L, 29L, 32L), NET_CURRENT_ASSETS = c(-222L, 
-275L, -1281L, -353L, -97L, -48L, -16L, -8L, -70L, -642L, -375L, 
-236L, -50L, -476L, -1845L), FIXED_ASSETS = c(0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 21L, 0L, 0L, 1L, 0L), NET_INCOME = c(-1L, 
-1L, -2L, -1L, -9L, 0L, 0L, 0L, 0L, -8L, -1L, -1L, 5L, -23L, 
-11L), CAPITAL = c(-210L, -274L, -1281L, 26L, 21L, -72L, -22L, 
-11L, 51L, 129L, -393L, 0L, 3L, -507L, 192L), WORKING_CAPITAL = c(-168L, 
-248L, -987L, -275L, -22L, -23L, -7L, -2L, -1L, -414L, -342L, 
-123L, -55L, -478L, -660L), LOANS = c(30L, 0L, 106L, 30L, 8L, 
25L, 0L, 0L, 0L, 89L, 1L, 83L, 5L, 0L, 671L), CURRENT_RATIO = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ROA = c(-13L, 
-9L, -14L, -3L, -56L, 1L, -1L, -1L, -1L, -37L, -5L, -4L, 16L, 
-79L, -31L), EBIT = c(-1L, -1L, 2L, -1L, -9L, 0L, 0L, 0L, 0L, 
-12L, -1L, -1L, -4L, -23L, -11L), WC.TA = c(-21, -22.55, -89.73, 
-18.33, -1.38, -1.35, -0.41, -0.11, -0.05, -18.82, -13.68, -4.39, 
-1.96, -16.48, -20.62), EBIT.TA = c(-0.12, -0.09, 0.18, -0.07, 
-0.56, 0, 0, 0, 0, -0.55, -0.04, -0.04, -0.14, -0.79, -0.34)), row.names = c(NA, 
15L), class = "data.frame")

标签: r

解决方案


推荐阅读