r - 通过 SQL Server 2017 使用 R 进行回归分析
问题描述
我想通过 SQL Server 2017 使用 R 代码执行回归分析(它集成在这里)。
这是使用 csv 的本机 R 代码
我们按 [CustomerName]+[ItemRelation]+[DocumentNum]+[DocumentYear] 分组分别执行回归的代码的主要内容
df=read.csv("C:/Users/synthex/Desktop/re.csv", sep=";",dec=",")
#load needed library
library(tidyverse)
library(broom)
#order dataset
df=df[ order(df[,5]),]
df=df[ order(df[,6]),]
#delete signs
df$Customer<-gsub("\\-","",df$Customer)
#create lm function for separately by group regression
my_lm <- function(df) {
lm(SaleCount~IsPromo, data = df)
}
reg=df %>%
group_by(CustomerName,ItemRelation,DocumentNum,DocumentYear) %>%
nest() %>%
mutate(fit = map(data, my_lm),
tidy = map(fit, tidy)) %>%
select(-fit, - data) %>%
unnest()
w=aggregate(df$action, by=list(CustomerName=df$CustomerName,ItemRelation=df$ItemRelation, DocumentNum=df$DocumentNum, DocumentYear=df$DocumentYear), FUN=sum)
View(w)
# multiply each group by the number of days of the action
EA<-data.frame(reg$CustomerName,reg$ItemRelation,reg$DocumentNum,reg$DocumentYear, reg$estimate*w$x)
#del intercepts
toDelete <- seq(2, nrow(EA), 2)
newdat=EA[ toDelete ,]
View(newdat)
完成的结果:此代码在 SSMS 中运行
所以我做了什么:
EXECUTE sp_execute_external_script
@language = N'R'
, @script = N' OutputDataSet <- InputDataSet;'
, @input_data_1 = N' SELECT [CustomerName]
,[ItemRelation]
,[SaleCount]
,[DocumentNum]
,[DocumentYear]
,[IsPromo]
FROM [Action].[dbo].[promo_data];'
WITH RESULT SETS (([CustomerName] nvarchar(max) NOT NULL, [ItemRelation] int NOT NULL,
[SaleCount] int NOT NULL,[DocumentNum] int NOT NULL,
[DocumentYear] int NOT NULL, [IsPromo] int NOT NULL));
df=as.data.frame(InputDataSet)
Message 102, level 15, state 1, line 17
Incorrect syntax near the "=" construct.
那么,如何在 SQL 中按组分别进行回归分析呢?
注意,所有的系数都必须保存,因为新的数据来到sql,应该已经通过每个组的构建模型的方程自动计算了。
上面的代码只是简单地估计了动作的影响,每组的贝塔系数乘以每组动作的天数。
如果需要,这是一个可重现的示例:
df=structure(list(CustomerName = structure(c(1L, 2L, 3L, 3L, 1L,
2L, 3L, 3L, 4L, 4L, 4L, 1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L,
2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L), .Label = c("Attacks of the vehicle",
"Auchan TS", "Tape of the vehicle", "X5 Retail Group"), class = "factor"),
ItemRelation = c(13322L, 13322L, 158121L, 158122L, 13322L,
13322L, 158121L, 158122L, 11592L, 13189L, 13191L, 13322L,
13322L, 158121L, 158122L, 11592L, 13189L, 13191L, 158121L,
158121L, 158122L, 158122L, 13322L, 13322L, 158121L, 158122L,
11592L, 13189L, 13191L, 157186L, 157192L, 158009L, 158010L,
158121L, 158121L, 158122L, 158122L, 13322L, 13322L, 158121L,
158122L, 11592L, 13189L, 13191L, 157186L, 157192L, 158009L,
158010L, 158121L, 158121L, 158122L, 158122L, 13322L, 13322L,
158121L, 158122L, 11514L, 11592L, 11623L, 13189L, 13191L),
SaleCount = c(10L, 35L, 340L, 260L, 3L, 31L, 420L, 380L,
45L, 135L, 852L, 1L, 34L, 360L, 140L, 14L, 62L, 501L, 0L,
560L, 640L, 0L, 0L, 16L, 0L, 0L, 15L, 66L, 542L, 49L, 228L,
3360L, 5720L, 980L, 0L, 0L, 1280L, 9L, 29L, 200L, 120L, 46L,
68L, 569L, 52L, 250L, 2360L, 3140L, 1640L, 0L, 0L, 1820L,
5L, 33L, 260L, 220L, 665L, 25L, -10L, 62L, 281L), DocumentNum = c(36L,
4L, 41L, 41L, 36L, 4L, 41L, 41L, 33L, 33L, 33L, 36L, 4L,
41L, 41L, 33L, 33L, 33L, 63L, 62L, 62L, 63L, 36L, 4L, 41L,
41L, 33L, 33L, 33L, 57L, 56L, 12L, 12L, 62L, 63L, 63L, 62L,
36L, 4L, 41L, 41L, 33L, 33L, 33L, 57L, 56L, 12L, 12L, 62L,
63L, 63L, 62L, 36L, 4L, 41L, 41L, 60L, 33L, 71L, 33L, 33L
), DocumentYear = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L), IsPromo = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("CustomerName", "ItemRelation",
"SaleCount", "DocumentNum", "DocumentYear", "IsPromo"), class = "data.frame", row.names = c(NA,
-61L))
解决方案
推荐阅读
- python - 查找最大空闲时间块
- javascript - Liferay 7 上的 Portlet 部署问题
- c - Binary Bomb Phase 5 - 寻找两个整数作为输入
- php - 如何使用 api 在 Dynamics crm 中设置leadsource?
- html - 如何将此行与 flexbox 对齐?
- python - NameError:使用 input() 时未定义名称“Daniel”
- javascript - 如何从 Node JS 中的数组中删除某些电子邮件地址
- sql - 如何让已经运行的 PL SQL 包运行得更快?
- ios - UI 在 tableview reloadData() 上冻结
- c# - 将文本转换为 C# 数组