首页 > 解决方案 > 通过 SQL Server 2017 使用 R 进行回归分析

问题描述

我想通过 SQL Server 2017 使用 R 代码执行回归分析(它集成在这里)。

这是使用 csv 的本机 R 代码

我们按 [CustomerName]+[ItemRelation]+[DocumentNum]+[DocumentYear] 分组分别执行回归的代码的主要内容

df=read.csv("C:/Users/synthex/Desktop/re.csv", sep=";",dec=",")

#load needed library

library(tidyverse)
library(broom)
#order dataset
df=df[ order(df[,5]),]
df=df[ order(df[,6]),]
#delete signs
df$Customer<-gsub("\\-","",df$Customer)

#create lm function for separately by group regression

my_lm <- function(df) {
  lm(SaleCount~IsPromo, data = df)
}

reg=df %>%
  group_by(CustomerName,ItemRelation,DocumentNum,DocumentYear) %>%
  nest() %>%
  mutate(fit = map(data, my_lm),
         tidy = map(fit, tidy)) %>%
  select(-fit, - data) %>%
  unnest()

w=aggregate(df$action, by=list(CustomerName=df$CustomerName,ItemRelation=df$ItemRelation, DocumentNum=df$DocumentNum, DocumentYear=df$DocumentYear), FUN=sum)
View(w)

# multiply each group by the number of days of the action
EA<-data.frame(reg$CustomerName,reg$ItemRelation,reg$DocumentNum,reg$DocumentYear, reg$estimate*w$x)

#del intercepts
toDelete <- seq(2, nrow(EA), 2)
newdat=EA[ toDelete ,]
View(newdat)

完成的结果:此代码在 SSMS 中运行

所以我做了什么:

EXECUTE sp_execute_external_script
      @language = N'R'
    , @script = N' OutputDataSet <- InputDataSet;'
    , @input_data_1 = N' SELECT     [CustomerName]
      ,[ItemRelation]
      ,[SaleCount]
      ,[DocumentNum]
      ,[DocumentYear]
      ,[IsPromo]
 FROM [Action].[dbo].[promo_data];'

    WITH RESULT SETS (([CustomerName] nvarchar(max) NOT NULL, [ItemRelation] int NOT NULL,
     [SaleCount] int NOT NULL,[DocumentNum]  int NOT NULL,
    [DocumentYear] int NOT NULL, [IsPromo] int NOT NULL));


df=as.data.frame(InputDataSet)

Message 102, level 15, state 1, line 17
Incorrect syntax near the "=" construct.

那么,如何在 SQL 中按组分别进行回归分析呢?

注意,所有的系数都必须保存,因为新的数据来到sql,应该已经通过每个组的构建模型的方程自动计算了。

上面的代码只是简单地估计了动作的影响,每组的贝塔系数乘以每组动作的天数。

如果需要,这是一个可重现的示例:

df=structure(list(CustomerName = structure(c(1L, 2L, 3L, 3L, 1L, 
2L, 3L, 3L, 4L, 4L, 4L, 1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 
2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L), .Label = c("Attacks of the vehicle", 
"Auchan TS", "Tape of the vehicle", "X5 Retail Group"), class = "factor"), 
    ItemRelation = c(13322L, 13322L, 158121L, 158122L, 13322L, 
    13322L, 158121L, 158122L, 11592L, 13189L, 13191L, 13322L, 
    13322L, 158121L, 158122L, 11592L, 13189L, 13191L, 158121L, 
    158121L, 158122L, 158122L, 13322L, 13322L, 158121L, 158122L, 
    11592L, 13189L, 13191L, 157186L, 157192L, 158009L, 158010L, 
    158121L, 158121L, 158122L, 158122L, 13322L, 13322L, 158121L, 
    158122L, 11592L, 13189L, 13191L, 157186L, 157192L, 158009L, 
    158010L, 158121L, 158121L, 158122L, 158122L, 13322L, 13322L, 
    158121L, 158122L, 11514L, 11592L, 11623L, 13189L, 13191L), 
    SaleCount = c(10L, 35L, 340L, 260L, 3L, 31L, 420L, 380L, 
    45L, 135L, 852L, 1L, 34L, 360L, 140L, 14L, 62L, 501L, 0L, 
    560L, 640L, 0L, 0L, 16L, 0L, 0L, 15L, 66L, 542L, 49L, 228L, 
    3360L, 5720L, 980L, 0L, 0L, 1280L, 9L, 29L, 200L, 120L, 46L, 
    68L, 569L, 52L, 250L, 2360L, 3140L, 1640L, 0L, 0L, 1820L, 
    5L, 33L, 260L, 220L, 665L, 25L, -10L, 62L, 281L), DocumentNum = c(36L, 
    4L, 41L, 41L, 36L, 4L, 41L, 41L, 33L, 33L, 33L, 36L, 4L, 
    41L, 41L, 33L, 33L, 33L, 63L, 62L, 62L, 63L, 36L, 4L, 41L, 
    41L, 33L, 33L, 33L, 57L, 56L, 12L, 12L, 62L, 63L, 63L, 62L, 
    36L, 4L, 41L, 41L, 33L, 33L, 33L, 57L, 56L, 12L, 12L, 62L, 
    63L, 63L, 62L, 36L, 4L, 41L, 41L, 60L, 33L, 71L, 33L, 33L
    ), DocumentYear = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L), IsPromo = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("CustomerName", "ItemRelation", 
"SaleCount", "DocumentNum", "DocumentYear", "IsPromo"), class = "data.frame", row.names = c(NA, 
-61L))

标签: rssmsdata-sciencesql-server-2017

解决方案


推荐阅读