首页 > 解决方案 > 如何为这些数据创建循环?

问题描述

我遇到的问题是这样的:我有使用 R 的经验,但不幸的是循环不是我的强项。我想创建一个缩短以下内容的循环:

library(wbstats)

# Download the energy-consumption and GDP indicators for every country.
enrg_cons <- wb(country = "all", indicator = "EG.USE.PCAP.KG.OE")
gdp <- wb(country = "all", indicator = "NY.GDP.PCAP.CD")


# Separate the OECD members one at a time: for a single iso3c code keep the
# 1995-2014 rows and only the date, value and country columns.

# Australia
enrg_cons_AUS <- enrg_cons[
  which(with(enrg_cons, iso3c == "AUS" & date >= 1995 & date <= 2014)),
  names(enrg_cons) %in% c("date", "value", "country")
]
gdp_AUS <- gdp[
  which(with(gdp, iso3c == "AUS" & date >= 1995 & date <= 2014)),
  names(gdp) %in% c("date", "value", "country")
]

# Austria
enrg_cons_AUT <- enrg_cons[
  which(with(enrg_cons, iso3c == "AUT" & date >= 1995 & date <= 2014)),
  names(enrg_cons) %in% c("date", "value", "country")
]
gdp_AUT <- gdp[
  which(with(gdp, iso3c == "AUT" & date >= 1995 & date <= 2014)),
  names(gdp) %in% c("date", "value", "country")
]

# Belgium
enrg_cons_BEL <- enrg_cons[
  which(with(enrg_cons, iso3c == "BEL" & date >= 1995 & date <= 2014)),
  names(enrg_cons) %in% c("date", "value", "country")
]
gdp_BEL <- gdp[
  which(with(gdp, iso3c == "BEL" & date >= 1995 & date <= 2014)),
  names(gdp) %in% c("date", "value", "country")
]

# Canada
enrg_cons_CAN <- enrg_cons[
  which(with(enrg_cons, iso3c == "CAN" & date >= 1995 & date <= 2014)),
  names(enrg_cons) %in% c("date", "value", "country")
]

我想调查大约 20 个经合组织国家的 GDP 和能源消耗,我想创建一个很好的循环,从上面的代码中提取值,而不是为每个国家编写它。我还想为以下命令创建一个循环:

# Augmented Dickey-Fuller (ADF) tests, all run with k = 0.
# For each country: print the p-value for log GDP, then the full test
# result for the first difference of log energy consumption.

# Australia
adf.test(log(gdp_AUS$value), k = 0)$p.value
adf.test(diff(log(enrg_cons_AUS$value)), k = 0)

# Austria
adf.test(log(gdp_AUT$value), k = 0)$p.value
adf.test(diff(log(enrg_cons_AUT$value)), k = 0)

同样,这些检验也需要应用到我正在研究的所有国家/地区。希望以上信息足够完整并且可以复现;如果不够,请告诉我,我会尽力补充。先谢谢大家!

标签: r

解决方案


这本质上是一个按组聚合的问题:对于每个 iso3c,您都想得到一个汇总统计量。这里可以使用 data.table 来完成聚合:

library(data.table)
# Turn gdp into a data.table, then run one ADF test (k = 0) on log(value)
# per iso3c group over the 1995-2014 window, keeping only the p-value.
# 'SOM' is excluded explicitly.
# NOTE(review): each group's values reach adf.test() in the row order stored
# in gdp -- confirm that this order is chronological.
setDT(gdp)
gdp[
  iso3c != 'SOM' & between(date, 1995, 2014),
  adf.test(log(value), k = 0)$p.value,
  by = iso3c
]

      iso3c         V1
     <char>      <num>
  1:    ARB 0.75501918
  2:    CSS 0.49522577
  3:    CEB 0.73116544
  4:    EAR 0.92497081
  5:    EAS 0.99000000
 ---                  
253:    VIR 0.97525907
254:    PSE 0.95374047
255:    YEM 0.05792444
256:    ZMB 0.95657239
257:    ZWE 0.79529898

您可能已经注意到上面的 iso3c != 'SOM' 这个条件。如果不排除该分组,计算会报错,因为它似乎没有足够的观测值来完成检验。因此,更通用的做法是:

library(data.table)
# Grouped ADF tests (k = 0) over the 1995-2014 window, one per iso3c code.
# Groups with 5 or fewer rows are skipped (the `if` yields NULL, so they
# produce no output row) instead of hard-coding the codes that fail.
#
# adf.test() and diff() depend on observation order, so each group's values
# are put in ascending date order first. wb() appears to deliver each
# country's rows newest-first (TODO confirm); sorting is a no-op when the
# rows are already chronological.

# GDP: p-value of the test on log(value).
setDT(gdp)
gdp[between(date, 1995, 2014),
    if (.N > 5L) adf.test(log(value[order(date)]), k = 0)$p.value,
    by = iso3c]

# Energy consumption: full test result on the first difference of log(value).
# The result is a list, which data.table should spread into one column per
# element -- verify the resulting columns are what you need.
setDT(enrg_cons)
enrg_cons[between(date, 1995, 2014),
          if (.N > 5L) adf.test(diff(log(value[order(date)])), k = 0),
          by = iso3c]

数据和设置:

library(wbstats)
library(tseries)

# Fetch both World Bank indicators for all countries; every snippet above
# starts from these two objects.
enrg_cons <- wb(country = "all", indicator = "EG.USE.PCAP.KG.OE")
gdp <- wb(country = "all", indicator = "NY.GDP.PCAP.CD")

推荐阅读