首页 > 解决方案 > 如何在数据集 A 的月份 = 数据集 B 的上个月的条件下映射两个数据集 A 和 B

问题描述

有两个数据集——“Open_raw_data”和“exchangeRate”。在 Open_raw_data 我想获取当前日期列和货币列的汇率基础。但是,在映射必须与上个月的 exchangeRate 进行映射的情况下。

数据

Open_raw_data <- structure(list(Current_date = structure(c(3L, 1L, 2L), .Label = c("11-May-20", 
"15-Jun-20", "27-Apr-20"), class = "factor"), INV_AMOUNT = c(7269.11, 
2367.51, 12694.02), CURRENCY = structure(c(1L, 1L, 1L), .Label = "EUR", class = "factor")), class = "data.frame", row.names = c(NA, 
-3L))

exchange_rate <- structure(list(Currency = structure(c(8L, 8L, 8L, 8L, 7L, 7L, 
7L, 7L, 13L, 13L, 13L, 13L, 20L, 20L, 20L, 20L, 5L, 5L, 5L, 5L, 
16L, 16L, 16L, 16L, 3L, 3L, 3L, 3L, 17L, 17L, 17L, 17L, 2L, 2L, 
2L, 2L, 15L, 15L, 15L, 15L, 4L, 4L, 4L, 4L, 8L, 7L, 13L, 20L, 
5L, 16L, 3L, 17L, 2L, 15L, 4L, 11L, 11L, 11L, 11L, 11L, 8L, 7L, 
13L, 20L, 5L, 16L, 3L, 17L, 2L, 15L, 4L, 11L, 14L, 14L, 14L, 
14L, 14L, 14L, 18L, 18L, 18L, 18L, 18L, 18L, 10L, 10L, 10L, 10L, 
10L, 10L, 12L, 12L, 12L, 12L, 12L, 12L, 21L, 21L, 21L, 21L, 21L, 
21L, 9L, 9L, 9L, 9L, 9L, 9L, 19L, 19L, 19L, 19L, 19L, 19L, 1L, 
1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 11L, 13L, 15L, 16L, 17L, 20L, 14L, 18L, 10L, 12L, 
21L, 9L, 19L), .Label = c("AUD", "CNY", "CZK", "EUR", "GBP", 
"HKD", "IDR", "INR", "KZT", "MAD", "MXN", "NGN", "PLN", "RON", 
"RUB", "SEK", "SGD", "TRY", "UAH", "USD", "VND"), class = "factor"), 
    Year = c(2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L), Month = c(1L, 2L, 3L, 
    4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
    2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 2L, 3L, 4L, 5L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 
    1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 
    4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 
    1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), Exchnage_Rate = c(0.01266, 
    0.01256, 0.01204, 0.01224, 6.6e-05, 6.4e-05, 5.5e-05, 6.16e-05, 
    0.23288, 0.23283, 0.2197, 0.22014, 0.90671, 0.90721, 0.91044, 
    0.91894, 1.18646, 1.16307, 1.12671, 1.14633, 0.09401, 0.09444, 
    0.09037, 0.09348, 0.0396, 0.0331, 0.0366, 0.03691, 0.66551, 
    0.65106, 0.63837, 0.65294, 0.13068, 0.12975, 0.12894, 0.13019, 
    0.01434, 0.01357, 0.0114, 0.01259, 1, 1, 1, 1, 0.01192, 6.1e-05, 
    0.22483, 0.90058, 1.11213, 0.09536, 0.03714, 0.63709, 0.12622, 
    0.01283, 1, 0.04824, 0.04625, 0.03803, 0.03885, 0.04053, 
    0.01192, 6.1e-05, 0.22483, 0.90058, 1.11213, 0.09536, 0.03714, 
    0.63709, 0.12622, 0.01283, 1, 0.03859, 0.20928, 0.20821, 
    0.207, 0.20657, 0.20638, 0.20652, 0.15065, 0.14644, 0.14303, 
    0.13266, 0.13203, 0.1297, 0.09419, 0.09447, 0.09344, 0.09283, 
    0.09197, 0.0919, 0.0025, 0.00253, 0.00233, 0.00237, 0.00232, 
    0.00232, 3.91e-05, 3.97e-05, 3.86e-05, 3.91e-05, 3.86e-05, 
    3.83e-05, 0.0024, 0.00237, 0.00204, 0.00214, 0.00221, 0.00222, 
    0.03693, 0.03708, 0.03263, 0.03406, 0.03341, 0.03325, 0.61695, 
    0.59838, 0.55659, 0.60261, 0.60036, 0.6116, 0.1168, 0.11828, 
    0.11845, 0.11863, 0.11616, 0.11474, 0.6114, 0.12609, 0.03737, 
    1, 1.09587, 0.11484, 6.3e-05, 0.0118, 0.03859, 0.22492, 0.01251, 
    0.0954, 0.63841, 0.89127, 0.20652, 0.1297, 0.0919, 0.00232, 
    3.83e-05, 0.00222, 0.03325)), class = "data.frame", row.names = c(NA, 
-147L))

我试过的代码:

i1 <- as.numeric(format(open_data$Current_date, '%Y'))
i2 <- as.numeric(format(open_data$Current_date, '%m'))

exchangeRate <- exchangeRate %>% 
  filter(Month == i2 - 1 & Year == i1)

Open_raw_data_f1 <- sqldf("select Open_raw_data.*,exchangeRate.Exchnage_Rate as Exchnage_Rate from Open_raw_data  join
                        exchangeRate on Open_raw_data.Currency =exchangeRate.Currency")

标签: r

解决方案


尝试这个

library(dplyr)
library(lubridate)

exchange_rate %>%
  mutate(Date = make_date(Year, Month), .keep = "unused") %>% 
  right_join(Open_raw_data %>% mutate(
               Current_date = dmy(Current_date),
               Prev_date = rollback(Current_date, roll_to_first = T) - months(1)),
             by = c("Date" = "Prev_date", "Currency" = "CURRENCY")) %>%
  select(-Date)

#   Currency Exchnage_Rate Current_date INV_AMOUNT
# 1      EUR             1   2020-04-27    7269.11
# 2      EUR             1   2020-05-11    2367.51
# 3      EUR             1   2020-06-15   12694.02

推荐阅读