首页 > 解决方案 > 通过识别函数的瓶颈来改进速度函数

问题描述

我希望这个问题不会被认为是宽泛的。然而,我不确定如何以不同的方式提问:在他的 youtube 视频中,Ed Boone 用 R 介绍了 ABM。他编写了以下代码(仅更改了一些变量名)。代码在 1000 次观察中运行良好,但是当我放大观察时它变得非常慢。因此,我想提高功能的速度。我可以在每一行之后手动添加start.time_x <- Sys.time()等(看看什么需要很多时间),但我想知道是否有更好的方法来识别瓶颈,因为所有嵌套的 for 循环甚至都变得相当复杂(因为我还必须考虑每个代码运行多少次):

# ABM_Covid - II

start.time <- Sys.time()
Data_Generator <- function(nPop1, E0, I0) {
    # Create a population of susceptibles
    Data <- data.frame( AgentNo=1:nPop1,
                        State="Susceptible",
                        Mixing= runif(nPop1,0,1),
                        TimeE = 0,
                        TimeI = 0,
                        stringsAsFactors = FALSE)
    Data$State[1:E0] <- "Exposed"  # This just says that the first person is exposed, since the mixing is random anyway, this is not an issue (because the mixing of Exposed is random)
    Data$Time[1:E0] <- rbinom(E0, 13, 0.5) + 1 # Exposure up to 14 days
    Data$State[(E0+1):(E0+I0)] <- "Infected"
    Data$Time[(E0+1):(E0+I0)] <- rbinom(I0, 12, 0.5)
    return(Data)
}

ABM_Covid <- function(Data, parameters, runtime){
    nPop1 <- nrow(Data)
    # runtime <- 15
    Results <- data.frame(  Susceptible     = rep (0, runtime),
                            Exposed         = rep (0, runtime),
                            Infected        = rep (0, runtime),
                            Recovered       = rep (0, runtime),
                            Deaths          = rep (0, runtime))
    # Move people through time
    for (k in 1:runtime){
        # Moving people through time
        StateSusceptible <- (1:nPop1)[Data$State == "Susceptible"]
        StateSusceptible_or_Exposed <- (1:nPop1)[Data$State == "Susceptible" | Data$State == "Exposed"]
        for (i in StateSusceptible) {
            # Determine if they like to meet others
            Mix1 <- Data$Mixing[i]
            # How many agents will they meet? The plus one meets everybody meets somebody
            Meetings <- round(Mix1*parameters$MaxMix,0) + 1
            # Grab the agents they will meet
            People_met <- sample(StateSusceptible_or_Exposed, Meetings, replace=TRUE, prob = Data$Mixing[StateSusceptible_or_Exposed])
            for (j in 1:length(People_met)) {
                    # Grab who they will meet
                    Meetingsa <- Data[People_met[j], ]
                    # If exposed change State
                    if(Meetingsa$State== "Exposed") {
                        Urand1 <- runif(1,0,1)
                        if (Urand1 < parameters$S2E){
                            Data$State[i] <-  "Exposed"
                        }   
                    }
                }
            }
            # Grab those who have been exposed and increment
            StateE1 <- (1:nPop1)[Data$State== "Exposed"]
            Data$TimeE[StateE1] = Data$TimeE[StateE1] + 1
            StateE2 <- (1:nPop1)[Data$State== "Exposed" & Data$TimeE > 14]
            Data$State[StateE2] <- "Recovered"    
            # Grab those who could become sick
            StateE3 <- (1:nPop1)[Data$State== "Exposed" & Data$TimeE > 3]
            for (i in StateE3){
                Urand1 <- runif(1,0,1)
                # randomly assign whether they get sick or not
                if ( Urand1 < parameters$E2I ) {
                    Data$State[i] <- "Infected"
                }
            }
            # Update how long they have been sick
            StateI1 <- (1:nPop1)[Data$State== "Infected"]
            Data$TimeI[StateI1] = Data$TimeI[StateI1] + 1
            # Recovered bin
            StateI2 <- (1:nPop1)[Data$State== "Infected" & Data$TimeI > 14]
            Data$State[StateI2] <- "R"
            # Not recovered could potentially die
            StateI3 <- (1:nPop1)[Data$State== "Infected" & Data$TimeI < 15]
            Data$State[StateI3] <- ifelse(runif(length(StateI3), 0, 1 ) > parameters$I2D, "Infected", "Deaths")
            Results$Susceptible[k] <- length(Data$State[Data$State=="Susceptible"])
            Results$Exposed[k] <- length(Data$State[Data$State=="Exposed"])
            Results$Infected[k] <- length(Data$State[Data$State=="Infected"])
            Results$Recovered[k] <- length(Data$State[Data$State=="Recovered"])
            Results$Deaths[k] <- length(Data$State[Data$State=="Deaths"])
    }
    return(Results)
}

Data <- Data_Generator(1000, E0=5, I0=2)
parameters <- data.frame( MaxMix = 10,
                    S2E = 0.25,
                    E2I = 0.1,
                    I2D = 0.1)
Model1 <- ABM_Covid(Data, parameters, runtime=25)
plot(1:25, Model1$Susceptible, type="l", col="purple", ylim = c(0,1000))
lines(1:25, Model1$Exposed, type="l", col="orange")
lines(1:25, Model1$Infected, type="l", col="red")
lines(1:25, Model1$Recovered, type="l", col="seagreen")
lines(1:25, Model1$Deaths, type="l", col="black")
end.time <- Sys.time()
time.taken <- end.time - start.time

标签: rfunctionperformancefor-loop

解决方案


这是优化功能的一部分(状态更新)的一种方法。这样你就摆脱了i循环和j循环。

我不确定我是否理解算法的每一个细节。但也许你无论如何都可以使用这个解决方案的一些部分和想法。

update_State <- function(nmeeting, seed = 123) {
  set.seed(seed)
  x <- Data %>%
    filter(State %in% c("Susceptible", "Exposed")) %>%
    slice_sample(n = nmeeting) %>%
    filter(State == "Exposed") %>%
    summarise(prob = any(runif(n()) <= paramters$S2E)) %>%
    pull(prob)
  
  return(if (x) "Exposed" else "Susceptible")
  
}

Data %>% 
  filter(State %in% c("Susceptible", "Exposed")) %>%
  mutate(
    n_meetings =  round(Mixing*parameters$MaxMix,0) + 1,
    new_State = map(n_meetings, update_State) #this is the new State after all the Meetings have been made
  )

这个想法是检查给定的人一次会面的所有暴露的人。

这将在 4-5 秒内运行。


推荐阅读