首页 > 解决方案 > JAGS - pow 函数在带有标签切换的混合模型中无法正常工作

问题描述

我正在拟合一个混合模型来估计 3 个群体中每个群体的特征的平均值。我有一个标签转换问题,我正在尝试计算每个群体中每个基因型的观察到的个体数量和预期个体数量之间的距离,以重新标记群体集群。下面是一个可重现的例子。

由于某些原因,JAGS 无法正确计算距离的平方值。下面代码中对应的行是:pow(DistNumPerClust[k,j], 2))

因此,输出矩阵results$mean$dist不同于results$mean$DistNumPerClust^2后验计算的矩阵。有人知道解决这个问题的方法吗?

library(R2jags)
library(runjags)
library(dirmult)
set.seed(123)

############################
## Simulation of the data ## 
############################ 

npop=3
ngeno=2
freqbalance=1
nsamplesizeperpop <- 100
freqMLG <- t(rdirichlet(n=npop, alpha=rep(freqbalance, ngeno)))

samplesizegenoperpop <- sweep(freqMLG, 1, nsamplesizeperpop, "*") 

## Compute membership (probability that a genotype comes from pop 1, 2 or 3)
## Genotype as rows and populations as columns
membership <- sweep(freqMLG, 1, rowSums(freqMLG), "/")

# Parameters for simulations
nind=90
N = npop*nind # nb of observations

clust <- rep(1:npop, each=N/npop)

geno <- c()
for (i in 1:N){
  geno <- c(geno, sum(rmultinom(n=1, size=1, prob=freqMLG[, clust[i]])*1:ngeno))
}

numgeno <- as.numeric(table(geno))
## Multiply membership probabilities by sample size for each genotype
ExpNumPerClust <- sweep(membership, 1, numgeno, "*")

muOfClustsim <- c(1, 20, 50) # vector of population means
sigma <- 1.5 # residual sd
(tausim <- 1/(sigma*sigma)) # precision

# parameters are treated as data for the simulation step
data <- list(N=N, npop=npop, ngeno=ngeno, geno=geno, muOfClustsim=muOfClustsim, tausim=tausim, samplesizegenoperpop=samplesizegenoperpop)


## JAG model

txtstring <- "
data{
  # Likelihood:
  for (i in 1:N){
  ysim[i] ~ dnorm(eta[i], tausim) # tau is precision (1 / variance)
  eta[i] <- muOfClustsim[clust[i]]
  clust[i] ~ dcat( pClust[geno[i], 1:npop] )
  }
  for (k in 1:ngeno){
   pClust[k, 1:npop] ~ ddirch( samplesizegenoperpop[k,] )
  }
}

model{
fake <- 0
}
"

# Simulate with jags
out <- run.jags(txtstring, data = data, monitor=c("ysim"), sample=1, n.chains=1, summarise=FALSE)

# reformat the outputs
ysim <- coda::as.mcmc(out)[1:N]

## Estimation model
bayes.mod <- function(){

  # Likelihood:
  for (i in 1:N){
    ysim[i] ~ dnorm(eta[i], tau) # tau is precision (1 / variance)
    eta[i] <- beta[clust[i]]
    clust[i] ~ dcat( pClust[geno[i], 1:npop] )

  }
  for (k in 1:ngeno){
    ## pClust membership estimates 
   pClust[k, 1:npop] ~ ddirch( samplesizegenoperpop[k,] )
  }


    for (k in 1:ngeno){
      for (j in 1:npop){
        # problem of label switching: try to compute the distance between ObsNumPerClust and ExpNumPerClust (i.e. between observed and expected number of individuals of each genotype in each population)
        ObsNumPerClust[k,j] <- pClust[k, j] * numgeno[k] 
        DistNumPerClust[k,j] <- ObsNumPerClust[k,j] - ExpNumPerClust[k,j]
        dist[k,j] <- pow(DistNumPerClust[k,j], 2)
      }
    }


  # Priors
  beta ~ dmnorm(mu, sigma.inv)
  mu ~ dmnorm(m, V)
  sigma.inv ~  dwish(R, K)
  tau ~ dgamma(0.01, 0.01)
  # parameters transformations
  sig <- sqrt(1/ tau)
}

m = rep(1, npop)
V = diag(rep(0.01, npop))
R = diag(rep(0.1, npop))
K = npop

## Input variables
sim.dat.jags<-list("ysim","N","npop", "ngeno", "geno","m","V","R", "K", "samplesizegenoperpop","numgeno","ExpNumPerClust")

## Variables to monitor
bayes.mod.params <- c("beta","tau","sig","DistNumPerClust","dist")

## Starting values
init1 <- list(beta = c(0, 100, 1000), tau = 1)
bayes.mod.inits <-  list(init1)

## Run model
bayes.mod.fit<-jags(data = sim.dat.jags, inits = bayes.mod.inits, parameters.to.save = bayes.mod.params, n.chains=1, n.iter=101000, n.burnin=1000, n.thin=200, model.file = bayes.mod)

results <- print(bayes.mod.fit)

results$mean$dist
results$mean$DistNumPerClust^2

标签: powjagsmixture-model

解决方案


您似乎期望转换后的一组值的平均值与转换同一组值的平均值的结果相同。但事实并非如此——例如:

values <- c(1,2,3,6,8,20)
mean(values)^2
mean(values^2)

都不是一回事。

等效情况发生在您的模型中-您将 dist[k,j] 计算为 DistNumPerClust[k,j] 的平方,然后汇总为 dist 的平均值,并期望这与 DistNumPerClust 的平均值的平方相同[k,j]。或者在一个更简单的例子中:

library('runjags')

X <- 1:100
Y <- rnorm(length(X), 2*X + 10, 1)

model <- "model { 
for(i in 1 : N){ 
    Y[i] ~ dnorm(true.y[i], precision);
    true.y[i] <- (m * X[i]) + c
} 
m ~ dunif(-1000,1000)
c ~ dunif(-1000,1000) 
precision ~ dexp(1)
p2 <- precision^2

}"

data <- list(X=X, Y=Y, N=length(X))

results <- run.jags(model=model, monitor=c("m", "c", "precision", "p2"), 
data=data, n.chains=2)
results

更具体地说,这些不应该是相同的:

summary(results)['p2','Mean']
summary(results)['precision','Mean']^2

如果你想计算同样的东西,你可以提取完整的值链作为 MCMC 对象,并对这些进行转换:

p <- combine.mcmc(results,vars='precision')
p2 <- combine.mcmc(results,vars='p2')

mean(p^2)
mean(p2)

mean(p)
mean(sqrt(p2))

现在一切都是等价的。

马特


推荐阅读