hmatsu1226/HyPhyTree

scope wrong for example scripts

Opened this issue · 5 comments

It appears that certain objects R not passed to functions in the example scripts, resulting in errors. For example:

Poincare_dist <- function(u,v){
    if(sum(u**2)>=1 || sum(v**2)>=1){
        return(Inf)
    }else{
        return(acosh(1+2*(sum((u-v)**2))/(1-sum(u**2))/(1-sum(v**2))))
    }
}

objfn_H1 <- function(tmpz){
    ret <- 0
    for(i in 1:Nleaves){
        a <- Poincare_dist(Z1[i,], tmpz)
        ret <- ret + (a - Dall[i,Nleaves+n])**2
    }
    return(ret)
}

objfn_H2 <- function(tmpz){
    ret <- 0
    for(i in 1:Nleaves){
        a <- Poincare_dist(Z2[i,], tmpz)
        ret <- ret + (log(cosh(a)) - Dall[i,Nleaves+n])**2
    }
    return(ret)
}

objfn_MDS <- function(tmpz){
    ret <- 0
    for(i in 1:Nleaves){
        a <- sqrt(sum((X1.mds$points[i,]-tmpz)**2))
        ret <- ret + (a - Dall[i,Nleaves+n])**2
    }
    return(ret)
}

embed_tree = function(tree, Ms=c(10,20)){
    Nleaves <- length(tree$tip.label)
    
    tree$mrca <- mrca(tree)
    Dall <- dist.nodes(tree)
    Dleaves <- cophenetic(tree)

    Nall <- dim(Dall)[1]
    X1 <- Dleaves
    X2 <- acosh(exp(Dleaves))

    MSE_H1 <- rep(0, length(Ms))
    MSE_H2 <- rep(0, length(Ms))
    MSE_MDS <- rep(0, length(Ms))

    for(M in Ms){
        #general hyperbolic embeddings
        X1.hydra <- hydraPlus(X1, dim=M, curvature=1, alpha=1, equi.adj=0, control=list(return.dist=1, isotropic.adj=FALSE))
        Z1 <- X1.hydra$r * X1.hydra$directional

        Z1inter <- matrix(0,Nall - Nleaves,M)
        for(n in 1:(Nall-Nleaves)){
            tmp <- optim(rep(0,M),objfn_H1,gr=NULL,method="BFGS")
            Z1inter[n,] <- tmp$par
        }

        Z1inter_dist <- matrix(0, Nall, Nall)
        for(i in 1:Nall){
            for(j in 1:Nall){
                if(i > Nleaves){
                    tmpz1 <- Z1inter[i-Nleaves,]
                }else{
                    tmpz1 <- Z1[i,]
                }

                if(j > Nleaves){
                    tmpz2 <- Z1inter[j-Nleaves,]
                }else{
                    tmpz2 <- Z1[j,]
                }

                Z1inter_dist[i,j] <- Poincare_dist(tmpz1, tmpz2)
            }
        }

        #our hyperbolic embeddings
        X2.hydra <- hydraPlus(X2, dim=M, curvature=1, alpha=1, equi.adj=0, control=list(return.dist=1, isotropic.adj=FALSE))
        Z2 <- X2.hydra$r * X2.hydra$directional

        Z2inter <- matrix(0,Nall-Nleaves,M)
        for(n in 1:(Nall-Nleaves)){
            tmp <- optim(rep(0,M),objfn_H2,gr=NULL,method="BFGS")
            Z2inter[n,] <- tmp$par
        }

        Z2inter_dist <- matrix(0, Nall, Nall)
        for(i in 1:Nall){
            for(j in 1:Nall){
                if(i > Nleaves){
                    tmpz1 <- Z2inter[i-Nleaves,]
                }else{
                    tmpz1 <- Z2[i,]
                }

                if(j > Nleaves){
                    tmpz2 <- Z2inter[j-Nleaves,]
                }else{
                    tmpz2 <- Z2[j,]
                }

                Z2inter_dist[i,j] <- Poincare_dist(tmpz1, tmpz2)
            }
        }

        #Euclidean embeddings
        X1.mds <- sammon(X1, k=M)

        Zmdsinter <- matrix(0,Nall-Nleaves,M)
        for(n in 1:(Nall-Nleaves)){
            tmp <- optim(rep(0,M),objfn_MDS,gr=NULL,method="BFGS")
            Zmdsinter[n,] <- tmp$par
        }

        Zmdsinter_dist <- matrix(0, Nall, Nall)
        for(i in 1:Nall){
            for(j in 1:Nall){
                if(i > Nleaves){
                    tmpz1 <- Zmdsinter[i-Nleaves,]
                }else{
                    tmpz1 <- X1.mds$points[i,]
                }

                if(j > Nleaves){
                    tmpz2 <- Zmdsinter[j-Nleaves,]
                }else{
                    tmpz2 <- X1.mds$points[j,]
                }

                Zmdsinter_dist[i,j] <- sqrt(sum((tmpz1-tmpz2)**2))
            }
        }

        #MSE
        MSE_H1[which(Ms==M)] <- sum((c(Dall[upper.tri(Dall)])-c(Z1inter_dist[upper.tri(Dall)]))**2) / choose(Nall,2)
        MSE_H2[which(Ms==M)] <- sum((c(Dall[upper.tri(Dall)])-c(log(cosh(Z2inter_dist[upper.tri(Dall)]))))**2) / choose(Nall,2)
        MSE_MDS[which(Ms==M)] <- sum((c(Dall[upper.tri(Dall)])-c(Zmdsinter_dist[upper.tri(Dall)]))**2) / choose(Nall,2)
    }

    output <- rbind(MSE_MDS, MSE_H1, MSE_H2)
    rownames(output) <- c("MDS","H1","H2")
    colnames(output) <- Ms
    #write.csv(output, paste("out/MSE_Dint_",nt,".csv",sep=""))
}

embed_tree(ape::rtree(10))

produces the output:

iter   10 value 0.021336
iter   20 value 0.001870
iter   30 value 0.000279
iter   40 value 0.000080
iter   50 value 0.000034
iter   60 value 0.000019
iter   70 value 0.000013
iter   80 value 0.000009
iter   90 value 0.000006
iter  100 value 0.000004
iter  110 value 0.000003
iter  120 value 0.000002
iter  130 value 0.000001
iter  140 value 0.000001
iter  150 value 0.000000
iter  160 value 0.000000
iter  170 value 0.000000
final  value 0.000000 
converged
Error in Poincare_dist(Z1[i, ], tmpz): object 'Z1' not found
Traceback:

1. embed_tree(ape::rtree(10))
2. optim(rep(0, M), objfn_H1, gr = NULL, method = "BFGS")   # at line 58 of file <text>
3. (function (par) 
 . fn(par, ...))(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
4. fn(par, ...)
5. Poincare_dist(Z1[i, ], tmpz)   # at line 12 of file <text>

Z1 isn't actually passed to objfn_H1() in optim(rep(0,M),objfn_MDS,gr=NULL,method="BFGS")

Also, why hard-code Nleaves instead of getting it from the tree object via length(tree$tip.label)?

I think hydraPlus is not working well.
The hydraPlus embedd the distance matrix into M dimensional space (now M=10), but M=10 is too large compared to the number of leaves in the tree you are giving, rtree(10).
I think this may be the cause of error, so could you please check with M=3 to try?

I investigated trees with outgroup in some situations, and Nleaves is length(tree$tip.label)-1 in this situation.
The reason of hard-code Nleaves is a remnant of that. So, as pointed out, it would have been better to use length(tree$tip.label) in this case.

Yeah, the number of dimensions was too high, but the same error was generated with less dimensions. Passing Z1, Nleaves, and n to the intended function through optim() fixed the issue

Sorry, I misread the situation.
In the original script, those values are treated as global variables. It's not a complimentary implementation, but it's in the process of being improved through trial and error. I'm glad you were able to fix the error.