Gamma family in manyglm
Closed this issue · 3 comments
Add support for the gamma family using the inverse link
-
update
manyglm.R
-
update
resampTest.h
-
update
glm.cpp
-
update
glmtest.cpp
-
update
Rinterface.cpp
-
update
manyglm.Rd
-
write some tests for manyglm
in resampTest.h
in the class definition of GammaGlm:
The following have been taken from the R
The shape parameter is fixed to 1 at the moment, mu is the rate
I have:
// Gamma regression y1 ~ GAMMA(n, mu)
// Using inverse Link
// these have been taken from looking at Gamma() unless otherwise stated
// See issue #53
class GammaGlm : public PoissonGlm {
public:
GammaGlm(const reg_Method *);
virtual ~GammaGlm();
double link(double mui) const { return 1 / MAX(mintol, mui); }
double linkinv(double etai) const { return 1 / MAX(mintol, etai); }
double varfunc(double mui) const { return mui * mui; } // mui^2
// TODO
// a is weight ?
// deviance residual
double devfunc(double yi, double mui, double a) const {
return -2 * (log(yi == 0 ? 1 : yi / MAX(mintol, mui)) -
(yi - mui) / MAX(mintol, mui));
}
// standard functions, but I am not sure if the parameterization is
double pdf(double yi, double mui, double a) const {
return Rf_dgamma(yi, n, mui, FALSE);
}
double cdf(double yi, double mui, double a) const {
return Rf_pgamma(yi, n, mui, TRUE, FALSE);
}
unsigned int cdfinv(double ui, double mui, double a) const {
return (unsigned int)Rf_qgamma(ui, n, mui, TRUE, FALSE);
}
unsigned int genRandist(double mui, double a) const {
return Rf_rgamma(n, mui);
}
// I think this might be the first derivative of the link funtion so in our
// case using the inverse link
double LinkDash(double mui) const { return -1 / MAX(mui * mui, mintol); }
// I am assuming that k = a, will need to discuss more
double llfunc(double yi, double mui, double a) const {
return 2 * (n - 1) * log(yi) - yi * mui + n * log(MAX(mintol, mui)) -
gsl_sf_lngamma(n);
}
// Weight function, I am not sure what this should be
double weifunc(double mui, double a) const { return 1; }
};
example code:
n <- 100; shape = 1;
gen <- function(rate) rgamma(n, shape, rate)
Y <- sapply(1:4, gen)
Y <- mvabund(Y)
#To fit a log-linear model assuming counts are poisson:
glm.spid <- manyglm(Y~ 1, family="gamma", show.warning = T)
output:
test-manyglm.RWarning: EstIRLS reached max iterations, may not converge in the 0-th variable (dev=nan, err=nan)!
Warning: EstIRLS reached max iterations, may not converge in the 1-th variable (dev=nan, err=nan)!
Warning: EstIRLS reached max iterations, may not converge in the 2-th variable (dev=nan, err=nan)!
Warning: EstIRLS reached max iterations, may not converge in the 3-th variable (dev=nan, err=nan)!
[,1] [,2] [,3] [,4]
[1,] NaN NaN NaN NaN
Something is not quite right.
Consulting with @dwarton he has said we are going to us the log link instead.
My sampTest.h
GammaGlm
class now looks like this
// Gamma regression y1 ~ GAMMA(shape = n, rate = shape / mui)
// Using inverse Link
// these have been taken from looking at Gamma() unless otherwise stated
// See issue #53
class GammaGlm : public PoissonGlm {
public:
GammaGlm(const reg_Method *);
virtual ~GammaGlm();
// we will use the log link inherited from PoissonGlm base class
// the functions below are using the inverse link function
// double link(double mui) const { return 1 / MAX(mintol, mui); }
// double linkinv(double etai) const { return 1 / MAX(mintol, etai); }
// double LinkDash(double mui) const { return -1 / MAX(mui * mui, mintol); }
// double weifunc(double mui, double a) const { return mui; }
// the variance as a function of the mean
// EX = shape / rate = mui, VarX = shape/(rate^2)
// Var (mui) = mui^2/shape
double varfunc(double mui, double a) const { return (mui * mui) / n; }
// deviance residual HELP I am not sure if this is right.
double devfunc(double yi, double mui, double a) const {
return -2 * (log(yi == 0 ? 1 : yi / MAX(mintol, mui)) -
(yi - mui) / MAX(mintol, mui));
}
// n = k
double llfunc(double yi, double mui, double a) const {
return 2 * ((n - 1) * log(yi) - (yi * n) / mui +
n * log(n * MAX(mintol, mui)) - gsl_sf_lngamma(n));
}
// standard functions, but I am not sure if the parameterization is right
// mui = shape / rate => rate = shape / mui,
double pdf(double yi, double mui, double a) const {
return Rf_dgamma(yi, n, n / MAX(mintol, mui), FALSE);
}
double cdf(double yi, double mui, double a) const {
return Rf_pgamma(yi, n, n / MAX(mintol, mui), TRUE, FALSE);
}
unsigned int cdfinv(double ui, double mui, double a) const {
return (unsigned int)Rf_qgamma(ui, n, n / MAX(mintol, mui), TRUE, FALSE);
}
unsigned int genRandist(double mui, double a) const {
return Rf_rgamma(n, n / MAX(mintol, mui));
}
};
Example
library(mvabund)
set.seed(100)
n <- 1000; shape = 1; rates <- 1:4
gen <- function(rate) rgamma(n, shape, rate)
Y <- sapply(rates, gen)
Y <- mvabund(Y)
# using the log link
gamma_glm <- manyglm(Y ~ 1, family="gamma", show.warning = T, k = shape)
(rate_estimates <- 1 / exp(coef(gamma_glm)))
Output
X1 X2 X3 X4
[1,] 1.001443 2.055378 3.190494 3.989842
However the summary method seems to fail in the same way as the inverse link did.
summary(gamma_glm)
EstIRLS reached max iterations, may not converge in the 0-th variable (dev=-649.7591, err=0.0000)!
Warning: EstIRLS reached max iterations, may not converge in the 1-th variable (dev=-1035.5353, err=0.0002)!
Warning: EstIRLS reached max iterations, may not converge in the 2-th variable (dev=-1531.4585, err=0.0001)!
Warning: EstIRLS reached max iterations, may not converge in the 3-th variable (dev=-1695.1861, err=0.0001)!
lda must be >= MAX(N,1): lda=0 N=0incX cannot be zero; is set to 0.BLAS error: Parameter number 7 passed to cblas_dtrsv had an invalid value
make: *** [test] Error 255
Any idea what is going wrong?
The call to regression
in Rinterface.cpp
seems to fail in the RtoGlmSmry
but not in the RtoGlm
one when the setup seems to be identical.
This has been resolved in pull request #56.