- BFGS, LBFGS: Anders Skajaa. Limited Memory BFGS for Nonsmooth Optimization. 2010.
- LBFGS: J. Nocedal and S. Wright. Numerical Optimization. Springer, 2nd edition, 2006.
- OWLQN: Galen Andrew and Jianfeng Gao. Scalable Training of L1-Regularized Log-Linear Models. 2007.
附带的代码由 Galen Andrew 和 Jianfeng Gao 在其 2007 年的论文中提供。这里为代码添加了比较详细的注释,完整代码参见目录中的其它文件。其中虚梯度计算部分的注释如下:
// OWLQN
// Fills `dir` with the steepest-descent direction and stores a copy of it in
// `steepestDescDir`. Without an L1 term this is the negated loss gradient;
// with one, each component is the negated pseudo-gradient of the
// L1-regularized objective (loss gradient plus the one-sided derivative of
// l1weight * |x|).
void OptimizerState::MakeSteepestDescDir() {
	if (l1weight != 0) {
		// L1 penalty active: choose each component from the sign of x[k] and,
		// at x[k] == 0, from where grad[k] lies relative to +/- l1weight.
		for (size_t k = 0; k < dim; ++k) {
			if (x[k] < 0) {
				// |x| = -x on this side, so the penalty contributes -l1weight
				// to the derivative; negating the sum gives -grad + l1weight.
				dir[k] = -grad[k] + l1weight;
				continue;
			}
			if (x[k] > 0) {
				// |x| = x on this side, so the penalty contributes +l1weight
				// to the derivative; negating the sum gives -grad - l1weight.
				dir[k] = -grad[k] - l1weight;
				continue;
			}

			// Remaining case: x[k] is neither below nor above zero.
			if (grad[k] < -l1weight) {
				// Right-hand derivative grad + l1weight is negative: use it as
				// the pseudo-gradient, so dir[k] > 0 (move into the positive side).
				dir[k] = -grad[k] - l1weight;
			} else if (grad[k] > l1weight) {
				// Left-hand derivative grad - l1weight is positive: use it as
				// the pseudo-gradient, so dir[k] < 0 (move into the negative side).
				dir[k] = -grad[k] + l1weight;
			} else {
				// Neither one-sided derivative yields a descent step from zero;
				// leave this coordinate where it is.
				dir[k] = 0;
			}
		}
	} else {
		// No L1 term: the direction is the loss gradient scaled by -1.
		scaleInto(dir, grad, -1);
	}

	// Keep a copy of the steepest-descent direction just computed.
	steepestDescDir = dir;
}