prdwb/cuda-convnet

a small bug in NVMatrix::rightMult()


The result is incorrect when the target is the same matrix as the first operand (target == this): that code path only works if this is already column-major, because the target's isTrans flag is never updated in that case. I modified the function so that this requirement is no longer needed:

void NVMatrix::rightMult(const NVMatrix &b, float scaleAB, NVMatrix &target) 
const {
    assert(isContiguous() && b.isContiguous() && target.isContiguous());
//    assert(&target != &b);
    assert(_numCols == b.getNumRows());
    if(&target != this) {
        target.resize(_numRows, b.getNumCols());
        //target.setTrans(true); // removed here; the flag is now set unconditionally below
    }
    assert(target.getNumRows() == _numRows);
    assert(target.getNumCols() == b.getNumCols());
    if(_numRows % 64 != 0 || _numCols % 64 != 0 || b.getNumCols() % 64 != 0) {
        WARN("Matrix dimensions not divisible by 64 -- cublasSgemm performance may suffer.");
    }
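    // cuBLAS GEMM computes C = scaleAB * op(A) * op(B); the trans chars tell
    // cuBLAS how to read A and B, but the result C is always written in
    // column-major order with leading dimension getNumRows().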
    cublasSgemm(getTransChar(), b.getTransChar(), _numRows, b.getNumCols(), _numCols,
                scaleAB, _devData, getLeadingDim(), b.getDevData(), b.getLeadingDim(),
                0, target.getDevData(), getNumRows());
    target.setTrans(true); // added: mark the target as column-major to match the cublasSgemm output
    checkCublasError("cublasSgemm failed");
//    cudaThreadSynchronize();
}
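For context, here is a minimal sketch of the aliased call that exhibits the problem. The header name and the NVMatrix(rows, cols) constructor are assumptions taken from cuda-convnet's nvmatrix code; treat it as illustrative, not as a test from the repository:

#include "nvmatrix.cuh"   // assumed header name

void reproduceAliasedRightMult() {
    NVMatrix a(128, 64);   // assumed row-major by default: a.isTrans() == false
    NVMatrix b(64, 64);
    // ... fill a and b with device data ...

    // Target aliases the first operand, i.e. target == this inside rightMult().
    a.rightMult(b, 1.0f, a);

    // cublasSgemm writes a column-major product into a's buffer. Without the
    // setTrans(true) added above, a.isTrans() would still report row-major,
    // so any later element access of a would use the wrong layout.
}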

Original issue reported on code.google.com by qiwang...@gmail.com on 12 Jul 2013 at 3:47