a small bug in NVMatrix::rightMult()
Opened this issue · 0 comments
GoogleCodeExporter commented
The result can be incorrect if the target is the same as the first operand: the
target == this case requires `this` to be column-major. I modified the function so
that this requirement is no longer needed:
// Multiplies this matrix by b on the right using one cublasSgemm call:
//     target = scaleAB * (*this) * b
//
// b       - right-hand operand; b.getNumRows() must equal this matrix's _numCols.
// scaleAB - scalar passed to cublasSgemm as alpha.
// target  - receives the product. It is resized to (_numRows x b.getNumCols())
//           unless it is this very object, and is always flagged with
//           setTrans(true) once the multiply has been issued.
//
// All three matrices must be contiguous (asserted). beta is passed as 0, so the
// previous contents of target are not accumulated into the result.
//
// NOTE(review): when target aliases *this (or b), cublasSgemm is handed the same
// device buffer as both an input and the output -- confirm that this in-place use
// is actually safe for the shapes/layouts callers rely on.
void NVMatrix::rightMult(const NVMatrix &b, float scaleAB, NVMatrix &target) const {
    assert(isContiguous() && b.isContiguous() && target.isContiguous());
    // assert(&target != &b);
    assert(_numCols == b.getNumRows());

    // A distinct target is reshaped to hold the product; an in-place target must
    // already have the right shape, which the two asserts below verify.
    const bool writesToSelf = (&target == this);
    if (!writesToSelf) {
        target.resize(_numRows, b.getNumCols());
        // The transposed flag is deliberately NOT set here: it is applied after the
        // multiply so the operands' getTransChar() values are read beforehand.
    }
    assert(target.getNumRows() == _numRows);
    assert(target.getNumCols() == b.getNumCols());

    const bool dimsAligned = _numRows % 64 == 0
                          && _numCols % 64 == 0
                          && b.getNumCols() % 64 == 0;
    if (!dimsAligned) {
        WARN("Matrix dimensions not divisible by 64 -- cublasSgemm performance may suffer.");
    }

    // Spell out the GEMM arguments: op(A) is m x k, op(B) is k x n, C is m x n.
    const char transA = getTransChar();
    const char transB = b.getTransChar();
    const int m = _numRows;
    const int n = b.getNumCols();
    const int k = _numCols;
    const int lda = getLeadingDim();
    const int ldb = b.getLeadingDim();
    const int ldc = getNumRows();

    cublasSgemm(transA, transB, m, n, k,
                scaleAB, _devData, lda, b.getDevData(), ldb,
                0, target.getDevData(), ldc);

    // The product was written with leading dimension ldc == number of rows, so
    // record that on the target (presumably trans == true means column-major here).
    target.setTrans(true);
    checkCublasError("cublasSgemm failed");
    // cudaThreadSynchronize();
}
Original issue reported on code.google.com by qiwang...@gmail.com
on 12 Jul 2013 at 3:47