@article {13948,
title = {EFFICIENT PARALLEL NON-NEGATIVE LEAST SQUARES ON MULTI-CORE ARCHITECTURES},
journal = {SIAM Journal on Scientific Computing},
volume = {33},
year = {2011},
month = {2011///},
pages = {2848 - 2863},
abstract = {We parallelize a version of the active-set iterative algorithm derived from the originalworks of Lawson and Hanson [Solving Least Squares Problems, Prentice-Hall, 1974] on multicore
architectures. This algorithm requires the solution of an unconstrained least squares problem in
every step of the iteration for a matrix composed of the passive columns of the original system
matrix. To achieve improved performance, we use parallelizable procedures to efficiently update and
downdate the QR factorization of the matrix at each iteration, to account for inserted and removed
columns. We use a reordering strategy of the columns in the decomposition to reduce computation
and memory access costs. We consider graphics processing units (GPUs) as a new mode for efficient
parallel computations and compare our implementations to that of multicore CPUs. Both synthetic
and nonsynthetic data are used in the experiments.
},
author = {Luo,Y. and Duraiswami, Ramani}
}