I'm creating some functions to do things like the "separated sum" of negative and positive number, kahan, pairwise and other stuff where it doesn't matter the order I take the elements from the matrix, for example:
template <typename T, int R, int C>
inline T sum(const Eigen::Matrix<T,R,C>& xs)
{
T sumP(0);
T sumN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
if (xs(i,j)>0)
sumP += xs(i,j);
else if (xs(i,j)<0) //ignore 0 elements: improvement for sparse matrices I think
sumN += xs(i,j);
}
return sumP+sumN;
}
Now, I would like to make this as efficient as possible, so my question is, would it be better to loop through each column of each row like the above, or do the opposite like the the following:
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
(I suppose this depends on the order that the matrix elements are allocated in memory, but I couldn't find this in Eigen's manual).
Also, are there other alternate ways like using iterators (do they exist in Eigen?) that might be slightly faster?
I did some benchmarks to checkout which way is quicker, I got the following results (in seconds):
12
30
3
6
23
3
The first line is doing iteration as suggested by @jleahy.
The second line is doing iteration as I've done in my code in the question (the inverse order of @jleahy).
The third line is doing iteration using PlainObjectBase::data()
like this for (int i = 0; i < matrixObject.size(); i++)
.
The other 3 lines repeat the same as the above, but with an temporary as suggest by @lucas92
I also did the same tests but using substituting /if else.*/ for /else/ (no special treatment for sparse matrix) and I got the following (in seconds):
10
27
3
6
24
2
Doing the tests again gave me results pretty similar. I used g++ 4.7.3
with -O3
. The code:
#include <ctime>
#include <iostream>
#include <Eigen/Dense>
using namespace std;
template <typename T, int R, int C>
inline T sum_kahan1(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
if (xs(j,i)>0)
{
yP = xs(j,i) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (xs(j,i)<0)
{
yN = xs(j,i) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
if (xs(i,j)>0)
{
yP = xs(i,j) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (xs(i,j)<0)
{
yN = xs(i,j) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
if ((*(xs.data() + i))>0)
{
yP = (*(xs.data() + i)) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if ((*(xs.data() + i))<0)
{
yN = (*(xs.data() + i)) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan1t(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
T temporary = xs(j,i);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (temporary<0)
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2t(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
T temporary = xs(i,j);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (temporary<0)
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3t(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
T temporary = (*(xs.data() + i));
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (temporary<0)
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan1e(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
if (xs(j,i)>0)
{
yP = xs(j,i) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = xs(j,i) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2e(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
if (xs(i,j)>0)
{
yP = xs(i,j) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = xs(i,j) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3e(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
if ((*(xs.data() + i))>0)
{
yP = (*(xs.data() + i)) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = (*(xs.data() + i)) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan1te(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
T temporary = xs(j,i);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2te(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
T temporary = xs(i,j);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3te(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
T temporary = (*(xs.data() + i));
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
int main() {
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> test = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>::Random(10000,10000);
cout << "start" << endl;
int now;
now = time(0);
sum_kahan1(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan1t(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2t(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3t(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan1e(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2e(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3e(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan1te(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2te(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3te(test);
cout << time(0) - now << endl;
return 0;
}