extend the sparse matrix assembly benchmark
diff --git a/bench/sparse_setter.cpp b/bench/sparse_setter.cpp
index 6f7a19d..9c22636 100644
--- a/bench/sparse_setter.cpp
+++ b/bench/sparse_setter.cpp
@@ -12,7 +12,15 @@
 #endif
 
 #ifndef REPEAT
-#define REPEAT 1
+#define REPEAT 2
+#endif
+
+#ifndef NBTRIES
+#define NBTRIES 2
+#endif
+
+#ifndef KK
+#define KK 10
 #endif
 
 #ifndef NOGOOGLE
@@ -22,7 +30,7 @@
 
 #include "BenchSparseUtil.h"
 
-#define CHECK_MEM 
+#define CHECK_MEM
 // #define CHECK_MEM  std/**/::cout << "check mem\n"; getchar();
 
 #define BENCH(X) \
@@ -37,9 +45,13 @@
 typedef std::vector<float> Values;
 
 EIGEN_DONT_INLINE Scalar* setinnerrand_eigen(const Coordinates& coords, const Values& vals);
+EIGEN_DONT_INLINE Scalar* setrand_eigen_dynamic(const Coordinates& coords, const Values& vals);
+EIGEN_DONT_INLINE Scalar* setrand_eigen_compact(const Coordinates& coords, const Values& vals);
+EIGEN_DONT_INLINE Scalar* setrand_eigen_sumeq(const Coordinates& coords, const Values& vals);
 EIGEN_DONT_INLINE Scalar* setrand_eigen_gnu_hash(const Coordinates& coords, const Values& vals);
 EIGEN_DONT_INLINE Scalar* setrand_eigen_google_dense(const Coordinates& coords, const Values& vals);
 EIGEN_DONT_INLINE Scalar* setrand_eigen_google_sparse(const Coordinates& coords, const Values& vals);
+EIGEN_DONT_INLINE Scalar* setrand_scipy(const Coordinates& coords, const Values& vals);
 EIGEN_DONT_INLINE Scalar* setrand_ublas_mapped(const Coordinates& coords, const Values& vals);
 EIGEN_DONT_INLINE Scalar* setrand_ublas_coord(const Coordinates& coords, const Values& vals);
 EIGEN_DONT_INLINE Scalar* setrand_ublas_compressed(const Coordinates& coords, const Values& vals);
@@ -50,17 +62,36 @@
 {
   int rows = SIZE;
   int cols = SIZE;
-  bool fullyrand = false;
-  //float density = float(NBPERROW)/float(SIZE);
-  
+  bool fullyrand = true;
+
   BenchTimer timer;
   Coordinates coords;
   Values values;
   if(fullyrand)
   {
-    for (int i=0; i<cols*NBPERROW; ++i)
+    Coordinates pool;
+    pool.reserve(cols*NBPERROW);
+    std::cerr << "fill pool" << "\n";
+    for (int i=0; i<cols*NBPERROW; )
     {
-      coords.push_back(Vector2i(ei_random<int>(0,rows-1),ei_random<int>(0,cols-1)));
+//       DynamicSparseMatrix<int> stencil(SIZE,SIZE);
+      Vector2i ij(ei_random<int>(0,rows-1),ei_random<int>(0,cols-1));
+//       if(stencil.coeffRef(ij.x(), ij.y())==0)
+      {
+//         stencil.coeffRef(ij.x(), ij.y()) = 1;
+        pool.push_back(ij);
+
+      }
+      ++i;
+    }
+    std::cerr << "pool ok" << "\n";
+    int n = cols*NBPERROW*KK;
+    coords.reserve(n);
+    values.reserve(n);
+    for (int i=0; i<n; ++i)
+    {
+      int i = ei_random<int>(0,pool.size());
+      coords.push_back(pool[i]);
       values.push_back(ei_random<Scalar>());
     }
   }
@@ -79,67 +110,55 @@
     // dense matrices
     #ifdef DENSEMATRIX
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setrand_eigen_dense(coords,values);
-      timer.stop();
+      BENCH(setrand_eigen_dense(coords,values);)
       std::cout << "Eigen Dense\t" << timer.value() << "\n";
     }
     #endif
 
     // eigen sparse matrices
-    if (!fullyrand)
+//     if (!fullyrand)
+//     {
+//       BENCH(setinnerrand_eigen(coords,values);)
+//       std::cout << "Eigen fillrand\t" << timer.value() << "\n";
+//     }
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setinnerrand_eigen(coords,values);
-      timer.stop();
-      std::cout << "Eigen fillrand\t" << timer.value() << "\n";
+      BENCH(setrand_eigen_dynamic(coords,values);)
+      std::cout << "Eigen dynamic\t" << timer.value() << "\n";
+    }
+//     {
+//       BENCH(setrand_eigen_compact(coords,values);)
+//       std::cout << "Eigen compact\t" << timer.value() << "\n";
+//     }
+    {
+      BENCH(setrand_eigen_sumeq(coords,values);)
+      std::cout << "Eigen sumeq\t" << timer.value() << "\n";
     }
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setrand_eigen_gnu_hash(coords,values);
-      timer.stop();
-      std::cout << "Eigen std::map\t" << timer.value() << "\n";
+//       BENCH(setrand_eigen_gnu_hash(coords,values);)
+//       std::cout << "Eigen std::map\t" << timer.value() << "\n";
+    }
+    {
+      BENCH(setrand_scipy(coords,values);)
+      std::cout << "scipy\t" << timer.value() << "\n";
     }
     #ifndef NOGOOGLE
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setrand_eigen_google_dense(coords,values);
-      timer.stop();
+      BENCH(setrand_eigen_google_dense(coords,values);)
       std::cout << "Eigen google dense\t" << timer.value() << "\n";
     }
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setrand_eigen_google_sparse(coords,values);
-      timer.stop();
+      BENCH(setrand_eigen_google_sparse(coords,values);)
       std::cout << "Eigen google sparse\t" << timer.value() << "\n";
     }
     #endif
-    
+
     #ifndef NOUBLAS
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setrand_ublas_mapped(coords,values);
-      timer.stop();
-      std::cout << "ublas mapped\t" << timer.value() << "\n";
+//       BENCH(setrand_ublas_mapped(coords,values);)
+//       std::cout << "ublas mapped\t" << timer.value() << "\n";
     }
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setrand_ublas_genvec(coords,values);
-      timer.stop();
+      BENCH(setrand_ublas_genvec(coords,values);)
       std::cout << "ublas vecofvec\t" << timer.value() << "\n";
     }
     /*{
@@ -159,16 +178,12 @@
       std::cout << "ublas coord\t" << timer.value() << "\n";
     }*/
     #endif
-    
-    
+
+
     // MTL4
     #ifndef NOMTL
     {
-      timer.reset();
-      timer.start();
-      for (int k=0; k<REPEAT; ++k)
-        setrand_mtl(coords,values);
-      timer.stop();
+      BENCH(setrand_mtl(coords,values));
       std::cout << "MTL\t" << timer.value() << "\n";
     }
     #endif
@@ -180,16 +195,63 @@
 {
   using namespace Eigen;
   SparseMatrix<Scalar> mat(SIZE,SIZE);
-  mat.startFill(2000000/*coords.size()*/);
+  //mat.startFill(2000000/*coords.size()*/);
   for (int i=0; i<coords.size(); ++i)
   {
-    mat.fillrand(coords[i].x(), coords[i].y()) = vals[i];
+    mat.insert(coords[i].x(), coords[i].y()) = vals[i];
   }
-  mat.endFill();
+  mat.finalize();
   CHECK_MEM;
   return 0;
 }
 
+EIGEN_DONT_INLINE Scalar* setrand_eigen_dynamic(const Coordinates& coords, const Values& vals)
+{
+  using namespace Eigen;
+  DynamicSparseMatrix<Scalar> mat(SIZE,SIZE);
+  mat.reserve(coords.size()/10);
+  for (int i=0; i<coords.size(); ++i)
+  {
+    mat.coeffRef(coords[i].x(), coords[i].y()) += vals[i];
+  }
+  mat.finalize();
+  CHECK_MEM;
+  return &mat.coeffRef(coords[0].x(), coords[0].y());
+}
+
+EIGEN_DONT_INLINE Scalar* setrand_eigen_sumeq(const Coordinates& coords, const Values& vals)
+{
+  using namespace Eigen;
+  int n = coords.size()/KK;
+  DynamicSparseMatrix<Scalar> mat(SIZE,SIZE);
+  for (int j=0; j<KK; ++j)
+  {
+    DynamicSparseMatrix<Scalar> aux(SIZE,SIZE);
+    mat.reserve(n);
+    for (int i=j*n; i<(j+1)*n; ++i)
+    {
+      aux.insert(coords[i].x(), coords[i].y()) += vals[i];
+    }
+    aux.finalize();
+    mat += aux;
+  }
+  return &mat.coeffRef(coords[0].x(), coords[0].y());
+}
+
+EIGEN_DONT_INLINE Scalar* setrand_eigen_compact(const Coordinates& coords, const Values& vals)
+{
+  using namespace Eigen;
+  DynamicSparseMatrix<Scalar> setter(SIZE,SIZE);
+  setter.reserve(coords.size()/10);
+  for (int i=0; i<coords.size(); ++i)
+  {
+    setter.coeffRef(coords[i].x(), coords[i].y()) += vals[i];
+  }
+  SparseMatrix<Scalar> mat = setter;
+  CHECK_MEM;
+  return &mat.coeffRef(coords[0].x(), coords[0].y());
+}
+
 EIGEN_DONT_INLINE Scalar* setrand_eigen_gnu_hash(const Coordinates& coords, const Values& vals)
 {
   using namespace Eigen;
@@ -198,11 +260,11 @@
     RandomSetter<SparseMatrix<Scalar>, StdMapTraits > setter(mat);
     for (int i=0; i<coords.size(); ++i)
     {
-      setter(coords[i].x(), coords[i].y()) = vals[i];
+      setter(coords[i].x(), coords[i].y()) += vals[i];
     }
     CHECK_MEM;
   }
-  return 0;//&mat.coeffRef(coords[0].x(), coords[0].y());
+  return &mat.coeffRef(coords[0].x(), coords[0].y());
 }
 
 #ifndef NOGOOGLE
@@ -213,10 +275,10 @@
   {
     RandomSetter<SparseMatrix<Scalar>, GoogleDenseHashMapTraits> setter(mat);
     for (int i=0; i<coords.size(); ++i)
-      setter(coords[i].x(), coords[i].y()) = vals[i];
+      setter(coords[i].x(), coords[i].y()) += vals[i];
     CHECK_MEM;
   }
-  return 0;//&mat.coeffRef(coords[0].x(), coords[0].y());
+  return &mat.coeffRef(coords[0].x(), coords[0].y());
 }
 
 EIGEN_DONT_INLINE Scalar* setrand_eigen_google_sparse(const Coordinates& coords, const Values& vals)
@@ -226,13 +288,139 @@
   {
     RandomSetter<SparseMatrix<Scalar>, GoogleSparseHashMapTraits> setter(mat);
     for (int i=0; i<coords.size(); ++i)
-      setter(coords[i].x(), coords[i].y()) = vals[i];
+      setter(coords[i].x(), coords[i].y()) += vals[i];
     CHECK_MEM;
   }
-  return 0;//&mat.coeffRef(coords[0].x(), coords[0].y());
+  return &mat.coeffRef(coords[0].x(), coords[0].y());
 }
 #endif
 
+
+template <class T>
+void coo_tocsr(const int n_row,
+               const int n_col,
+               const int nnz,
+               const Coordinates Aij,
+               const Values Ax,
+                     int Bp[],
+                     int Bj[],
+                     T Bx[])
+{
+    //compute number of non-zero entries per row of A coo_tocsr
+    std::fill(Bp, Bp + n_row, 0);
+
+    for (int n = 0; n < nnz; n++){
+        Bp[Aij[n].x()]++;
+    }
+
+    //cumsum the nnz per row to get Bp[]
+    for(int i = 0, cumsum = 0; i < n_row; i++){
+        int temp = Bp[i];
+        Bp[i] = cumsum;
+        cumsum += temp;
+    }
+    Bp[n_row] = nnz;
+
+    //write Aj,Ax into Bj,Bx
+    for(int n = 0; n < nnz; n++){
+        int row  = Aij[n].x();
+        int dest = Bp[row];
+
+        Bj[dest] = Aij[n].y();
+        Bx[dest] = Ax[n];
+
+        Bp[row]++;
+    }
+
+    for(int i = 0, last = 0; i <= n_row; i++){
+        int temp = Bp[i];
+        Bp[i]  = last;
+        last   = temp;
+    }
+
+    //now Bp,Bj,Bx form a CSR representation (with possible duplicates)
+}
+
+template< class T1, class T2 >
+bool kv_pair_less(const std::pair<T1,T2>& x, const std::pair<T1,T2>& y){
+    return x.first < y.first;
+}
+
+
+template<class I, class T>
+void csr_sort_indices(const I n_row,
+                      const I Ap[],
+                            I Aj[],
+                            T Ax[])
+{
+    std::vector< std::pair<I,T> > temp;
+
+    for(I i = 0; i < n_row; i++){
+        I row_start = Ap[i];
+        I row_end   = Ap[i+1];
+
+        temp.clear();
+
+        for(I jj = row_start; jj < row_end; jj++){
+            temp.push_back(std::make_pair(Aj[jj],Ax[jj]));
+        }
+
+        std::sort(temp.begin(),temp.end(),kv_pair_less<I,T>);
+
+        for(I jj = row_start, n = 0; jj < row_end; jj++, n++){
+            Aj[jj] = temp[n].first;
+            Ax[jj] = temp[n].second;
+        }
+    }
+}
+
+template <class I, class T>
+void csr_sum_duplicates(const I n_row,
+                        const I n_col,
+                              I Ap[],
+                              I Aj[],
+                              T Ax[])
+{
+    I nnz = 0;
+    I row_end = 0;
+    for(I i = 0; i < n_row; i++){
+        I jj = row_end;
+        row_end = Ap[i+1];
+        while( jj < row_end ){
+            I j = Aj[jj];
+            T x = Ax[jj];
+            jj++;
+            while( jj < row_end && Aj[jj] == j ){
+                x += Ax[jj];
+                jj++;
+            }
+            Aj[nnz] = j;
+            Ax[nnz] = x;
+            nnz++;
+        }
+        Ap[i+1] = nnz;
+    }
+}
+
+EIGEN_DONT_INLINE Scalar* setrand_scipy(const Coordinates& coords, const Values& vals)
+{
+  using namespace Eigen;
+  SparseMatrix<Scalar> mat(SIZE,SIZE);
+  mat.resizeNonZeros(coords.size());
+//   std::cerr << "setrand_scipy...\n";
+  coo_tocsr<Scalar>(SIZE,SIZE, coords.size(), coords, vals, mat._outerIndexPtr(), mat._innerIndexPtr(), mat._valuePtr());
+//   std::cerr << "coo_tocsr ok\n";
+
+  csr_sort_indices(SIZE, mat._outerIndexPtr(), mat._innerIndexPtr(), mat._valuePtr());
+
+  csr_sum_duplicates(SIZE, SIZE, mat._outerIndexPtr(), mat._innerIndexPtr(), mat._valuePtr());
+
+  mat.resizeNonZeros(mat._outerIndexPtr()[SIZE]);
+
+  return &mat.coeffRef(coords[0].x(), coords[0].y());
+}
+
+
 #ifndef NOUBLAS
 EIGEN_DONT_INLINE Scalar* setrand_ublas_mapped(const Coordinates& coords, const Values& vals)
 {
@@ -242,7 +430,7 @@
   mapped_matrix<Scalar> aux(SIZE,SIZE);
   for (int i=0; i<coords.size(); ++i)
   {
-    aux(coords[i].x(), coords[i].y()) = vals[i];
+    aux(coords[i].x(), coords[i].y()) += vals[i];
   }
   CHECK_MEM;
   compressed_matrix<Scalar> mat(aux);
@@ -278,12 +466,12 @@
   using namespace boost;
   using namespace boost::numeric;
   using namespace boost::numeric::ublas;
-  
+
 //   ublas::vector<coordinate_vector<Scalar> > foo;
   generalized_vector_of_vector<Scalar, row_major, ublas::vector<coordinate_vector<Scalar> > > aux(SIZE,SIZE);
   for (int i=0; i<coords.size(); ++i)
   {
-    aux(coords[i].x(), coords[i].y()) = vals[i];
+    aux(coords[i].x(), coords[i].y()) += vals[i];
   }
   CHECK_MEM;
   compressed_matrix<Scalar,row_major> mat(aux);