GetFEM++  5.3
bgeot_sparse_tensors.cc
1 /*===========================================================================
2 
3  Copyright (C) 2000-2017 Julien Pommier
4 
5  This file is a part of GetFEM++
6 
7  GetFEM++ is free software; you can redistribute it and/or modify it
8  under the terms of the GNU Lesser General Public License as published
9  by the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version along with the GCC Runtime Library
11  Exception either version 3.1 or (at your option) any later version.
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15  License and GCC Runtime Library Exception for more details.
16  You should have received a copy of the GNU Lesser General Public License
17  along with this program; if not, write to the Free Software Foundation,
18  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20 ===========================================================================*/
21 
22 #include <bitset>
23 #include "gmm/gmm_blas_interface.h"
25 
26 
27 namespace bgeot {
28  std::ostream& operator<<(std::ostream& o, const tensor_ranges& r) {
29  for (size_type i=0; i < r.size(); ++i) {
30  if (i) o << "x";
31  o << "[0.." << r[i] << "]";
32  }
33  return o;
34  }
35 
36  /*
37  strides:
38  0 1 2 3 4 5 6 7
39  ---------------
40  x x x
41  x x x
42  x x x x x x
43  x x x x x
44 
45  => n={0,4,6,8,8}
46 
47  basic iterator on a set of full tensors, just used for iterating over binary masks
48  */
49  template<typename IT> class basic_multi_iterator {
50  unsigned N;
51  index_set idxnums;
52  tensor_ranges ranges;
53  tensor_strides strides;
54  tensor_ranges cnt;
55  index_set ilst2idxnums;
56  std::vector<const tensor_strides*> slst;
57  std::vector<IT> iter;
58  std::vector<int> n;
59  public:
60  const tensor_ranges& getcnt() const { return cnt; }
61  basic_multi_iterator() {
62  N = 0; idxnums.reserve(16); strides.reserve(64);
63  slst.reserve(16);
64  ilst2idxnums.reserve(64); iter.reserve(4);
65  }
66  const tensor_ranges& all_ranges() const { return ranges; }
67  const index_set& all_indexes() const { return idxnums; }
68  /* /!!!!!\ attention les strides ont 1 elt de plus que r et idxs (pour les tensor_masks)
69  (ca intervient aussi dans prepare()) */
70  void insert(const index_set& idxs, const tensor_ranges& r, const tensor_strides& s, IT it_) {
71  assert(idxs.size() == r.size()); assert(s.size() == r.size()+1);
72  slst.push_back(&s);
73  for (unsigned int i=0; i < idxs.size(); ++i) {
74  index_set::const_iterator f=std::find(idxnums.begin(), idxnums.end(), idxs[i]);
75  if (f == idxnums.end()) {
76  ilst2idxnums.push_back(dim_type(idxnums.size()));
77  idxnums.push_back(idxs[i]);
78  ranges.push_back(r[i]);
79  } else {
80  ilst2idxnums.push_back(dim_type(f-idxnums.begin()));
81  assert(ranges[ilst2idxnums.back()] == r[i]);
82  }
83  }
84  iter.push_back(it_);
85  N++;
86  }
87  void prepare() {
88  unsigned c = 0;
89  strides.assign(N*idxnums.size(),0);
90  for (unsigned int i=0; i < N; ++i) {
91  for (unsigned int j=0; j < slst[i]->size()-1; ++j) {
92  stride_type s = (*slst[i])[j];
93  strides[ilst2idxnums[c]*N + i] = s;
94  c++;
95  }
96  }
97  n.assign(N+1,-1);
98  for (unsigned int i=0; i < idxnums.size(); ++i) {
99  for (unsigned int j=0; j < N; ++j) {
100  if (strides[i*N+j]) n[j+1] = i;
101  }
102  }
103  cnt.assign(idxnums.size(),0);
104  }
105  /* unfortunatly these two function don't inline
106  and are not optimized by the compiler
107  (when b and N are known at compile-time) (at least for gcc-3.2) */
108  bool next(unsigned int b) { return next(b,N); }
109  bool next(unsigned int b, unsigned int NN) {
110  int i0 = n[b+1];
111  while (i0 > n[b]) {
112  if (++cnt[i0] < ranges[i0]) {
113  for (unsigned int i=b; i < NN; ++i) {
114  iter[i] += strides[i0*NN+i];
115  }
116  return true;
117  } else {
118  for (unsigned int i=b; i < NN; ++i) {
119  iter[i] -= strides[i0*NN+i]*(ranges[i0]-1);
120  }
121  cnt[i0]=0; i0--;
122  }
123  }
124  return false;
125  }
126  template<unsigned b, unsigned NN> bool qnext() { return next(b,NN); }
127  IT it(unsigned int b) const { return iter[b]; }
128  };
129 
130 
131  /*
132  constructeur par fusion de deux masques binaires
133  (avec potentiellement une intersection non vide)
134  */
135  tensor_mask::tensor_mask(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
136  assign(tm1,tm2,and_op); unset_card();
137  }
138 #if 0
139  void tensor_mask::assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
140  clear(); unset_card();
141  if (tm1.ndim()==0) { assign(tm2); return; }
142  if (tm2.ndim()==0) { assign(tm1); return; }
143  r = tm1.ranges();
144  idxs = tm1.indexes();
145 
146  /* ce tableau va permettre de faire une boucle commune sur les
147  deux masques, les dimension inutilisees etant mises a 1 pour
148  ne pas gener */
149  tensor_ranges global_r(std::max(tm1.max_dim(),tm2.max_dim())+1, index_type(1));
150 
151  for (index_type i = 0; i < tm1.indexes().size(); ++i)
152  global_r[tm1.indexes()[i]] = tm1.ranges()[i];
153 
154  /* detect new indices */
155  for (index_type i = 0; i < tm2.indexes().size(); ++i) {
156  index_set::const_iterator f=std::find(tm1.indexes().begin(), tm1.indexes().end(), tm2.indexes()[i]);
157  if (f == tm1.indexes().end()) {
158  assert(global_r[tm2.indexes()[i]]==1);
159  global_r[tm2.indexes()[i]] = tm2.ranges()[i];
160  r.push_back(tm2.ranges()[i]);
161  idxs.push_back(tm2.indexes()[i]);
162  }
163  else assert(global_r[tm2.indexes()[i]] = tm2.ranges()[i]);
164  }
165  eval_strides();
166  assert(size());
167  m.assign(size(),false);
168  /* sans doute pas tres optimal, mais la fonction n'est pas critique .. */
169  for (tensor_ranges_loop l(global_r); !l.finished(); l.next()) {
170  if (and_op) {
171  if (tm1(l.cnt) && tm2(l.cnt)) m.add(pos(l.cnt));
172  } else {
173  if (tm1(l.cnt) || tm2(l.cnt)) m.add(pos(l.cnt));
174  }
175  }
176  unset_card();
177  }
178 #endif
179 
180 
181  void tensor_mask::assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
182  clear(); unset_card();
183  /* check simple cases first, since this function can be called often and
184  is quite expensive */
185  if (tm1.ndim()==0) { assign(tm2); return; }
186  if (tm2.ndim()==0) { assign(tm1); return; }
187 
188  /* same masks ? */
189  if (tm1.indexes() == tm2.indexes() &&
190  tm1.ranges() == tm2.ranges() &&
191  tm1.strides() == tm2.strides()) {
192  r = tm1.ranges(); idxs=tm1.indexes(); s=tm1.strides();
193  assert(tm1.m.size() == tm2.m.size());
194  m = tm1.m;
195  if (and_op) {
196  for (index_type i=0; i < tm2.m.size(); ++i)
197  if (!tm2.m[i]) m[i] = false;
198  } else {
199  for (index_type i=0; i < tm2.m.size(); ++i)
200  if (tm2.m[i]) m[i] = true;
201  }
202  return;
203  }
204 
205  basic_multi_iterator<unsigned> bmit;
206  bmit.insert(tm1.indexes(), tm1.ranges(), tm1.strides(), 0);
207  bmit.insert(tm2.indexes(), tm2.ranges(), tm2.strides(), 0);
208  r = bmit.all_ranges(); idxs = bmit.all_indexes(); eval_strides();
209  assert(size());
210  m.assign(size(),false);
211  bmit.insert(indexes(), ranges(), strides(), 0);
212  bmit.prepare();
213  //cout << "tm1=" << tm1 << "\ntm2=" << tm2 << endl;
214  if (and_op) {
215  do {
216  if (tm1.m[bmit.it(0)]) {
217  do {
218  if (tm2.m[bmit.it(1)]) {
219  m[bmit.it(2)] = 1;
220  }
221  // cout << "at cnt=" << bmit.getcnt() << ", it0=" << bmit.it(0) << "=" << tm1.m[bmit.it(0)]
222  // << ", it1=" << bmit.it(1) << "=" << tm2.m[bmit.it(1)] << ", res=" << bmit.it(2) << "=" << m[bmit.it(2)] << endl;
223  } while (bmit.qnext<1,3>());
224  }
225  } while (bmit.qnext<0,3>());
226  } else {
227  do {
228  bool v1 = tm1.m[bmit.it(0)];
229  do {
230  if (v1 || (tm2.m[bmit.it(1)]))
231  m[bmit.it(2)] = 1;
232  } while (bmit.qnext<1,3>());
233  } while (bmit.qnext<0,3>());
234  }
235  // cout << "output: " << *this << endl;
236  }
237 
238 
239  /* construit un masque forme du produit cartesien de plusieurs masques (disjoints)
240  /!\ aucune verif sur la validite des arguments */
241  tensor_mask::tensor_mask(const std::vector<const tensor_mask*>& tm) {
242  assign(tm);
243  }
244 #if 0
245  // REMPLACE PAR LA FONCTION SUIVANTE
246  void tensor_mask::assign(const std::vector<const tensor_mask*> & tm) {
247  index_set mask_start; unset_card();
248  clear();
249  r.reserve(255); idxs.reserve(255); mask_start.reserve(255);
250  for (dim_type i=0; i < tm.size(); ++i) {
251  mask_start.push_back(r.size());
252  for (dim_type j=0; j < tm[i]->ndim(); ++j) {
253  r.push_back(tm[i]->ranges()[j]);
254  idxs.push_back(tm[i]->indexes()[j]);
255  }
256  }
257  eval_strides(); assert(size());
258  m.assign(size(),false);
259  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
260  bool is_in = true;
261  for (dim_type i=0; is_in && i < tm.size(); ++i) {
262  index_type s_ = 0;
263  for (dim_type j=0; j < tm[i]->ndim(); ++j)
264  s_ += l.cnt[j+mask_start[i]]*tm[i]->strides()[j];
265  if (!tm[i]->m[s_]) is_in = false;
266  }
267  if (is_in) m.add(lpos(l.cnt));
268  }
269  }
270 #endif
271  // PRODUIT CARTESIEN DE MASQUES BINAIRES DISJOINTS
272  void tensor_mask::assign(const std::vector<const tensor_mask*> & tm) {
273  unset_card();
274  if (tm.size() == 0) { clear(); return; }
275  if (tm.size() == 1) { assign(*tm[0]); return; }
276  clear();
277  basic_multi_iterator<unsigned> bmit;
278  for (dim_type i=0; i < tm.size(); ++i)
279  bmit.insert(tm[i]->indexes(), tm[i]->ranges(), tm[i]->strides(), 0);
280  r = bmit.all_ranges(); idxs = bmit.all_indexes(); eval_strides();
281  assert(size());
282  m.assign(size(), false);
283  bmit.insert(indexes(), ranges(), strides(), 0);
284  bmit.prepare();
285  unsigned N = unsigned(tm.size());
286  bool finished = false;
287  while (!finished) {
288  bool is_in = true;
289  unsigned int b;
290  for (b=0; b < N; ++b) {
291  if (!tm[b]->m[bmit.it(b)]) {
292  is_in = false; break;
293  }
294  }
295  if (is_in) { m[bmit.it(N)] = 1; b = N-1; }
296  while (!finished && !bmit.next(b)) { if (b == 0) finished = true; b--; }
297  }
298  }
299 
300  void tensor_mask::unpack_strides(const tensor_strides& packed, tensor_strides& unpacked) const {
301  if (packed.size() != card())
302  assert(packed.size()==card());
303  unpacked.assign(size(),INT_MIN);
304  index_type i=0;
305  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
306  if (m[lpos(l.cnt)]) unpacked[lpos(l.cnt)] = packed[i++];
307  }
308  }
309 
310  void tensor_mask::check_assertions() const {
311  GMM_ASSERT3(r.size() == idxs.size(), "");
312  GMM_ASSERT3(s.size() == idxs.size()+1, "");
313  GMM_ASSERT3(m.size() == size(), "");
314  dal::bit_vector bv;
315  for (dim_type i=0; i < idxs.size(); ++i) {
316  GMM_ASSERT3(!bv.is_in(i), "");
317  bv.add(i);
318  }
319  }
320 
321  tensor_mask::tensor_mask(const std::vector<const tensor_mask*> tm1, const std::vector<const tensor_mask*> tm2, bool and_op) {
322  assign(tensor_mask(tm1), tensor_mask(tm2), and_op);
323  }
324 
325  void tensor_ref::set_sub_tensor(const tensor_ref& tr, const tensor_shape& sub) {
326  assign_shape(sub);
327  /* fusionne sub et tr.shape */
328  merge(tr);
329 
330  /* reserve l'espace pour les strides */
331  strides_.resize(masks().size());
332  for (dim_type i = 0; i < strides_.size(); ++i)
333  strides_[i].resize(mask(i).card());
334 
335  pbase_ = tr.pbase_; base_shift_ = tr.base_shift();
336 
337  /*
338  cout << "\n -> entree dans set_sub_tensor: " << endl
339  << "tr.shape=" << (tensor_shape&)(tr) << endl
340  << " sub=" << sub << endl;
341  */
342  /* pre-calcul rapide des strides sur tr, pour chacun de ses masques */
343  std::vector<tensor_strides> trstrides_unpacked(tr.masks().size());
344  for (dim_type im = 0; im < tr.masks().size(); ++im) {
345  tr.mask(im).check_assertions();
346  tr.mask(im).unpack_strides(tr.strides()[im], trstrides_unpacked[im]);
347  }
348 
349 
350  /* on parcours chaque masque (merge) */
351  for (dim_type im = 0; im < masks().size(); ++im) {
352  const tensor_mask& m = masks()[im];
353  /* par construction, tous les masques de tr sont inclus (ou egaux)
354  dans un masque de l'objet courant
355 
356  on construit donc la liste des masques de tr qui sont inclus dans m
357  */
358  std::vector<dim_type> trmasks; trmasks.reserve(tr.masks().size());
359  for (dim_type i=0; i < m.indexes().size(); ++i) {
360  if (tr.index_is_valid(m.indexes()[i])) { /* l'index n'est pas forcement valide pour tr !*/
361  dim_type im2 = tr.index_to_mask_num(m.indexes()[i]);
362  if (std::find(trmasks.begin(), trmasks.end(), im2)==trmasks.end()) trmasks.push_back(im2);
363  }
364  }
365 
366  tensor_ranges gcnt(tr.ndim(),0);
367  stride_type stcnt = 0;
368 
369  for (tensor_ranges_loop l(m.ranges()); !l.finished(); l.next()) {
370  /* recopie les indice de la boucle dans les indices globaux */
371  for (dim_type i=0; i < m.ranges().size(); ++i) gcnt[m.indexes()[i]] = l.index(i);
372 
373  bool in_m = m(gcnt);
374  bool in_trm = true;
375  stride_type tr_s = 0;
376 
377  /* verifie si l'element est bien marque non nul dans les masques de tr
378  et met a jour le stride */
379  for (dim_type i=0; i < trmasks.size(); ++i) {
380  const tensor_mask &mm = tr.mask(trmasks[i]);
381 
382  //cout << " mm=" << mm << endl << "gcnt=" << gcnt << endl;
383  if (mm(gcnt)) {
384  tr_s += trstrides_unpacked[trmasks[i]][mm.pos(gcnt)];
385  assert(trstrides_unpacked[trmasks[i]][mm.pos(gcnt)]>=0); // --- DEBUG ---
386  } else { in_trm = false; break; }
387  }
388  /* verifie que m est un sous-ensemble des masques de trmasks */
389  if (in_m) assert(in_trm);
390  if (!in_trm) assert(!in_m);
391  /* recopie le stride si l'element est non nul dans m */
392  if (in_m) {
393  // cout << "ajout du " << stcnt << "eme elt @ stride=" << tr_s << endl;
394  strides_[im][stcnt++] = tr_s;
395  }
396  }
397 
398  /* verif que yapa bug */
399  assert(stcnt == stride_type(m.card()));
400  }
401  ensure_0_stride(); /* c'est plus propre comme ca */
402  }
403 
404  /* slices a tensor_ref, at dimension 'dim', position 'islice'
405  ... not straightforward for sparse tensors !
406  */
407  tensor_ref::tensor_ref(const tensor_ref& tr, tensor_mask::Slice slice) {
408  set_sub_tensor(tr, tr.slice_shape(slice));
409 
410  /* shift the base according to the old stride */
411  ensure_0_stride();
412 
413  /* create a mask m2 with one less dimension than m1 */
414  const tensor_mask& m1(index_to_mask(slice.dim));
415  dim_type mdim = index_to_mask_dim(slice.dim);
416  if (m1.ndim() > 1) {
417  tensor_ranges r(m1.ranges()); r.erase(r.begin()+mdim);
418  index_set idx(m1.indexes()); idx.erase(idx.begin()+mdim);
419  tensor_mask m2(r,idx);
420  index_type pos1 = 0, pos2 = 0;
421  for (tensor_ranges_loop l(m1.ranges()); !l.finished(); l.next()) {
422  if (l.index(mdim) == slice.i0) {
423  m2.set_mask_val(pos2++, m1(pos1));
424  } else assert(m1(pos1) == 0);
425  pos1++;
426  }
427 
428 
429  /* replace the old mask by the new one */
430  assert(index_to_mask_num(slice.dim) < masks().size());
431  masks()[index_to_mask_num(slice.dim)] = m2;
432  } else {
433  /* simply remove the mask since it only contained the dimension 'dim' */
434  remove_mask(index_to_mask_num(slice.dim));
435  }
436  /* shift all indexes greater than dim */
437  shift_dim_num_ge(slice.dim,-1);
438  set_ndim_noclean(dim_type(ndim()-1));
439  update_idx2mask();
440  }
441 
442 
443  struct compare_packed_range {
444  const std::vector<packed_range_info>& pri;
445  std::vector<stride_type> mean_inc;
446  compare_packed_range(const std::vector<packed_range_info>& pri_) : pri(pri_) {}
447  bool operator()(dim_type a, dim_type b) {
448  if (pri[a].n < pri[b].n) return true;
449  else if (pri[a].n > pri[b].n) return false;
450  else { /* c'est la que ca devient interessant */
451  if (pri[a].mean_increm > pri[b].mean_increm) return true;
452  }
453  return false;
454  }
455  };
456 
457  void multi_tensor_iterator::init(std::vector<tensor_ref> trtab, bool with_index_values) {
458  N = index_type(trtab.size());
459  index_type N_packed_idx = 0;
460 
461  /* non null elements across all tensors */
462 
463  tensor_shape ts(trtab[0]);
464  for (dim_type i=1; i < trtab.size(); ++i)
465  ts.merge(trtab[i]);
466 
467 
468  /* apply the mask to all tensors,
469  updating strides according to it */
470  for (dim_type i = 0; i < N; ++i) {
471  tensor_ref tmp = trtab[i];
472  trtab[i].set_sub_tensor(tmp,ts);
473  }
474 
475  /* find significant indexes (ie remove indexes who only address 1 element) */
476  dal::bit_vector packed_idx; packed_idx.sup(0,ts.ndim());
477  for (index_type mi=0; mi < ts.masks().size(); ++mi) {
478  if (ts.masks()[mi].card() != 1) {
479  packed_idx.add(mi);
480  N_packed_idx++;
481  } else {
482  /* sanity check (TODO: s'assurer que sub_tensor_ref appelle bien ensure_0_stride) */
483  for (dim_type j=0; j < N; ++j) {
484  if (trtab[j].strides()[mi].size() != 0) {
485  assert(trtab[j].strides()[mi].size() == 1);
486  assert(trtab[j].strides()[mi][0] == 0);
487  }
488  }
489  }
490  }
491 
492  pr.resize(N_packed_idx);
493  pri.resize(N_packed_idx);
494 
495  /* evaluation of "ranks" per indice */
496 
497  index_type pmi = 0;
498  for (dim_type mi = 0; mi < ts.masks().size(); ++mi) {
499  if (packed_idx[mi]) {
500  dim_type n;
501  pri[pmi].original_masknum = mi;
502  pri[pmi].range = ts.masks()[mi].card();
503  for (n = 0; n < N; n++)
504  if (trtab[n].index_is_valid(mi)) break;
505  pri[pmi].n = n; pr[pmi].n = n;
506  pmi++;
507  }
508  }
509 
510  /* sort the packed_range_info according to the "ranks" */
511  std::sort(pri.begin(), pri.end());
512 
513 
514  /* eval bloc ranks */
515  bloc_rank.resize(N+1); bloc_rank[0] = 0;
516  bloc_nelt.resize(N+1); bloc_nelt[0] = 0;
517  for (index_type i=1; i <= N; ++i) {
518  bloc_rank[i] = bloc_rank[i-1];
519  while (bloc_rank[i] < pri.size() && pri[bloc_rank[i]].n == i-1) bloc_rank[i]++;
520  bloc_nelt[i] = bloc_rank[i] - bloc_rank[i-1];
521  }
522 
523  /* "package" the strides in structure pri */
524  for (pmi = 0; pmi < pri.size(); ++pmi) {
525  index_type mi = pri[pmi].original_masknum;
526  pri[pmi].mean_increm = 0;
527  pri[pmi].inc.assign(pri[pmi].range*(N-pri[pmi].n), 0);
528  pri[pmi].have_regular_strides.reset();
529  pri[pmi].have_regular_strides = std::bitset<32>((1 << N)-1);
530  for (dim_type n=pri[pmi].n; n < N; ++n) {
531  index_type pos0 = (n - pri[pmi].n);
532  for (index_type i = 0; i < pri[pmi].range; ++i) {
533  index_type pos = i * (N-pri[pmi].n) + pos0;
534  if (i != pri[pmi].range-1) {
535  stride_type increm = trtab[n].strides()[mi][i+1] - trtab[n].strides()[mi][i];
536  pri[pmi].inc[pos] = increm;
537  if (pri[pmi].inc[pos] != pri[pmi].inc[pos0])
538  pri[pmi].have_regular_strides[n] = false;
539  pri[pmi].mean_increm += increm;
540  } else { pri[pmi].inc[pos] = -trtab[n].strides()[mi][i]; }
541  }
542  }
543  if (pri[pmi].n!=N)
544  pri[pmi].mean_increm /= ((N-pri[pmi].n)*(pri[pmi].range-1));
545  }
546 
547  /* optimize them w/r to mean strides (without modifying the "ranks") */
548  index_set pr_reorder(pri.size());
549  for (pmi = 0; pmi < pri.size(); ++pmi) {
550  pr_reorder[pmi]=dim_type(pmi);
551  }
552  std::sort(pr_reorder.begin(), pr_reorder.end(), compare_packed_range(pri));
553  {
554  std::vector<packed_range> tmppr(pr);
555  std::vector<packed_range_info> tmppri(pri);
556  for (dim_type i =0; i < pri.size(); ++i) {
557  pr[i] = tmppr [pr_reorder[i]];
558  pri[i] = tmppri[pr_reorder[i]];
559  }
560  }
561 
562  /* setup data necessary to get back (quite quickly) the index values while iterating */
563  if (with_index_values) {
564  idxval.resize(ts.ndim());
565 
566  for (dim_type i=0; i < ts.ndim(); ++i) {
567  idxval[i].ppinc = NULL; idxval[i].pposbase = NULL; idxval[i].pos_ = 0;
568  }
569 
570  for (index_type mi = 0; mi < ts.masks().size(); ++mi) {
571  tensor_strides v;
572  pmi = index_type(-1);
573  for (dim_type i=0; i < pr.size(); ++i)
574  if (pri[i].original_masknum == mi) pmi = i;
575 
576  if (pmi != index_type(-1)) {
577  ts.masks()[mi].gen_mask_pos(pri[pmi].mask_pos);
578  } else { /* not very nice .. */
579  ts.masks()[mi].gen_mask_pos(v); assert(v.size()==1);
580  }
581  for (dim_type i=0; i < ts.masks()[mi].indexes().size(); ++i) {
582  dim_type ii = ts.masks()[mi].indexes()[i];
583  idxval[ii].cnt_num = dim_type(pmi); //packed_idx[mi] ? pmi : dim_type(-1);
584  idxval[ii].pos_ = (pmi != index_type(-1)) ? 0 : v[0];
585  idxval[ii].mod = ts.masks()[mi].strides()[i+1];
586  idxval[ii].div = ts.masks()[mi].strides()[i];
587  }
588  }
589  }
590 
591  /* check for opportunities to vectorize the loops with the mti
592  (assuming regular strides etc.)
593  */
594  vectorized_strides_.resize(N); vectorized_size_ = 0;
595  std::fill(vectorized_strides_.begin(), vectorized_strides_.end(), 0);
596  vectorized_pr_dim = index_type(pri.size());
597  for (vectorized_pr_dim = index_type(pri.size()-1); vectorized_pr_dim != index_type(-1); vectorized_pr_dim--) {
598  std::vector<packed_range_info>::const_iterator pp = pri.begin() + vectorized_pr_dim;
599  if (vectorized_pr_dim == pri.size()-1) {
600  if (pp->have_regular_strides.count() == N) vectorized_size_ = pp->range;
601  for (dim_type n=pp->n; n < N; ++n) {
602  GMM_ASSERT3(n < pp->inc.size(), ""); // expected failure here with "empty" tensors, see tests/test_assembly.cc, testbug() function.
603  vectorized_strides_[n] = pp->inc[n];
604  }
605  } else {
606  if (pp->have_regular_strides.count() != N) break;
607  bool still_ok = true;
608  for (dim_type n=pp->n; n < N; ++n) {
609  if (stride_type(vectorized_strides_[n]*vectorized_size_) != pp->inc[n]) still_ok = false;
610  }
611  if (still_ok) {
612  vectorized_size_ *= pp->range;
613  } else break;
614  }
615  }
616 
617  it.resize(N); pit0.resize(N); itbase.resize(N);
618  for (dim_type n=0; n < N; ++n) {
619  pit0[n]=trtab[n].pbase();
620  itbase[n]=trtab[n].base_shift();
621  }
622  rewind();
623  }
624 
625  void multi_tensor_iterator::print() const {
626  cout << "MTI(N=" << N << "): ";
627  for (dim_type i=0; i < pr.size(); ++i)
628  cout << " pri[" << int(i) << "]: n=" << int(pri[i].n)
629  << ", range=" << pri[i].range << ", mean_increm="
630  << pri[i].mean_increm << ", regular = " << pri[i].have_regular_strides
631  << ", inc=" << vref(pri[i].inc) << "\n";
632  cout << "bloc_rank: " << vref(bloc_rank) << ", bloc_nelt: " << vref(bloc_nelt) << "\n";
633  cout << "vectorized_size : " << vectorized_size_ << ", strides = " << vref(vectorized_strides_) << ", pr_dim=" << vectorized_pr_dim << "\n";
634  }
635 
636  void tensor_reduction::insert(const tensor_ref& tr_, const std::string& s) {
637  tensor_shape ts(tr_);
638  diag_shape(ts,s);
639  trtab.push_back(tref_or_reduction(tensor_ref(tr_, ts), s));
640  //cout << "reduction.insert('" << s << "')\n";
641  //cout << "Reduction: INSERT tr(ndim=" << int(tr_.ndim()) << ", s='" << s << "')\n";
642  //cout << "Content: " << tr_ << endl;
643  //cout << "shape: " << (tensor_shape&)tr_ << endl;
644  }
645 
646  void tensor_reduction::insert(const tref_or_reduction& t, const std::string& s) {
647  if (!t.is_reduction()) {
648  insert(t.tr(),s);
649  } else {
650  trtab.push_back(t); trtab.back().ridx = s;
651  //cout << "reduction.insert_reduction('" << s << "')\n";
652  //cout << "Reduction: INSERT REDUCTION tr(ndim=" << int(t.tr().ndim()) << ", s='" << s << "')\n";
653  }
654  }
655 
656  /* ensure that r(i,i).t(j,:,j,j,k,o)
657  becomes r(i,A).t(j,:,B,C,k,o)
658  and updates reduction_chars accordingly
659  */
660  void tensor_reduction::update_reduction_chars() {
661  reduction_chars.clear();
662  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
663  assert(it->ridx.size() == it->tr().ndim());
664  for (unsigned i =0; i < it->ridx.size(); ++i) {
665  if (it->ridx[i] != ' ' &&
666  reduction_chars.find(it->ridx[i]) == std::string::npos)
667  reduction_chars.push_back(it->ridx[i]);
668  }
669  }
670  /* for each tensor, if a diagonal reduction inside the tensor is used,
671  the mask of the tensor has been 'and'-ed with a diagonal mask
672  and a second 'virtual' reduction index is used */
673  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
674  it->gdim.resize(it->ridx.size());
675  for (unsigned i =0; i < it->ridx.size(); ++i) {
676  char c = it->ridx[i];
677  if (c != ' ' && it->ridx.find(c) != i) {
678  for (c = 'A'; c <= 'Z'; ++c)
679  if (reduction_chars.find(c) == std::string::npos) break;
680  it->ridx[i] = c;
681  reduction_chars.push_back(it->ridx[i]);
682  }
683  }
684  }
685  }
686 
687  /*
688  initialize 'reduced_range' and it->rdim
689  */
690  void tensor_reduction::pre_prepare() {
691  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
692  assert(it->ridx.size() == it->tr().ndim());
693  it->rdim.resize(it->ridx.size());
694  //cout << " rix = '" << it->ridx << "'\n";
695  for (dim_type i =0; i < it->ridx.size(); ++i) {
696  if (it->ridx[i] == ' ') {
697  reduced_range.push_back(it->tr().dim(i));
698  it->rdim[i] = dim_type(reduced_range.size()-1);
699  } else it->rdim[i] = dim_type(-1);
700  }
701  }
702  }
703 
704  /* look for a possible sub-reduction on a subset of the tensors.
705  returns the subset, and the list of concerned reduction indexes */
706  size_type tensor_reduction::find_best_sub_reduction(dal::bit_vector &best_lst, std::string &best_idxset) {
707  dal::bit_vector lst;
708  std::string idxset;
709  best_lst.clear(); best_idxset.clear();
710 
711  update_reduction_chars();
712 
713  //cout << "find_best_reduction: reduction_chars='" << reduction_chars << "'\n";
714  GMM_ASSERT2(trtab.size() <= 32, "wow it was assumed that nobody would "
715  "ever need a reduction on more than 32 tensors..");
716 
717  std::vector<std::bitset<32> > idx_occurences(reduction_chars.size());
718 
719  for (unsigned ir=0; ir < reduction_chars.size(); ++ir) {
720  char c = reduction_chars[ir];
721  for (unsigned tnum=0; tnum < trtab.size(); ++tnum)
722  idx_occurences[ir][tnum] = (trtab[tnum].ridx.find(c) != std::string::npos);
723  //cout << "find_best_reduction: idx_occurences[" << ir << "] = " << idx_occurences[ir] << "\n";
724  }
725  size_type best_redsz = 100000000;
726  for (unsigned ir=0; ir < reduction_chars.size(); ++ir) {
727  lst.clear(); lst.add(ir);
728  idxset.resize(0); idxset.push_back(reduction_chars[ir]);
729  /* add other possible reductions */
730  for (unsigned ir2=0; ir2 < reduction_chars.size(); ++ir2) {
731  if (ir2 != ir) {
732  if ((idx_occurences[ir2] | idx_occurences[ir]) == idx_occurences[ir]) {
733  lst.add(ir2);
734  idxset.push_back(reduction_chars[ir2]);
735  }
736  }
737  }
738  /* evaluate the cost */
739  size_type redsz = 1;
740  for (unsigned tnum=0; tnum < trtab.size(); ++tnum) {
741  if (!idx_occurences[ir][tnum]) continue;
742  std::bitset<int(32)> once((int)reduction_chars.size());
743  for (dim_type i=0; i < trtab[tnum].tr().ndim(); ++i) {
744  bool ignore = false;
745  for (dal::bv_visitor j(lst); !j.finished(); ++j) {
746  if (trtab[tnum].ridx[i] == reduction_chars[(size_t)j]) {
747  if (once[j]) ignore = true; else once[j] = true;
748  }
749  }
750  if (!ignore)
751  redsz *= trtab[tnum].tr().dim(i);
752  }
753  }
754  //cout << " test " << reduction_chars[ir] << ": lst=" << lst << ", redsz=" << redsz << "\n";
755  if (redsz < best_redsz) {
756  best_redsz = redsz;
757  best_lst.clear();
758  for (unsigned i=0; i < trtab.size(); ++i)
759  if (idx_occurences[ir][i]) best_lst.add(i);
760  best_idxset = idxset;
761  }
762  }
763  /*cout << "find_best_reduction: lst = " << best_lst << " [nt="
764  << trtab.size() << "], idx_set='" << best_idxset
765  << "', redsz=" << best_redsz << "\n";
766  */
767  return best_redsz;
768  }
769 
770  void tensor_reduction::make_sub_reductions() {
771  dal::bit_vector bv; std::string red;
772  int iter = 1;
773  do {
774  find_best_sub_reduction(bv,red);
775  if (bv.card() < trtab.size() && red.size()) {
776  //cout << "making sub reduction\n";
777  auto sub = std::make_shared<tensor_reduction>();
778  std::vector<dim_type> new_rdim; new_rdim.reserve(16);
779  std::string new_reduction;
780  for (dal::bv_visitor tnum(bv); !tnum.finished(); ++tnum) {
781  tref_or_reduction &t = trtab[tnum];
782  std::string re = t.ridx; t.ridx.clear();
783  for (unsigned i = 0; i < re.size(); ++i) {
784  bool reduced = false;
785  char c = re[i];
786  if (c != ' ') {
787  if (red.find(re[i]) == std::string::npos) c = ' ';
788  else reduced = true;
789  }
790  if (!reduced) {
791  t.ridx.push_back(re[i]);
792  new_rdim.push_back(t.rdim[i]);
793  new_reduction.push_back(re[i]);
794  }
795  re[i] = c;
796  }
797  //cout << " sub-";
798  sub->insert(trtab[tnum], re);
799  }
800  //cout << " new_reduction = '" << new_reduction << "'\n";
801  sub->prepare();
802  /*cout << " " << new_reduction.size() << " == " << int(sub->trres.ndim()) << "?\n";
803  assert(new_reduction.size() == sub->trres.ndim());*/
804  trtab[bv.first_true()] = tref_or_reduction(sub, new_reduction);
805  trtab[bv.first_true()].rdim = new_rdim;
806  std::vector<tref_or_reduction> trtab2; trtab2.reserve(trtab.size() - bv.card() + 1);
807  for (size_type i=0; i < trtab.size(); ++i)
808  if (!bv.is_in(i) || i == bv.first_true())
809  trtab2.push_back(trtab[i]);
810  trtab.swap(trtab2);
811  //cout << "make_sub_reductions[" << iter << "] : still " << trtab.size() << " tensors\n";
812  /*for (size_type i=0; i < trtab.size(); ++i)
813  cout << " dim = " << trtab[i].tr().ndim() << " : '" << trtab[i].ridx << "'\n";
814  */
815  ++iter;
816  } else {
817  //cout << "Ok, no more reductions (bv.card() == " << bv.card() << ")\n\n";
818  break;
819  }
820  } while (1);
821  }
822 
823  void tensor_reduction::prepare(const tensor_ref* tr_out) {
824  pre_prepare();
825  make_sub_reductions();
826 
827  /* create the output tensor */
828  if (tr_out == NULL) {
829  trres = tensor_ref(reduced_range);
830  out_data.resize(trres.card());
831  pout_data = &out_data[0];
832  trres.set_base(pout_data);
833  } else {
834  GMM_ASSERT3(tr_out->ndim() == reduced_range.size(), "");
835  for (dim_type i=0; i < tr_out->ndim(); ++i)
836  GMM_ASSERT3(tr_out->dim(i) == reduced_range[i], "");
837  trres = *tr_out;
838  }
839 
840  /* prepare the mapping from each dimension of each tensor to the global range
841  (the first dimensions are reserved for non-reduced dimensions, i.e. those
842  of 'reduced_range'
843  */
844  tensor_ranges global_range; /* global range across all tensors of the
845  reduction */
846  std::string global_chars; /* name of indexes (or ' ') for each dimension
847  of global_range */
848  global_range.reserve(16);
849  global_range.assign(reduced_range.begin(), reduced_range.end());
850  global_chars.insert(size_type(0), reduced_range.size(), ' ');
851  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
852  assert(it->rdim.size() == it->tr().ndim());
853  it->gdim = it->rdim;
854  for (dim_type i=0; i < it->ridx.size(); ++i) {
855  if (it->rdim[i] == dim_type(-1)) {
856  assert(it->ridx[i] != ' ');
857  std::string::size_type p = global_chars.find(it->ridx[i]);
858  if (p == std::string::npos) {
859  global_chars.push_back(it->ridx[i]);
860  global_range.push_back(it->tr().dim(i));
861  it->gdim[i] = dim_type(global_range.size() - 1);
862  } else {
863  GMM_ASSERT1(it->tr().dim(i) == global_range[p],
864  "inconsistent dimensions for reduction index "
865  << it->ridx[i] << "(" << int(it->tr().dim(i))
866  << " != " << int(global_range[p]) << ")");
867  it->gdim[i] = dim_type(p);
868  }
869  }
870  }
871  //cout << " rdim = " << it->rdim << ", gdim = " << it->gdim << "\n";
872  }
873  //cout << "global_range = " << global_range << ", global_chars = '" << global_chars << "'\n";
874 
875  std::vector<dim_type> reorder(global_chars.size(), dim_type(-1));
876  /* increase the dimension of the tensor holding the result */
877  for (dim_type i=0; i < reduced_range.size(); ++i) reorder[i] = i;
878  //cout << "reorder = '" << reorder << "'\n";
879  trres.permute(reorder);
880  std::vector<tensor_ref> tt; tt.reserve(trtab.size()+1);
881  tt.push_back(trres);
882 
883  /* permute all tensors (and increase their number of dimensions) */
884  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
885  std::fill(reorder.begin(), reorder.end(), dim_type(-1));
886  for (dim_type i=0; i < it->gdim.size(); ++i) {
887  reorder[it->gdim[i]] = i;
888  }
889  //cout << "reorder = '" << reorder << "'\n";
890  it->tr().permute(reorder);
891  tt.push_back(it->tr());
892  //cout << "MTI[" << it-trtab.begin() << "/" << trtab.size() << "] : " << (tensor_shape&)it->tr();
893  }
894 
895  /* now, the iterator can be built */
896  mti.init(tt,false);
897  }
898 
899  static void do_reduction1(bgeot::multi_tensor_iterator &mti) {
900  do {
901  scalar_type s1 = 0;
902  do {
903  s1 += mti.p(1);
904  } while (mti.bnext(1));
905  mti.p(0) += s1;
906  } while (mti.bnext(0));
907  }
908 
909  static bool do_reduction2v(bgeot::multi_tensor_iterator &mti) {
910  long n = mti.vectorized_size();
911  const std::vector<stride_type> &s = mti.vectorized_strides();
912  if (n && s[0] && s[1] && s[2] == 0) {
913  long incx = s[1], incy = s[0];
914  /*mti.print();
915  scalar_type *b[3];
916  for (int i=0; i < 3; ++i) b[i] = &mti.p(i);*/
917  do {
918  /*cout << "vectorized_ reduction2a : n=" << n << ", s = " << s << " mti.p=" << &mti.p(0)-b[0] << ","
919  << &mti.p(1)-b[1] << "," << &mti.p(2)-b[2] << "\n";*/
920  gmm::daxpy_(&n, &mti.p(2), &mti.p(1), &incx, &mti.p(0), &incy);
921  } while (mti.vnext());
922  return true;
923  } else return false;
924  }
925  static void do_reduction2a(bgeot::multi_tensor_iterator &mti) {
926  if (!do_reduction2v(mti)) {
927  do {
928  mti.p(0) += mti.p(1)*mti.p(2);
929  } while (mti.bnext(0));
930  }
931  }
932 
933  static void do_reduction2b(bgeot::multi_tensor_iterator &mti) {
934  do {
935  scalar_type s1 = 0;
936  do {
937  scalar_type s2 = 0;
938  do {
939  s2 += mti.p(2);
940  } while (mti.bnext(2));
941  s1 += mti.p(1)*s2;
942  } while (mti.bnext(1));
943  mti.p(0) += s1;
944  } while (mti.bnext(0));
945  }
946 
947  static bool do_reduction3v(bgeot::multi_tensor_iterator &mti) {
948  long n = mti.vectorized_size();
949  const std::vector<stride_type> &s = mti.vectorized_strides();
950  if (n && s[0] && s[1] && s[2] == 0 && s[3] == 0) {
951  long incx = s[1], incy = s[0];
952  do {
953  double v = mti.p(2)*mti.p(3);
954  gmm::daxpy_(&n, &v, &mti.p(1), &incx, &mti.p(0), &incy);
955  } while (mti.vnext());
956  return true;
957  } else return false;
958  }
959 
960  static void do_reduction3a(bgeot::multi_tensor_iterator &mti) {
961  if (!do_reduction3v(mti)) {
962  do {
963  mti.p(0) += mti.p(1)*mti.p(2)*mti.p(3);
964  } while (mti.bnext(0));
965  }
966  }
967 
968  static void do_reduction3b(bgeot::multi_tensor_iterator &mti) {
969  do {
970  scalar_type s1 = 0;
971  do {
972  scalar_type s2 = 0;
973  do {
974  scalar_type s3 = 0;
975  do {
976  s3 += mti.p(3);
977  } while (mti.bnext(3));
978  s2 += mti.p(2)*s3;
979  } while (mti.bnext(2));
980  s1 += mti.p(1)*s2;
981  } while (mti.bnext(1));
982  mti.p(0) += s1;
983  } while (mti.bnext(0));
984  }
985 
986  void tensor_reduction::do_reduction() {
987  /* on s'assure que le tenseur destination a bien ete remis a zero
988  avant le calcul (c'est obligatoire malheureusement, consequence
989  de l'utilisation de masque qui ne s'arretent pas forcement sur les
990  'frontieres' entre les differents tenseurs reduits) */
991  //std::fill(out_data.begin(), out_data.end(), 0.);
992  if (out_data.size()) memset(&out_data[0], 0, out_data.size()*sizeof(out_data[0]));
993  for (unsigned i=0; i < trtab.size(); ++i) {
994  if (trtab[i].is_reduction()) {
995  trtab[i].reduction->do_reduction();
996  trtab[i].reduction->result(trtab[i].tr());
997  //cout << "resultat intermediaire: " << trtab[i].tr() << endl;
998  }
999  }
1000  mti.rewind();
1001  dim_type N = dim_type(trtab.size());
1002  if (N == 1) {
1003  do_reduction1(mti);
1004  } else if (N == 2) {
1005  if (!mti.bnext_useful(2) && !mti.bnext_useful(1)) {
1006  do_reduction2a(mti);
1007  } else {
1008  do_reduction2b(mti);
1009  }
1010  } else if (N == 3) {
1011  if (!mti.bnext_useful(1) && (!mti.bnext_useful(2)) && !mti.bnext_useful(3)) {
1012  do_reduction3a(mti);
1013  } else {
1014  do_reduction3b(mti);
1015  }
1016  } else if (N == 4) {
1017  do {
1018  scalar_type s1 = 0;
1019  do {
1020  scalar_type s2 = 0;
1021  do {
1022  scalar_type s3 = 0;
1023  do {
1024  scalar_type s4 = 0;
1025  do {
1026  s4 += mti.p(4);
1027  } while (mti.bnext(4));
1028  s3 += mti.p(3)*s4;
1029  } while (mti.bnext(3));
1030  s2 += mti.p(2)*s3;
1031  } while (mti.bnext(2));
1032  s1 += mti.p(1)*s2;
1033  } while (mti.bnext(1));
1034  mti.p(0) += s1;
1035  } while (mti.bnext(0));
1036  } else {
1037  GMM_ASSERT1(false, "unhandled reduction case ! (N=" << int(N) << ")");
1038  }
1039  }
1040 
1041  void tensor_reduction::clear() {
1042  trtab.clear();
1043  trres.clear();
1044  reduced_range.resize(0);
1045  reduction_chars.clear();
1046 
1047  out_data.resize(0);
1048  pout_data = 0; trtab.reserve(10);
1049  mti.clear();
1050  }
1051 
1052 
1053  void tensor_mask::print(std::ostream &o) const {
1054  index_type c=card(true);
1055  check_assertions();
1056  o << " mask : card=" << c << "(card_=" << card_ << ", uptodate=" << card_uptodate << "), indexes=";
1057  for (dim_type i=0; i < idxs.size(); ++i)
1058  o << (i==0?"":", ") << int(idxs[i]) << ":" << int(r[i]);
1059  o << " ";
1060  if (c == size()) o << " FULL" << endl;
1061  else {
1062  o << "m={";
1063  if (idxs.size() == 1) {
1064  for (index_type i=0; i < m.size(); ++i) o << (m[i] ? 1 : 0);
1065  } else {
1066  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
1067  if (l.cnt[0] == 0 && l.cnt[1] == 0 && r.size()>2) {
1068  o << "\n -> (:,:";
1069  for (dim_type i=2; i < r.size(); ++i) o << "," << l.cnt[i];
1070  o << ")={";
1071  }
1072  o << (m[lpos(l.cnt)] ? 1 : 0);
1073  if (l.cnt[0] == r[0]-1) {
1074  if (l.cnt[1] != r[1]-1) o << ",";
1075  else if (idxs.size() > 2) o << "}";
1076  }
1077  }
1078  }
1079  o << "}" << endl;
1080  }
1081  }
1082 
1083 
1084 
1085  void tensor_shape::print(std::ostream& o) const {
1086  o << " tensor_shape: n=" << idx2mask.size() << ", idx2mask=";
1087  for (dim_type i=0; i < idx2mask.size(); ++i) {
1088  if (i) o << ",";
1089  if (idx2mask[i].is_valid()) {
1090  o << "r" << dim(i) << ":m" << int(idx2mask[i].mask_num) << "/" << int(idx2mask[i].mask_dim);
1091  } else o << " (na) ";
1092  }
1093  o << endl;
1094  // o << " masks[1.."<< masks_.size() << "]={" << endl;
1095  for (dim_type i=0; i < masks_.size(); ++i) o << masks_[i];
1096  o << " ^-- end tensor_shape" << endl;
1097  }
1098 
1099  void tensor_ref::print(std::ostream& o) const {
1100  o << "tensor_ref, n=" << int(ndim()) << ", card=" << card(true) << ", base=" << base() << endl;
1101  for (dim_type i=0; i < strides().size(); ++i) {
1102  o << " * strides["<<i<<"]={";
1103  for (size_type j=0; j < strides()[i].size(); ++j) o << (j>0?",":"") << strides()[i][j];
1104  o << "}" << endl;
1105  }
1106  multi_tensor_iterator mti(*this, true);
1107  do {
1108  for (dim_type i = 0; i < mti.ndim(); ++i) {
1109  o << (i==0?"(":",");
1110  if (index_is_valid(i))
1111  o << mti.index(i);
1112  else o << "*";
1113  }
1114  o << ")";
1115  if (base()) {
1116  o << " = " << mti.p(0) << "\t@base+" << &mti.p(0) - base();
1117  } else o << "\t@" << size_t(&mti.p(0))/sizeof(scalar_type);
1118  o << endl;
1119  } while (mti.qnext1());
1120 
1121  o << "^---- end tensor_ref" << endl;
1122  }
1123 
1124  std::ostream& operator<<(std::ostream& o, const tensor_mask& m) {
1125  m.print(o); return o;
1126  }
1127  std::ostream& operator<<(std::ostream& o, const tensor_shape& ts) {
1128  ts.print(o); return o;
1129  }
1130  std::ostream& operator<<(std::ostream& o, const tensor_ref& tr) {
1131  tr.print(o); return o;
1132  }
1133  void print_tm(const tensor_mask& tm) { tm.print_(); }
1134  void print_ts(const tensor_shape& ts) { ts.print_(); }
1135  void print_tr(const tensor_ref& tr) { tr.print_(); }
1136 }
std::ostream & operator<<(std::ostream &o, const convex_structure &cv)
Print the details of the convex structure cvs to the output stream o.
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:49
Sparse tensors, used during the assembly.
void resize(V &v, size_type n)
*/
Definition: gmm_blas.h:209
void clear(L &l)
clear (fill with zeros) a vector or matrix.
Definition: gmm_blas.h:59
gmm interface for fortran BLAS.
Basic Geometric Tools.