GetFEM++  5.3
bgeot_sparse_tensors.h
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2000-2017 Julien Pommier
5 
6  This file is a part of GetFEM++
7 
8  GetFEM++ is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program; if not, write to the Free Software Foundation,
19  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20 
21  As a special exception, you may use this file as it is a part of a free
22  software library without restriction. Specifically, if other files
23  instantiate templates or use macros or inline functions from this file,
24  or you compile this file and link it with other files to produce an
25  executable, this file does not by itself cause the resulting executable
26  to be covered by the GNU Lesser General Public License. This exception
27  does not however invalidate any other reasons why the executable file
28  might be covered by the GNU Lesser General Public License.
29 
30 ===========================================================================*/
31 
32 /**@file bgeot_sparse_tensors.h
33  @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
34  @date January 2003.
35  @brief Sparse tensors, used during the assembly.
36 
37  "sparse" tensors: these are not handled like sparse matrices
38 
39  As an example, let's say that we have a tensor t(i,j,k,l) of
40  dimensions 4x2x3x3, with t(i,j,k,l!=k) == 0.
41 
42  Then the tensor shape will be represented by a set of 3 objects of type
43  'tensor_mask':
44  mask1: {i}, "1111"
45  mask2: {j}, "11"
46  mask3: {k,l}, "100"
47  "010"
48  "001"
49  They contain a binary tensor indicating the non-null elements.
50 
51  The set of these three masks defines the shape of the tensor
52  (class tensor_shape)
53 
54  If we add information about the location of the non-null elements
55  (by means of strides), then we have an object of type 'tensor_ref'
56 
57  Iteration over the data of one or more tensors should be done via the
58  'multi_tensor_iterator', which can iterate over common non-null
59  elements of a set of tensors.
60 
61 
62  maximum (virtual) number of elements in a tensor : 2^31
63  maximum number of dimensions : 254
64 
65  "ought to be enough for anybody"
66 */
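/* A minimal usage sketch (added for illustration, not part of the original
   header): building the shape of the 4x2x3x3 example above, where the
   entries are non-null only for k == l. It relies only on the classes
   declared below (tensor_ranges, tensor_shape, tensor_mask::Diagonal);
   variable names are arbitrary.

     bgeot::tensor_ranges rng(4);
     rng[0] = 4; rng[1] = 2; rng[2] = 3; rng[3] = 3;
     bgeot::tensor_shape ts(rng);                            // full shape, no sparsity yet
     ts = ts.diag_shape(bgeot::tensor_mask::Diagonal(2, 3)); // keep only k == l
*/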
67 #ifndef BGEOT_SPARSE_TENSORS
68 #define BGEOT_SPARSE_TENSORS
69 
70 #include "gmm/gmm_except.h"
71 #include "bgeot_config.h"
72 #include "dal_bit_vector.h"
73 // #include "gmm/gmm_kernel.h" // for i/o on vectors it is convenient
74 #include <iostream>
75 #include <bitset>
76 
77 namespace bgeot {
78  typedef gmm::uint32_type index_type;
79  typedef gmm::int32_type stride_type; /* signed! */
80 
81  // typedef std::vector<index_type> tensor_ranges;
82  class tensor_ranges : public std::vector<index_type> {
83  public:
84  tensor_ranges() : std::vector<index_type>() {}
85  tensor_ranges(size_type n) : std::vector<index_type>(n) {}
86  tensor_ranges(size_type n, index_type V) : std::vector<index_type>(n,V) {}
87  bool is_zero_size() const
88  {
89  for (dim_type i=0; i < this->size(); ++i)
90  if ((*this)[i] == 0)
91  return true;
92  return false;
93  }
94  };
95  typedef std::vector<stride_type> tensor_strides;
96  typedef std::vector<dim_type> index_set;
97 
98  typedef scalar_type * TDIter;
99 
100  std::ostream& operator<<(std::ostream& o, const tensor_ranges& r);
101 
102  /* stupid && inefficient loop structure */
103  struct tensor_ranges_loop {
104  tensor_ranges sz;
105  tensor_ranges cnt;
106  bool finished_;
107  public:
108  tensor_ranges_loop(const tensor_ranges& t) : sz(t), cnt(t.size()), finished_(t.size() == 0) {
109  std::fill(cnt.begin(), cnt.end(), 0);
110  }
111  index_type index(dim_type i) { return cnt[i]; }
112  bool finished() const { return finished_; }
113  bool next() {
114  index_type i = 0;
115  while (++cnt[i] >= sz[i]) {
116  cnt[i] = 0; i++; if (i >= sz.size()) { finished_ = true; break; }
117  }
118  return finished_;
119  }
120  };
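/* Illustration (an assumption, not in the original file): enumerating every
   index tuple of a 2x3 range with the loop structure above.

     bgeot::tensor_ranges rng(2); rng[0] = 2; rng[1] = 3;
     for (bgeot::tensor_ranges_loop l(rng); !l.finished(); l.next())
       std::cout << l.index(0) << "," << l.index(1) << "\n";
*/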
121 
122  /* handle a binary mask over a given number of indices */
123  class tensor_mask {
124  tensor_ranges r;
125  index_set idxs;
126  std::vector<bool> m;
127  tensor_strides s; /* strides in m */
128  mutable index_type card_; /* finally i should have kept m as a dal::bit_vector ... */
129  mutable bool card_uptodate;
130  public:
131  tensor_mask() { set_card(0); }
132  explicit tensor_mask(const tensor_ranges& r_, const index_set& idxs_) {
133  assign(r_,idxs_);
134  }
135  /* merge constructor */
136  explicit tensor_mask(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
137  explicit tensor_mask(const std::vector<const tensor_mask*>& tm);
138  explicit tensor_mask(const std::vector<const tensor_mask*> tm1,
139  const std::vector<const tensor_mask*> tm2, bool and_op);
140  void swap(tensor_mask &tm) {
141  r.swap(tm.r); idxs.swap(tm.idxs);
142  m.swap(tm.m); s.swap(tm.s);
143  std::swap(card_, tm.card_);
144  std::swap(card_uptodate, tm.card_uptodate);
145  }
146  void assign(const tensor_ranges& r_, const index_set& idxs_) {
147  r = r_; idxs = idxs_; eval_strides(); m.assign(size(),false);
148  set_card(0);
149  }
150  void assign(const tensor_mask& tm) {
151  r = tm.r;
152  idxs = tm.idxs;
153  m = tm.m;
154  s = tm.s;
155  card_ = tm.card_; card_uptodate = tm.card_uptodate;
156  }
157  void assign(const std::vector<const tensor_mask* >& tm);
158  void assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
159 
160  void clear() { r.resize(0); idxs.resize(0); m.clear(); s.resize(0); set_card(0); }
161  const tensor_ranges& ranges() const { return r; }
162  const index_set& indexes() const { return idxs; }
163  const tensor_strides& strides() const { return s; }
164  index_set& indexes() { return idxs; }
165  void eval_strides() {
166  s.resize(r.size()+1); s[0]=1;
167  for (index_type i=0; i < r.size(); ++i) {
168  s[i+1]=s[i]*r[i];
169  }
170  }
171  index_type ndim() const { return index_type(r.size()); }
172  index_type size() const { return s[r.size()]; }
173  void set_card(index_type c) const { card_ = c; card_uptodate = true; }
174  void unset_card() const { card_uptodate = false; }
175  index_type card(bool just_look=false) const {
176  if (!card_uptodate || just_look) {
177  index_type c = index_type(std::count(m.begin(),
178  m.end(), true));
179  if (just_look) return c;
180  card_ = c;
181  }
182  return card_;
183  }
184  index_type pos(tensor_ranges& global_r) const {
185  index_type p = 0;
186  for (index_type i=0; i < r.size(); ++i)
187  p+= s[i]*global_r[idxs[i]];
188  return p;
189  }
190  index_type lpos(tensor_ranges& local_r) const {
191  index_type p = 0;
192  for (index_type i=0; i < r.size(); ++i)
193  p+= s[i]*local_r[i];
194  return p;
195  }
196  bool operator()(tensor_ranges& global_r) const {
197  return m[pos(global_r)];
198  }
199  bool operator()(stride_type p) const { return m[p]; }
200  void set_mask_val(stride_type p, bool v) { m[p]=v; card_uptodate = false; }
201  struct Slice {
202  dim_type dim;
203  index_type i0;
204  Slice(dim_type d, index_type i0_) : dim(d), i0(i0_) {}
205  };
206 
207  /* create a slice mask */
208  void set_slice(index_type dim, index_type range, index_type islice) {
209  r.resize(1); r[0] = range;
210  idxs.resize(1); idxs[0] = dim_type(dim);
211  m.clear(); m.assign(range,false); m[islice] = 1; set_card(1);
212  eval_strides();
213  }
214  explicit tensor_mask(index_type range, Slice slice) {
215  set_slice(slice.dim, range, slice.i0);
216  }
217 
218  struct Diagonal {
219  dim_type i0, i1;
220  Diagonal(dim_type i0_, dim_type i1_) : i0(i0_), i1(i1_) {}
221  };
222 
223  /* create a diagonal mask */
224  void set_diagonal(index_type n, index_type i0, index_type i1) {
225  assert(n);
226  r.resize(2); r[0] = r[1] = n;
227  idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
228  m.assign(n*n, false);
229  for (index_type i=0; i < n; ++i) m[n*i+i]=true;
230  set_card(n);
231  eval_strides();
232  }
233  explicit tensor_mask(index_type n, Diagonal diag) {
234  set_diagonal(n, diag.i0, diag.i1);
235  }
236  void set_triangular(index_type n, index_type i0, index_type i1) {
237  assert(n);
238  r.resize(2); r[0] = r[1] = n;
239  idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
240  m.assign(n*n,false); unset_card();
241  for (index_type i=0; i < n; ++i)
242  for (index_type j=i; j < n; ++j) m[i*n+j]=true;
243  eval_strides();
244  }
245  void set_full(index_type dim, index_type range) {
246  // assert(range); // not sure if permitting range==0 can have any side effects
247  r.resize(1); r[0] = range;
248  idxs.resize(1); idxs[0] = dim_type(dim);
249  m.assign(range, true); set_card(range);
250  eval_strides();
251  }
252  void set_empty(index_type dim, index_type range) {
253  // assert(range); // not sure if permitting range==0 can have any side effects
254  r.resize(1); r[0] = range;
255  idxs.resize(1); idxs[0] = dim_type(dim);
256  m.assign(range,false); set_card(0);
257  eval_strides();
258  }
259  explicit tensor_mask(index_type dim, index_type range) {
260  set_full(dim, range);
261  }
262  void set_zero() {
263  m.assign(size(),false); set_card(0);
264  }
265  void shift_dim_num_ge(dim_type dim, int shift) {
266  for (dim_type i=0; i < idxs.size(); ++i) {
267  if (idxs[i] >= dim) idxs[i] = dim_type(idxs[i] + shift);
268  }
269  check_assertions();
270  }
271  void gen_mask_pos(tensor_strides& p) const {
272  check_assertions();
273  p.resize(card());
274  index_type i = 0;
275  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
276  if (m[lpos(l.cnt)]) p[i++] = lpos(l.cnt);
277  }
278  assert(i==card());
279  }
280  void unpack_strides(const tensor_strides& packed, tensor_strides& unpacked) const;
281 
282  /* better to have this one return an int,
283  or else an unsigned; dim_type would be dangerous here */
284  int max_dim() const {
285  index_set::const_iterator it = std::max_element(idxs.begin(),idxs.end());
286  return (it == idxs.end() ? -1 : *it);
287  }
288  void check_assertions() const;
289  void print(std::ostream &o) const;
290  void print_() const { print(cerr); }
291  };
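/* Illustration (an assumption, not in the original file): a diagonal mask
   linking tensor indexes 2 and 3 over a range of 3, i.e. the mask3 of the
   example in the header comment.

     bgeot::tensor_mask mk(3, bgeot::tensor_mask::Diagonal(2, 3));
     // mk.card() == 3 of the mk.size() == 9 positions are non-null
*/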
292 
293 
294 
295  typedef std::vector<tensor_mask> tensor_mask_container;
296 
297  struct tensor_index_to_mask {
298  short_type mask_num;
299  short_type mask_dim;
300  tensor_index_to_mask() : mask_num(short_type(-1)),
301  mask_dim(short_type(-1)) {}
302  bool is_valid() { return mask_num != short_type(-1) &&
303  mask_dim != short_type(-1); }
304  };
305 
306 
307  /*
308  defines the "shape" of a sparse tensor;
309  the merge function performs unions / intersections between such shapes
310  */
311  class tensor_shape {
312  mutable std::vector<tensor_index_to_mask> idx2mask;
313  tensor_mask_container masks_;
314 
315  /* check whether one of the masks is completely empty;
316  if that is the case, then all the other masks are emptied
317  (the tensor is identically zero) */
318  void check_empty_mask() {
319  if (card() == 0) {
320  for (dim_type i=0; i < masks_.size(); ++i) {
321  masks_[i].set_zero();
322  }
323  }
324  }
325 
326  static void find_linked_masks(dim_type mnum, const tensor_shape &ts1, const tensor_shape &ts2,
327  dal::bit_vector& treated1, dal::bit_vector& treated2,
328  std::vector<const tensor_mask*>& lstA,
329  std::vector<const tensor_mask*>& lstB) {
330  // beware of infinite loops if none of the indexes is valid
331  assert(mnum < ts1.masks().size());
332  assert(!treated1[mnum]);
333  treated1.add(mnum);
334  lstA.push_back(&ts1.mask(mnum));
335  for (dim_type i=0; i < ts1.mask(mnum).indexes().size(); ++i) {
336  dim_type ii = ts1.mask(mnum).indexes()[i];
337  if (ts2.index_is_valid(ii) && !treated2[ts2.index_to_mask_num(ii)])
338  find_linked_masks(ts2.index_to_mask_num(ii),ts2,ts1,treated2,treated1,lstB,lstA);
339  }
340  }
341 
342  protected:
343  dim_type index_to_mask_num(dim_type ii) const {
344  if (index_is_valid(ii))
345  return dim_type(idx2mask[ii].mask_num); else return dim_type(-1);
346  }
347  public:
348  void clear() { masks_.resize(0); idx2mask.resize(0); }
349  void swap(tensor_shape& ts) {
350  idx2mask.swap(ts.idx2mask);
351  masks_.swap(ts.masks_);
352  }
353  dim_type ndim() const { return dim_type(idx2mask.size()); }
354  bool index_is_valid(dim_type ii) const {
355  assert(ii < idx2mask.size()); return idx2mask[ii].is_valid();
356  }
357  const tensor_mask& index_to_mask(dim_type ii) const {
358  assert(index_is_valid(ii)); return masks_[idx2mask[ii].mask_num];
359  }
360  dim_type index_to_mask_dim(dim_type ii) const {
361  assert(index_is_valid(ii)); return dim_type(idx2mask[ii].mask_dim);
362  }
363  index_type dim(dim_type ii) const
364  { assert(index_is_valid(ii)); return index_to_mask(ii).ranges()[index_to_mask_dim(ii)];
365  }
366  tensor_mask_container& masks() { return masks_; }
367  const tensor_mask_container& masks() const { return masks_; }
368  const tensor_mask& mask(dim_type i) const { assert(i<masks_.size()); return masks_[i]; }
369  stride_type card(bool just_look=false) const {
370  stride_type n = 1;
371  for (dim_type i=0; i < masks().size(); ++i)
372  n *= masks()[i].card(just_look);
373  return n;
374  }
375  void push_mask(const tensor_mask& m) { masks_.push_back(m); update_idx2mask(); }
376  void remove_mask(dim_type mdim) { /* be careful with this function.. remove
377  only useless masks! */
378  masks_.erase(masks_.begin()+mdim);
379  update_idx2mask();
380  }
381  void remove_unused_dimensions() {
382  dim_type nd = 0;
383  for (dim_type i=0; i < ndim(); ++i) {
384  if (index_is_valid(i)) {
385  masks_[idx2mask[i].mask_num].indexes()[idx2mask[i].mask_dim] = nd++;
386  }
387  }
388  set_ndim_noclean(nd);
389  update_idx2mask();
390  }
391 
392  void update_idx2mask() const {
393  /*
394  dim_type N=0;
395  for (dim_type i=0; i < masks_.size(); ++i) {
396  N = std::max(N, std::max_element(masks_.indexes().begin(), masks_.indexes.end()));
397  }
398  idx2mask.resize(N);
399  */
400 
401  std::fill(idx2mask.begin(), idx2mask.end(), tensor_index_to_mask());
402  for (dim_type i=0; i < masks_.size(); ++i) {
403  for (dim_type j=0; j < masks_[i].indexes().size(); ++j) {
404  dim_type k = masks_[i].indexes()[j];
405  GMM_ASSERT3(k < idx2mask.size() && !idx2mask[k].is_valid(), "");
406  idx2mask[k].mask_num = i; idx2mask[k].mask_dim = j;
407  }
408  }
409  }
410  void assign_shape(const tensor_shape& other) {
411  masks_ = other.masks_;
412  idx2mask = other.idx2mask;
413  // update_idx2mask();
414  }
415  void set_ndim(dim_type n) {
416  clear();
417  idx2mask.resize(n); update_idx2mask();
418  }
419  void set_ndim_noclean(dim_type n) {idx2mask.resize(n);}
420 
421  tensor_shape() {}
422 
423  /* create an "empty" shape of dimensions nd */
424  explicit tensor_shape(dim_type nd) : idx2mask(nd,tensor_index_to_mask()) {
425  masks_.reserve(16);
426  }
427  explicit tensor_shape(const tensor_ranges& r) {
428  masks_.reserve(16);
429  set_full(r);
430  }
431  void set_full(const tensor_ranges& r) {
432  idx2mask.resize(r.size());
433  masks_.resize(r.size());
434  for (dim_type i=0; i < r.size(); ++i) masks_[i].set_full(i,r[i]);
435  update_idx2mask();
436  }
437 
438  void set_empty(const tensor_ranges& r) {
439  idx2mask.resize(r.size());
440  masks_.resize(r.size());
441  for (dim_type i=0; i < r.size(); ++i) masks_[i].set_empty(i,r[i]);
442  update_idx2mask();
443  }
444 
445 
446  /* merge with another shape */
447  void merge(const tensor_shape &ts2, bool and_op = true) {
448  /* a few basic checks */
449  GMM_ASSERT3(ts2.ndim() == ndim(), "");
450  if (ts2.ndim()==0) return; /* it is a scalar */
451  for (dim_type i = 0; i < ndim(); ++i)
452  if (index_is_valid(i) && ts2.index_is_valid(i))
453  GMM_ASSERT3(ts2.dim(i) == dim(i), "");
454 
455  tensor_mask_container new_mask;
456  dal::bit_vector mask_treated1; mask_treated1.sup(0,masks().size());
457  dal::bit_vector mask_treated2; mask_treated2.sup(0,ts2.masks().size());
458  std::vector<const tensor_mask*> lstA, lstB; lstA.reserve(10); lstB.reserve(10);
459  for (dim_type i = 0; i < ndim(); ++i) {
460  dim_type i1 = dim_type(index_to_mask_num(i));
461  dim_type i2 = dim_type(ts2.index_to_mask_num(i));
462  lstA.clear(); lstB.clear();
463  if (index_is_valid(i) && !mask_treated1[i1])
464  find_linked_masks(i1, *this, ts2, mask_treated1, mask_treated2,
465  lstA, lstB);
466  else if (ts2.index_is_valid(i) && !mask_treated2[i2])
467  find_linked_masks(i2, ts2, *this, mask_treated2, mask_treated1,
468  lstB, lstA);
469  else continue;
470  GMM_ASSERT3(lstA.size() || lstB.size(), "");
471  new_mask.push_back(tensor_mask(lstA,lstB,and_op));
472  }
473  masks_ = new_mask;
474  update_idx2mask();
475  check_empty_mask();
476  }
477 
478  void shift_dim_num_ge(dim_type dim_num, int shift) {
479  for (dim_type m = 0; m < masks().size(); ++m) {
480  masks()[m].shift_dim_num_ge(dim_num,shift);
481  }
482  }
483  /* the permutation vector might be longer than the current ndim,
484  in which case some indexes will be unused (when p[i] == dim_type(-1))
485  */
486  void permute(const std::vector<dim_type> p, bool revert=false) {
487  std::vector<dim_type> invp(ndim()); std::fill(invp.begin(), invp.end(), dim_type(-1));
488 
489  /* build the inverse permutation and check that this IS really a permutation */
490  for (dim_type i=0; i < p.size(); ++i) {
491  if (p[i] != dim_type(-1)) { assert(invp[p[i]] == dim_type(-1)); invp[p[i]] = i; }
492  }
493  for (dim_type i=0; i < invp.size(); ++i) assert(invp[i] != dim_type(-1));
494 
495  /* do the permutation (quite simple!) */
496  for (dim_type m=0; m < masks().size(); ++m) {
497  for (dim_type i=0; i < masks()[m].indexes().size(); ++i) {
498  if (!revert) {
499  masks()[m].indexes()[i] = invp[masks()[m].indexes()[i]];
500  } else {
501  masks()[m].indexes()[i] = p[masks()[m].indexes()[i]];
502  }
503  }
504  }
505  set_ndim_noclean(dim_type(p.size()));
506  update_idx2mask();
507  }
508 
509  /* shape of a slice (i.e. the shape one applies to a tensor in order to
510  extract the slice from it) */
511  tensor_shape slice_shape(tensor_mask::Slice slice) const {
512  assert(slice.dim < ndim() && slice.i0 < dim(slice.dim));
513  tensor_shape ts(ndim());
514  ts.push_mask(tensor_mask(dim(slice.dim), slice));
515  ts.merge(*this); /* the mask may suddenly become empty if the slice was taken at the wrong place! */
516  return ts;
517  }
518 
519  tensor_shape diag_shape(tensor_mask::Diagonal diag) const {
520  assert(diag.i1 != diag.i0 && diag.i0 < ndim() && diag.i1 < ndim());
521  assert(dim(diag.i0) == dim(diag.i1));
522  tensor_shape ts(ndim());
523  ts.push_mask(tensor_mask(dim(diag.i0), diag));
524  ts.merge(*this);
525  return ts;
526  }
527 
528  /*
529  void diag(index_type i0, index_type i1) {
530  assert(i0 < idx.size() && i1 < idx.size());
531  assert(idx[i0].n == idx[i1].n);
532  tensor_shape ts2 = *this;
533  ts2.masks.resize(1);
534  ts2.masks[0].set_diagonal(idx[i0].n, i0, i1);
535  ts2.idx[i0].mask_num = ts2.idx[i1].mask_num = 0;
536  ts2.idx[i0].mask_dim = 0; ts2.idx[i1].mask_dim = 1;
537  }
538  */
539  void print(std::ostream& o) const;
540  void print_() const { print(cerr); }
541  };
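/* Illustration (an assumption, not in the original file): shape of the slice
   j == 1 of a full 4x2 tensor, built with the members declared above.

     bgeot::tensor_ranges rng(2); rng[0] = 4; rng[1] = 2;
     bgeot::tensor_shape ts(rng);                           // full 4x2 shape
     ts = ts.slice_shape(bgeot::tensor_mask::Slice(1, 1));  // fix index 1 to the value 1
*/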
542 
543 
544  /* reference to a tensor:
545  - a shape
546  - a data pointer
547  - a set of strides
548  */
549  class tensor_ref : public tensor_shape {
550  std::vector< tensor_strides > strides_;
551  TDIter *pbase_; /* pointer to a pointer to the data;
552  this allows the base of a whole series of
553  tensor_ref objects to be changed at once */
554 
555  stride_type base_shift_;
556 
557  void remove_mask(dim_type mdim) {
558  tensor_shape::remove_mask(mdim);
559  assert(strides_[mdim].size() == 0 ||
560  (strides_[mdim].size() == 1 && strides_[mdim][0] == 0)); /* sanity check.. */
561  strides_.erase(strides_.begin()+mdim);
562  }
563  public:
564  void swap(tensor_ref& tr) {
565  tensor_shape::swap(tr);
566  strides_.swap(tr.strides_);
567  std::swap(pbase_, tr.pbase_);
568  std::swap(base_shift_, tr.base_shift_);
569  }
570  const std::vector< tensor_strides >& strides() const { return strides_; }
571  std::vector< tensor_strides >& strides() { return strides_; }
572  TDIter base() const { return (pbase_ ? (*pbase_) : 0); }
573  TDIter *pbase() const { return pbase_; }
574  stride_type base_shift() const { return base_shift_; }
575  void set_base(TDIter &new_base) { pbase_ = &new_base; base_shift_ = 0; }
576 
577  void clear() { strides_.resize(0); pbase_ = 0; base_shift_ = 0; tensor_shape::clear(); }
578 
579 
580 
581  /* makes sure that the stride of the first index is always equal to zero */
582  void ensure_0_stride() {
583  for (index_type i=0; i < strides_.size(); ++i) {
584  if (strides_[i].size() >= 1 && strides_[i][0] != 0) {
585  stride_type s = strides_[i][0];
586  base_shift_ += s;
587  for (index_type j=0; j < strides_[i].size(); ++j) strides_[i][j] -= s;
588  }
589  }
590  }
591 
592  /* constructor from a shape: WARNING, this constructor does not allocate
593  the memory needed for the data!! */
594  explicit tensor_ref(const tensor_shape& ts) : tensor_shape(ts), pbase_(0), base_shift_(0) {
595  strides_.reserve(16);
596  init_strides();
597  }
598  explicit tensor_ref(const tensor_ranges& r, TDIter *pbase__=0)
599  : tensor_shape(r), pbase_(pbase__), base_shift_(0) {
600  strides_.reserve(16);
601  init_strides();
602  }
603  void init_strides() {
604  strides_.resize(masks().size());
605  stride_type s = 1;
606  for (dim_type i = 0; i < strides_.size(); ++i) {
607  index_type n = mask(i).card();
608  strides_[i].resize(n);
609  for (index_type j=0;j<n;++j) strides_[i][j] = j*s;
610  s *= n;
611  }
612  }
613  tensor_ref() : pbase_(0), base_shift_(0) { strides_.reserve(16); }
614 
615  void set_sub_tensor(const tensor_ref& tr, const tensor_shape& sub);
616 
617  /* constructor of a sub-tensor from a tensor and a shape;
618  assumption: the shape 'sub' must be a subset of the tensor's shape
619  */
620  explicit tensor_ref(const tensor_ref& tr, const tensor_shape& sub) {
621  set_sub_tensor(tr,sub);
622  }
623 
624  /* slices a tensor_ref, at dimension 'dim', position 'islice'
625  ... not straightforward for sparse tensors !
626  */
627  explicit tensor_ref(const tensor_ref& tr, tensor_mask::Slice slice);
628 
629  /* create a diagonal of another tensor */
630  explicit tensor_ref(const tensor_ref& tr, tensor_mask::Diagonal diag) {
631  set_sub_tensor(tr, tr.diag_shape(diag));
632  ensure_0_stride();
633  }
634 
635  void print(std::ostream& o) const;
636 
637  void print_() const { print(cerr); }
638  };
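/* Illustration (an assumption, not in the original file): a dense 3x3
   tensor_ref viewing a local buffer; the strides are initialized densely by
   the constructor above.

     std::vector<bgeot::scalar_type> data(9, 0.);
     bgeot::TDIter pdata = &data[0];
     bgeot::tensor_ranges rng(2, 3);       // two indexes, each of range 3
     bgeot::tensor_ref tref(rng, &pdata);  // tref.base() points to data[0]
*/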
639 
640  std::ostream& operator<<(std::ostream& o, const tensor_mask& m);
641  std::ostream& operator<<(std::ostream& o, const tensor_shape& ts);
642  std::ostream& operator<<(std::ostream& o, const tensor_ref& tr);
643 
644  /* minimalistic data for iterations */
645  struct packed_range {
646  const stride_type *pinc;
647  const stride_type *begin, *end;
648  index_type n;
649  /* index_type cnt;*/
650  };
651  /* additional data */
652  struct packed_range_info {
653  index_type range;
654  dim_type original_masknum;
655  dim_type n;
656  std::vector<stride_type> mask_pos; /* for iterations that update the index values */
657  bool operator<(const packed_range_info& pi) const {
658  if (n < pi.n) return true;
659  else return false;
660  }
661  stride_type mean_increm; /* mean value of the increment (used for sorting) */
662  tensor_strides inc; /* not strides but increments to the next index value,
663  with inc[range-1] == -sum(inc[0..range-2]) (automatic rewinding!)
664  of course, stride_type MUST be signed
665  */
666  std::bitset<32> have_regular_strides;
667  };
668 
669  /* the big one */
670  class multi_tensor_iterator {
671  index_type N; /* number of simultaneous tensors */
672  std::vector<packed_range> pr;
673  std::vector<packed_range_info> pri;
674 
675  std::vector<index_type> bloc_rank;
676  std::vector<index_type> bloc_nelt;
677 
678  std::vector<TDIter> it;
679  std::vector<TDIter*> pit0;
680  tensor_strides itbase;
681  struct index_value_data {
682  dim_type cnt_num;
683  const stride_type **ppinc; /* points to pr[cnt_num].pinc, initialized by rewind()
684  and not before (otherwise problems arise when a multi_tensor_iterator is copied);
685  allows the counter value to be deduced: (*ppinc - pincbase) (to be divided by nn=(pri[cnt_num].n-N))
686  */
687  const stride_type *pincbase;
688  const stride_type *pposbase; /* points into pri[cnt_num].mask_pos; recovers the position in the mask from
689  the counter deduced above and the div and mod fields below */
690  index_type div, mod, nn;
691  stride_type pos_; /* stores the position when the index is not part of the pri array
692  (hence the index only has 1 value, ppos == &pos_, and pcnt == &zero) */
693  };
694  std::vector<index_value_data> idxval;
695  std::vector<stride_type> vectorized_strides_; /* if the tensor has regular strides, the mti might be vectorizable */
696  index_type vectorized_size_; /* the size of each vectorizable chunk */
697  index_type vectorized_pr_dim; /* all pr[i], i >= vectorized_pr_dim, can be accessed via vectorized_strides */
698  public:
699  void clear() {
700  N = 0; pr.clear(); pri.clear(); bloc_rank.clear(); bloc_nelt.clear();
701  it.clear(); pit0.clear(); itbase.clear(); idxval.clear();
702  }
703  void swap(multi_tensor_iterator& m) {
704  std::swap(N,m.N); pr.swap(m.pr); pri.swap(m.pri);
705  bloc_rank.swap(m.bloc_rank); bloc_nelt.swap(m.bloc_nelt);
706  it.swap(m.it); pit0.swap(m.pit0); itbase.swap(m.itbase);
707  idxval.swap(m.idxval);
708  }
709  void rewind() {
710  for (dim_type i=0; i < pr.size(); ++i) {
711  pr[i].pinc = pr[i].begin = &pri[i].inc[0]; pr[i].end = pr[i].begin+pri[i].inc.size();
712  }
713  for (dim_type n=0; n < N; ++n) it[n] = *(pit0[n]) + itbase[n];
714  for (dim_type i=0; i < idxval.size(); ++i) {
715  if (idxval[i].cnt_num != dim_type(-1)) {
716  idxval[i].ppinc = &pr[idxval[i].cnt_num].pinc;
717  idxval[i].pincbase = &pri[idxval[i].cnt_num].inc[0];
718  idxval[i].pposbase = &pri[idxval[i].cnt_num].mask_pos[0];
719  idxval[i].nn = (N-pri[idxval[i].cnt_num].n);
720  } else {
721  static const stride_type *null=0;
722  idxval[i].ppinc = &null;
723  idxval[i].pincbase = 0;
724  idxval[i].pposbase = &idxval[i].pos_;
725  idxval[i].nn = 1;
726  }
727  }
728  }
729  dim_type ndim() const { return dim_type(idxval.size()); }
730  /* get back the value of an index from the current iterator position */
731  index_type index(dim_type ii) {
732  index_value_data& iv = idxval[ii];
733  index_type cnt = index_type((*iv.ppinc - iv.pincbase)/iv.nn);
734  return ((iv.pposbase[cnt]) % iv.mod)/ iv.div;
735  }
736  index_type vectorized_size() const { return vectorized_size_; }
737  const std::vector<stride_type>& vectorized_strides() const { return vectorized_strides_; }
738  bool next(unsigned i_stop = unsigned(-1), unsigned i0_ = unsigned(-2)) {//=pr.size()-1) {
739  unsigned i0 = unsigned(i0_ == unsigned(-2) ? pr.size()-1 : i0_);
740  while (i0 != i_stop) {
741  for (unsigned n = pr[i0].n; n < N; ++n) {
742  // index_type pos = pr[i0].cnt * (N-pri[i0].n) + (n - pri[i0].n);
743  it[n] += *pr[i0].pinc; pr[i0].pinc++;
744  }
745  if (pr[i0].pinc != pr[i0].end) {
746  return true;
747  } else {
748  pr[i0].pinc = pr[i0].begin; i0--;
749  }
750  }
751  return false;
752  }
753  bool vnext() { return next(unsigned(-1), vectorized_pr_dim); }
754  bool bnext(dim_type b) { return next(bloc_rank[b]-1, bloc_rank[b+1]-1); }
755  bool bnext_useful(dim_type b) { return bloc_rank[b] != bloc_rank[b+1]; }
756  /* special version for iterating over tensors of the same dimensions
757  (should be a bit faster) */
758  bool qnext1() {
759  if (pr.size() == 0) return false;
760  std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
761  while (p_!=pr.rend()) {
762  it[0] += *(p_->pinc++);
763  if (p_->pinc != p_->end) {
764  return true;
765  } else {
766  p_->pinc = p_->begin; p_++;
767  }
768  }
769  return false;
770  }
771 
772  bool qnext2() {
773  if (pr.size() == 0) return false;
774  std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
775  while (p_!=pr.rend()) {
776  it[0] += *(p_->pinc++);
777  it[1] += *(p_->pinc++);
778  if (p_->pinc != p_->end) {
779  return true;
780  } else {
781  p_->pinc = p_->begin; p_++;
782  }
783  }
784  return false;
785  }
786 
787  scalar_type& p(dim_type n) { return *it[n]; }
788 
789  multi_tensor_iterator() {}
790  multi_tensor_iterator(std::vector<tensor_ref> trtab, bool with_index_values) {
791  init(trtab, with_index_values);
792  }
793  void assign(std::vector<tensor_ref> trtab, bool with_index_values) {
794  multi_tensor_iterator m(trtab, with_index_values);
795  swap(m);
796  }
797  multi_tensor_iterator(const tensor_ref& tr0, bool with_index_values) {
798  std::vector<tensor_ref> trtab(1); trtab[0] = tr0;
799  init(trtab, with_index_values);
800  }
801  void assign(const tensor_ref& tr0, bool with_index_values) {
802  multi_tensor_iterator m(tr0, with_index_values);
803  swap(m);
804  }
805  multi_tensor_iterator(const tensor_ref& tr0,
806  const tensor_ref& tr1, bool with_index_values) {
807  std::vector<tensor_ref> trtab(2); trtab[0] = tr0; trtab[1] = tr1;
808  init(trtab, with_index_values);
809  }
810  void assign(const tensor_ref& tr0, const tensor_ref& tr1, bool with_index_values) {
811  multi_tensor_iterator m(tr0, tr1, with_index_values);
812  swap(m);
813  }
814  multi_tensor_iterator(const tensor_ref& tr0,
815  const tensor_ref& tr1,
816  const tensor_ref& tr2, bool with_index_values) {
817  std::vector<tensor_ref> trtab(3); trtab[0] = tr0; trtab[1] = tr1; trtab[2] = tr2;
818  init(trtab, with_index_values);
819  }
820  void assign(const tensor_ref& tr0, const tensor_ref& tr1, const tensor_ref& tr2, bool with_index_values) {
821  multi_tensor_iterator m(tr0, tr1, tr2, with_index_values);
822  swap(m);
823  }
824  void init(std::vector<tensor_ref> trtab, bool with_index_values);
825  void print() const;
826  };
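/* Illustration (an assumption, not in the original file): summing all
   non-null entries of a tensor_ref 'tr' whose data pointer is already set.

     bgeot::multi_tensor_iterator mti(tr, false); // index values not needed
     bgeot::scalar_type sum = 0;
     mti.rewind();
     do { sum += mti.p(0); } while (mti.qnext1());
*/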
827 
828 
829  /* handles a tree of reductions
830  The tree is used if more than two tensors are reduced, e.g.
831  z(:,:)=t(:,i).u(i,j).v(j,:)
832  in that case, the reduction over j can be performed on u(:,j).v(j,:) = w(:,:)
833  and then z(:,:) = t(:,i).w(i,:)
834  */
835  struct tensor_reduction {
836  struct tref_or_reduction {
837  tensor_ref tr_;
838  std::shared_ptr<tensor_reduction> reduction;
839  tensor_ref &tr() { return tr_; }
840  const tensor_ref &tr() const { return tr_; }
841  explicit tref_or_reduction(const tensor_ref &tr__, const std::string& s)
842  : tr_(tr__), ridx(s) {}
843  explicit tref_or_reduction(const std::shared_ptr<tensor_reduction> &p, const std::string& s)
844  : reduction(p), ridx(s) {
845  reduction->result(tr_);
846  }
847  bool is_reduction() const { return reduction != 0; }
848  void swap(tref_or_reduction &other) { tr_.swap(other.tr_); std::swap(reduction, other.reduction); }
849  std::string ridx; /* reduction indexes, no index can appear
850  twice in the same tensor */
851  std::vector<dim_type> gdim; /* mapping to the global virtual
852  tensor whose range is the
853  union of the ranges of each
854  reduced tensor */
855  std::vector<dim_type> rdim; /* mapping to the dimensions of the
856  reduced tensor ( = dim_type(-1) for
857  dimensions i s.t. ridx[i] != ' ' ) */
858 
859  };
860  tensor_ranges reduced_range;
861  std::string reduction_chars; /* list of all indexes used for reduction */
862  tensor_ref trres;
863  typedef std::vector<tref_or_reduction>::iterator trtab_iterator;
864  std::vector<tref_or_reduction> trtab;
865  multi_tensor_iterator mti;
866  std::vector<scalar_type> out_data; /* optional storage of output */
867  TDIter pout_data;
868  public:
869  tensor_reduction() { clear(); }
870  virtual ~tensor_reduction() { clear(); }
871  void clear();
872 
873  /* returns the diagonalized shapes
874  to do this properly, this function should take as an argument
875  the required_shape of the ATN_reducted_tensor object, and merge it
876  with what it returns... not trivial
877  */
878  static void diag_shape(tensor_shape& ts, const std::string& s) {
879  for (index_type i=0; i < s.length(); ++i) {
880  size_type pos = s.find(s[i]);
881  if (s[i] != ' ' && pos != i) { // repeated index => reduction over the diagonal
882  ts = ts.diag_shape(tensor_mask::Diagonal(dim_type(pos),dim_type(i)));
883  }
884  }
885  }
886 
887  void insert(const tensor_ref& tr_, const std::string& s);
888  void prepare(const tensor_ref* tr_out = NULL);
889  void do_reduction();
890  void result(tensor_ref& res) const {
891  res=trres;
892  res.remove_unused_dimensions();
893  }
894  private:
895  void insert(const tref_or_reduction& tr_, const std::string& s);
896  void update_reduction_chars();
897  void pre_prepare();
898  void make_sub_reductions();
899  size_type find_best_sub_reduction(dal::bit_vector &best_lst, std::string &best_idxset);
900  };
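/* Illustration (an assumption based only on the declarations above, not part
   of the original file): reducing z(:,:) = t(:,i).u(i,:), where t_ref and
   u_ref are tensor_refs whose data pointers are already set; the exact
   meaning of the index string is inferred from the ridx/rdim comments.

     bgeot::tensor_reduction red;
     red.insert(t_ref, " i");  // reduce the second index of t over 'i'
     red.insert(u_ref, "i ");  // reduce the first index of u over 'i'
     red.prepare();            // builds the output tensor and the iterators
     red.do_reduction();       // the result can then be fetched with red.result(z_ref)
*/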
901 
902 } /* namespace bgeot */
903 
904 #endif