GetFEM++ 5.3
getfem_generic_assembly_compile_and_exec.cc
/*===========================================================================

 Copyright (C) 2013-2018 Yves Renard

 This file is a part of GetFEM++

 GetFEM++ is free software; you can redistribute it and/or modify it
 under the terms of the GNU Lesser General Public License as published
 by the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version along with the GCC Runtime Library
 Exception either version 3.1 or (at your option) any later version.
 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 License and GCC Runtime Library Exception for more details.
 You should have received a copy of the GNU Lesser General Public License
 along with this program; if not, write to the Free Software Foundation,
 Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

===========================================================================*/

#include "getfem/getfem_generic_assembly_compile_and_exec.h"
#include "getfem/getfem_generic_assembly_functions_and_operators.h"

// #define GA_USES_BLAS // not so interesting, at least for debian blas

// #define GA_DEBUG_INFO(a) { cout << a << endl; }
#define GA_DEBUG_INFO(a)
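// GA_DEBUG_INFO(a) expands to nothing by default; switching to the
// commented-out definition above makes every instruction print a trace line
// when it executes, which helps when debugging the assembly pipeline.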



namespace getfem {


  bool operator <(const gauss_pt_corresp &gpc1,
                  const gauss_pt_corresp &gpc2) {
    if (gpc1.pai != gpc2.pai)
      return (gpc1.pai < gpc2.pai);
    if (gpc1.nodes.size() != gpc2.nodes.size())
      return (gpc1.nodes.size() < gpc2.nodes.size());
    for (size_type i = 0; i < gpc1.nodes.size(); ++i)
      if (gpc1.nodes[i] != gpc2.nodes[i])
        return (gpc1.nodes[i] < gpc2.nodes[i]);
    if (gpc1.pgt1 != gpc2.pgt1)
      return (gpc1.pgt1 < gpc2.pgt1);
    if (gpc1.pgt2 != gpc2.pgt2)
      return (gpc1.pgt2 < gpc2.pgt2);
    return false;
  }
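
  // A lexicographic strict weak ordering: it lets gauss_pt_corresp objects
  // be used as keys of sorted associative containers such as std::map,
  // presumably to cache data attached to a Gauss point correspondence.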

  //=========================================================================
  // Instructions for compilation: basic optimized operations on tensors
  //=========================================================================
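
  // Each instruction derives from ga_instruction and implements exec(),
  // which is run once per integration point during assembly. A return value
  // of 0 means "proceed with the next instruction"; a nonzero value (see
  // ga_instruction_interpolate_filter below) appears to tell the executor
  // how many of the following instructions to skip.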

  struct ga_instruction_extract_local_im_data : public ga_instruction {
    base_tensor &t;
    const im_data &imd;
    papprox_integration &pai;
    const base_vector &U;
    const fem_interpolation_context &ctx;
    size_type qdim, cv_old;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: extract local im data");
      size_type cv = ctx.convex_num();
      if (cv != cv_old) {
        cv_old = cv;
        GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
                    ->approx_method() == pai, "Im data have to be used only "
                    "on their original integration method.");
      }
      size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
      GMM_ASSERT1(ipt != size_type(-1),
                  "Im data with no data on the current integration point.");
      auto it = U.begin()+ipt*qdim;
      std::copy(it, it+qdim, t.begin());
      return 0;
    }
    ga_instruction_extract_local_im_data
    (base_tensor &t_, const im_data &imd_, const base_vector &U_,
     papprox_integration &pai_, const fem_interpolation_context &ctx_,
     size_type qdim_)
      : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
        cv_old(-1)
    {}
  };

  struct ga_instruction_slice_local_dofs : public ga_instruction {
    const mesh_fem &mf;
    const base_vector &U;
    const fem_interpolation_context &ctx;
    base_vector &coeff;
    size_type qmult1, qmult2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Slice local dofs");
      GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
      slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
                                           coeff, qmult1, qmult2);
      return 0;
    }
    ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
                                    const fem_interpolation_context &ctx_,
                                    base_vector &coeff_,
                                    size_type qmult1_, size_type qmult2_)
      : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
        qmult1(qmult1_), qmult2(qmult2_) {}
  };

  struct ga_instruction_update_pfp : public ga_instruction {
    const mesh_fem &mf;
    const fem_interpolation_context &ctx;
    fem_precomp_pool &fp_pool;
    pfem_precomp &pfp;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Pfp update");
      if (ctx.have_pgp()) {
        size_type cv = ctx.is_convex_num_valid()
                     ? ctx.convex_num() : mf.convex_index().first_true();
        pfem pf = mf.fem_of_element(cv);
        if (!pfp || pf != pfp->get_pfem() ||
            ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
          pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
        }
      } else {
        pfp = 0;
      }
      return 0;
    }

    ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
                              const fem_interpolation_context &ctx_,
                              fem_precomp_pool &fp_pool_)
      : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
  };

  struct ga_instruction_first_ind_tensor : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;
    size_type qdim;
    const mesh_fem *mfn, **mfg;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: adapt first index of tensor");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
      size_type cv_1 = ctx.is_convex_num_valid()
                     ? ctx.convex_num() : mf.convex_index().first_true();
      pfem pf = mf.fem_of_element(cv_1);
      GMM_ASSERT1(pf, "An element without finite element method defined");
      size_type Qmult = qdim / pf->target_dim();
      size_type s = pf->nb_dof(cv_1) * Qmult;
      if (t.sizes()[0] != s)
        { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
      return 0;
    }

    ga_instruction_first_ind_tensor(base_tensor &t_,
                                    const fem_interpolation_context &ctx_,
                                    size_type qdim_, const mesh_fem *mfn_,
                                    const mesh_fem **mfg_)
      : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
  };

  struct ga_instruction_second_ind_tensor
    : public ga_instruction_first_ind_tensor {

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: adapt second index of tensor");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      size_type cv_1 = ctx.is_convex_num_valid()
                     ? ctx.convex_num() : mf.convex_index().first_true();
      pfem pf = mf.fem_of_element(cv_1);
      GMM_ASSERT1(pf, "An element without finite element method defined");
      size_type Qmult = qdim / pf->target_dim();
      size_type s = pf->nb_dof(cv_1) * Qmult;
      if (t.sizes()[1] != s)
        { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
      return 0;
    }

    ga_instruction_second_ind_tensor(base_tensor &t_,
                                     fem_interpolation_context &ctx_,
                                     size_type qdim_, const mesh_fem *mfn_,
                                     const mesh_fem **mfg_)
      : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}

  };

  struct ga_instruction_two_first_ind_tensor : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx1, &ctx2;
    size_type qdim1;
    const mesh_fem *mfn1, **mfg1;
    size_type qdim2;
    const mesh_fem *mfn2, **mfg2;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
      const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
      const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
      size_type cv_1 = ctx1.is_convex_num_valid()
                     ? ctx1.convex_num() : mf1.convex_index().first_true();
      size_type cv_2 = ctx2.is_convex_num_valid()
                     ? ctx2.convex_num() : mf2.convex_index().first_true();
      pfem pf1 = mf1.fem_of_element(cv_1);
      GMM_ASSERT1(pf1, "An element without finite element method defined");
      pfem pf2 = mf2.fem_of_element(cv_2);
      GMM_ASSERT1(pf2, "An element without finite element method defined");
      size_type Qmult1 = qdim1 / pf1->target_dim();
      size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
      size_type Qmult2 = qdim2 / pf2->target_dim();
      size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
      if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
        bgeot::multi_index mi = t.sizes();
        mi[0] = s1; mi[1] = s2;
        t.adjust_sizes(mi);
      }
      return 0;
    }

    ga_instruction_two_first_ind_tensor
    (base_tensor &t_, const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
     size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
      : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
        mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
  };


  struct ga_instruction_X_component : public ga_instruction {
    scalar_type &t;
    const fem_interpolation_context &ctx;
    size_type n;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: X component");
      t = ctx.xreal()[n];
      return 0;
    }

    ga_instruction_X_component
    (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
      : t(t_), ctx(ctx_), n(n_) {}
  };

  struct ga_instruction_X : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: X");
      GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
      gmm::copy(ctx.xreal(), t.as_vector());
      return 0;
    }

    ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
      : t(t_), ctx(ctx_) {}
  };

  struct ga_instruction_copy_small_vect : public ga_instruction {
    base_tensor &t;
    const base_small_vector &vec;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: copy small vector");
      GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
      gmm::copy(vec, t.as_vector());
      return 0;
    }
    ga_instruction_copy_small_vect(base_tensor &t_,
                                   const base_small_vector &vec_)
      : t(t_), vec(vec_) {}
  };

  struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unit normal vector");
      GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                  "vector. Possible reasons: not on boundary or "
                  "transformation failed.");
      gmm::copy(vec, t.as_vector());
      return 0;
    }
    ga_instruction_copy_Normal(base_tensor &t_,
                               const base_small_vector &Normal_)
      : ga_instruction_copy_small_vect(t_, Normal_) {}
  };

  struct ga_instruction_level_set_normal_vector : public ga_instruction {
    base_tensor &t;
    const mesh_im_level_set *mimls;
    const fem_interpolation_context &ctx;
    base_small_vector vec;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
      mimls->compute_normal_vector(ctx, vec);
      GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                  "vector. Possible reasons: not on boundary or "
                  "transformation failed.");
      gmm::copy(vec, t.as_vector());
      return 0;
    }
    ga_instruction_level_set_normal_vector
    (base_tensor &t_, const mesh_im_level_set *mimls_,
     const fem_interpolation_context &ctx_)
      : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
  };

  struct ga_instruction_element_size : public ga_instruction {
    base_tensor &t;
    scalar_type &es;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: element_size");
      GMM_ASSERT1(t.size() == 1, "Invalid element size.");
      t[0] = es;
      return 0;
    }
    ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
      : t(t_), es(es_) {}
  };

  struct ga_instruction_element_K : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: element_K");
      GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
      gmm::copy(ctx.K().as_vector(), t.as_vector());
      return 0;
    }
    ga_instruction_element_K(base_tensor &t_,
                             const fem_interpolation_context &ct)
      : t(t_), ctx(ct) {}
  };

  struct ga_instruction_element_B : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: element_B");
      GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
      gmm::copy(ctx.B().as_vector(), t.as_vector());
      return 0;
    }
    ga_instruction_element_B(base_tensor &t_,
                             const fem_interpolation_context &ct)
      : t(t_), ctx(ct) {}
  };

  struct ga_instruction_val_base : public ga_instruction {
    base_tensor &t;
    fem_interpolation_context &ctx;
    const mesh_fem &mf;
    const pfem_precomp &pfp;

    virtual int exec() { // --> t(ndof,target_dim)
      GA_DEBUG_INFO("Instruction: compute value of base functions");
      // if (ctx.have_pgp()) ctx.set_pfp(pfp);
      // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      // ctx.base_value(t);
      if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
      else {
        ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
        GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
        ctx.base_value(t);
      }
      return 0;
    }

    ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
                            const mesh_fem &mf_, const pfem_precomp &pfp_)
      : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
  };

  struct ga_instruction_xfem_plus_val_base : public ga_instruction {
    base_tensor &t;
    fem_interpolation_context &ctx;
    const mesh_fem &mf;
    pfem_precomp &pfp;

    virtual int exec() { // --> t(ndof,target_dim)
      GA_DEBUG_INFO("Instruction: compute value of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(1);
      ctx.base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }

    ga_instruction_xfem_plus_val_base(base_tensor &tt,
                                      fem_interpolation_context &ct,
                                      const mesh_fem &mf_, pfem_precomp &pfp_)
      : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
  };

  struct ga_instruction_xfem_minus_val_base : public ga_instruction {
    base_tensor &t;
    fem_interpolation_context &ctx;
    const mesh_fem &mf;
    pfem_precomp &pfp;

    virtual int exec() { // --> t(ndof,target_dim)
      GA_DEBUG_INFO("Instruction: compute value of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(-1);
      ctx.base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }

    ga_instruction_xfem_minus_val_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
  };

  struct ga_instruction_grad_base : public ga_instruction_val_base {

    virtual int exec() { // --> t(ndof,target_dim,N)
      GA_DEBUG_INFO("Instruction: compute gradient of base functions");
      // if (ctx.have_pgp()) ctx.set_pfp(pfp);
      // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      // ctx.grad_base_value(t);
      if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
      else {
        ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
        GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
        ctx.grad_base_value(t);
      }
      return 0;
    }

    ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
                             const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };

  struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {

    virtual int exec() { // --> t(ndof,target_dim,N)
      GA_DEBUG_INFO("Instruction: compute gradient of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(1);
      ctx.grad_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }

    ga_instruction_xfem_plus_grad_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };

  struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {

    virtual int exec() { // --> t(ndof,target_dim,N)
      GA_DEBUG_INFO("Instruction: compute gradient of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(-1);
      ctx.grad_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }

    ga_instruction_xfem_minus_grad_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };


  struct ga_instruction_hess_base : public ga_instruction_val_base {

    virtual int exec() { // --> t(ndof,target_dim,N*N)
      GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      ctx.hess_base_value(t);
      return 0;
    }

    ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
                             const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };

  struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {

    virtual int exec() { // --> t(ndof,target_dim,N*N)
      GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(1);
      ctx.hess_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }

    ga_instruction_xfem_plus_hess_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };

  struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {

    virtual int exec() { // --> t(ndof,target_dim,N*N)
      GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(-1);
      ctx.hess_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }

    ga_instruction_xfem_minus_hess_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };

  struct ga_instruction_val : public ga_instruction {
    scalar_type &a;
    base_tensor &t;
    const base_tensor &Z;
    const base_vector &coeff;
    size_type qdim;
    // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: variable value");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");

      if (qdim == 1) {
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                        "Wrong size for coeff vector");
        auto itc = coeff.begin(); auto itZ = Z.begin();
        a = (*itc++) * (*itZ++);
        while (itc != coeff.end()) a += (*itc++) * (*itZ++);
      } else {
        size_type target_dim = Z.sizes()[1];
        if (target_dim == 1) {
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                          "Wrong size for coeff vector");
          auto itc = coeff.begin(); auto itZ = Z.begin();
          for (auto it = t.begin(); it != t.end(); ++it)
            *it = (*itc++) * (*itZ);
          ++itZ;
          for (size_type j = 1; j < ndof; ++j, ++itZ) {
            for (auto it = t.begin(); it != t.end(); ++it)
              *it += (*itc++) * (*itZ);
          }
        } else {
          size_type Qmult = qdim / target_dim;
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                          "Wrong size for coeff vector");

          gmm::clear(t.as_vector());
          auto itc = coeff.begin();
          for (size_type j = 0; j < ndof; ++j) {
            auto it = t.begin();
            for (size_type q = 0; q < Qmult; ++q, ++itc) {
              for (size_type r = 0; r < target_dim; ++r)
                *it++ += (*itc) * Z[j + r*ndof];
            }
          }
        }
      }
      return 0;
    }

    ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
                       const base_vector &co, size_type q)
      : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
  };
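
  // A small worked example of the vectorization convention above, assuming a
  // three-dimensional field (qdim = 3) built from a scalar FEM
  // (target_dim = 1): then Qmult = qdim/target_dim = 3, coeff stores three
  // components per scalar dof, and the middle branch of exec() computes
  // t(q) = sum_j coeff(j*3+q) * Z(j) for q = 0, 1, 2.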

  struct ga_instruction_grad : public ga_instruction_val {
    // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      size_type N = Z.sizes()[2];
      if (qdim == 1) {
        GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
        GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
        auto itZ = Z.begin();
        for (auto it = t.begin(); it != t.end(); ++it) {
          auto itc = coeff.begin();
          *it = (*itc++) * (*itZ++);
          while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
        }
      } else {
        size_type target_dim = Z.sizes()[1];
        if (target_dim == 1) {
          GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
                          "Wrong size for coeff vector");
          for (size_type q = 0; q < qdim; ++q) {
            auto itZ = Z.begin(); auto it = t.begin() + q;
            for (size_type k = 0; k < N; ++k) {
              if (k) it += qdim;
              auto itc = coeff.begin() + q;
              *it = (*itc) * (*itZ++);
              for (size_type j = 1; j < ndof; ++j)
                { itc += qdim; *it += (*itc) * (*itZ++); }
            }
          }
        } else {
          size_type Qmult = qdim / target_dim;
          GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
                          "Wrong size for coeff vector");
          gmm::clear(t.as_vector());
          for (size_type q = 0; q < Qmult; ++q) {
            auto itZ = Z.begin();
            for (size_type k = 0; k < N; ++k)
              for (size_type r = 0; r < target_dim; ++r)
                for (size_type j = 0; j < ndof; ++j)
                  t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
          }
        }
      }
      return 0;
    }

    ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
                        const base_vector &co, size_type q)
      : ga_instruction_val(tt, Z_, co, q)
    {}

  };

  struct ga_instruction_hess : public ga_instruction_val {
    // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      size_type NN = gmm::sqr(t.sizes().back());
      GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
      if (qdim == 1) {
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                        "Wrong size for coeff vector");
        auto it = Z.begin(); auto itt = t.begin();
        for (size_type kl = 0; kl < NN; ++kl, ++itt) {
          *itt = scalar_type(0);
          for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
            *itt += (*itc) * (*it);
        }
        GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
      } else {
        size_type target_dim = Z.sizes()[1];
        if (target_dim == 1) {
          GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                          "Wrong size for coeff vector");
          gmm::clear(t.as_vector());
          for (size_type q = 0; q < qdim; ++q) {
            base_tensor::const_iterator it = Z.begin();
            for (size_type kl = 0; kl < NN; ++kl)
              for (size_type j = 0; j < ndof; ++j, ++it)
                t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
          }
        } else {
          size_type Qmult = qdim / target_dim;
          GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                          "Wrong size for coeff vector");
          gmm::clear(t.as_vector());
          for (size_type q = 0; q < Qmult; ++q) {
            base_tensor::const_iterator it = Z.begin();
            for (size_type kl = 0; kl < NN; ++kl)
              for (size_type r = 0; r < target_dim; ++r)
                for (size_type j = 0; j < ndof; ++j, ++it)
                  t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
          }
        }
      }
      return 0;
    }

    ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
                        const base_vector &co, size_type q)
      : ga_instruction_val(tt, Z_, co, q)
    {}
  };

  struct ga_instruction_diverg : public ga_instruction_val {
    // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: divergence");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      size_type target_dim = Z.sizes()[1];
      size_type N = Z.sizes()[2];
      size_type Qmult = qdim / target_dim;
      GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                      "Dimensions mismatch for divergence operator");
      GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                      "Wrong size for coeff vector");

      t[0] = scalar_type(0);
      base_tensor::const_iterator it = Z.begin();
      if (Qmult == 1)
        for (size_type k = 0; k < N; ++k) {
          if (k) it += (N*ndof + 1);
          for (size_type j = 0; j < ndof; ++j) {
            if (j) ++it;
            t[0] += coeff[j] * (*it);
          }
        }
      else // if (target_dim() == 1)
        for (size_type k = 0; k < N; ++k) {
          if (k) ++it;
          for (size_type j = 0; j < ndof; ++j) {
            if (j) ++it;
            t[0] += coeff[j*N+k] * (*it);
          }
        }
      return 0;
    }

    ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
                          const base_vector &co, size_type q)
      : ga_instruction_val(tt, Z_, co, q)
    {}
  };

  struct ga_instruction_copy_val_base : public ga_instruction {
    base_tensor &t;
    const base_tensor &Z;
    size_type qdim;
    // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: value of test functions");
      if (qdim == 1) {
        GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
        std::copy(Z.begin(), Z.end(), t.begin());
      } else {
        size_type target_dim = Z.sizes()[1];
        size_type Qmult = qdim / target_dim;
        if (Qmult == 1) {
          std::copy(Z.begin(), Z.end(), t.begin());
        } else {
          if (target_dim == 1) {
            size_type ndof = Z.sizes()[0];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for base vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            auto itZ = Z.begin();
            size_type s = t.sizes()[0], sss = s+1;

            // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
            auto it = t.begin();
            for (size_type i = 0; i < ndof; ++i, ++itZ) {
              if (i) it += Qmult;
              auto it2 = it;
              *it2 = *itZ;
              for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
            }
          } else {
            size_type ndof = Z.sizes()[0];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for base vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            auto itZ = Z.begin();
            size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

            // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
            for (size_type k = 0; k < target_dim; ++k) {
              auto it = t.begin() + (ss * k);
              for (size_type i = 0; i < ndof; ++i, ++itZ) {
                if (i) it += Qmult;
                auto it2 = it;
                *it2 = *itZ;
                for (size_type j = 1; j < Qmult; ++j)
                  { it2 += sss; *it2 = *itZ; }
              }
            }
          }
        }
      }
      return 0;
    }

    ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
                                 size_type q) : t(tt), Z(Z_), qdim(q) {}
  };
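
  // For instance, with ndof = 2 scalar base functions and Qmult = 2, the
  // expansion above fills the 4x2 block pattern
  //   [ Z(0,0)     0    ]
  //   [    0    Z(0,0)  ]
  //   [ Z(1,0)     0    ]
  //   [    0    Z(1,0)  ]
  // i.e. each scalar base function is replicated once per vector component.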

  struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
    // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient of test functions");
      if (qdim == 1) {
        std::copy(Z.begin(), Z.end(), t.begin());
      } else {
        size_type target_dim = Z.sizes()[1];
        size_type Qmult = qdim / target_dim;
        if (Qmult == 1) {
          std::copy(Z.begin(), Z.end(), t.begin());
        } else {
          if (target_dim == 1) {
            size_type ndof = Z.sizes()[0];
            size_type N = Z.sizes()[2];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for gradient vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            base_tensor::const_iterator itZ = Z.begin();
            size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;

            // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
            for (size_type l = 0; l < N; ++l) {
              base_tensor::iterator it = t.begin() + (ssss*l);
              for (size_type i = 0; i < ndof; ++i, ++itZ) {
                if (i) it += Qmult;
                base_tensor::iterator it2 = it;
                *it2 = *itZ;
                for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
              }
            }
          } else {
            size_type ndof = Z.sizes()[0];
            size_type N = Z.sizes()[2];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for gradient vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            base_tensor::const_iterator itZ = Z.begin();
            size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
            size_type ssss = ss*target_dim;

            // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
            for (size_type l = 0; l < N; ++l)
              for (size_type k = 0; k < target_dim; ++k) {
                base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
                for (size_type i = 0; i < ndof; ++i, ++itZ) {
                  if (i) it += Qmult;
                  base_tensor::iterator it2 = it;
                  *it2 = *itZ;
                  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
                }
              }
          }
        }
      }
      return 0;
    }

    ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
                                  size_type q)
      : ga_instruction_copy_val_base(tt,Z_,q) {}
  };

  struct ga_instruction_copy_vect_val_base : public ga_instruction {
    base_tensor &t;
    const base_tensor &Z;
    size_type qdim;
    // Z(ndof) --> t(qdim*ndof,qdim*target_dim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vectorized value of test functions");

      size_type ndof = Z.sizes()[0];
      GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                      "Wrong size for base vector");
      // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
      auto itZ = Z.begin();
      size_type s = t.sizes()[0], sss = s+1;

      // Performs t(i*qdim+j, k*qdim + j) = Z(i,k);
      auto it = t.begin();
      for (size_type i = 0; i < ndof; ++i, ++itZ) {
        if (i) it += qdim;
        auto it2 = it;
        *it2 = *itZ;
        for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
      }
      return 0;
    }

    ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
                                      size_type q) : t(tt), Z(Z_), qdim(q) {}
  };

  struct ga_instruction_copy_vect_grad_base
    : public ga_instruction_copy_vect_val_base {
    // Z(ndof,N) --> t(qdim*ndof,qdim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
      size_type ndof = Z.sizes()[0];
      size_type N = Z.sizes()[2];
      GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                      "Wrong size for gradient vector");
      // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
      base_tensor::const_iterator itZ = Z.begin();
      size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;

      // Performs t(i*qdim+j, k*qdim + j, l) = Z(i,k,l);
      for (size_type l = 0; l < N; ++l) {
        base_tensor::iterator it = t.begin() + (ssss*l);
        for (size_type i = 0; i < ndof; ++i, ++itZ) {
          if (i) it += qdim;
          base_tensor::iterator it2 = it;
          *it2 = *itZ;
          for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
        }
      }
      return 0;
    }

    ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
                                       size_type q)
      : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
  };

  struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
    // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian of test functions");
      size_type target_dim = Z.sizes()[1];
      size_type Qmult = qdim / target_dim;
      if (Qmult == 1) {
        gmm::copy(Z.as_vector(), t.as_vector());
      } else {
        size_type ndof = Z.sizes()[0];
        GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                        "Wrong size for Hessian vector");
        gmm::clear(t.as_vector());
        base_tensor::const_iterator itZ = Z.begin();
        size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

        // Performs t(i*Qmult+j, k*Qmult + j, l, m) = Z(i,k,l*N+m)
        size_type NNdim = Z.sizes()[2]*target_dim;
        for (size_type klm = 0; klm < NNdim; ++klm) {
          base_tensor::iterator it = t.begin() + (ss * klm);
          for (size_type i = 0; i < ndof; ++i, ++itZ) {
            if (i) it += Qmult;
            base_tensor::iterator it2 = it;
            *it2 = *itZ;
            for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
          }
        }
      }
      return 0;
    }

    ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
                                  size_type q)
      : ga_instruction_copy_val_base(tt, Z_, q) {}
  };

  struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
    // Z(ndof,target_dim,N) --> t(Qmult*ndof)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: divergence of test functions");
      size_type ndof = Z.sizes()[0];
      size_type target_dim = Z.sizes()[1];
      size_type N = Z.sizes()[2];
      size_type Qmult = qdim / target_dim;
      GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                      "Dimensions mismatch for divergence operator");
      GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
                      "Wrong size for divergence vector");
      gmm::clear(t.as_vector());
      base_tensor::const_iterator itZ = Z.begin();
      if (Qmult == 1) { // target_dim == N
        // Performs t(i) = Trace(Z(i,:,:))
        for (size_type l = 0; l < N; ++l) {
          base_tensor::iterator it = t.begin();
          if (l) itZ += target_dim*ndof+1;
          for (size_type i = 0; i < ndof; ++i) {
            if (i) { ++it; ++itZ; }
            *it += *itZ;
          }
        }
      } else { // Qmult == N
        // Performs t(i*Qmult+j) = Z(i,1,j)
        for (size_type j = 0; j < N; ++j) {
          base_tensor::iterator it = t.begin() + j;
          if (j) ++itZ;
          for (size_type i = 0; i < ndof; ++i) {
            if (i) { it += Qmult; ++itZ; }
            *it += *itZ;
          }
        }
      }
      return 0;
    }

    ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
                                    size_type q)
      : ga_instruction_copy_val_base(tt, Z_, q) {}
  };

  struct ga_instruction_elementary_transformation {
    const base_vector &coeff_in;
    base_vector coeff_out;
    pelementary_transformation elemtrans;
    const mesh_fem &mf;
    const fem_interpolation_context &ctx;
    base_matrix &M;
    const mesh_fem **mf_M;
    size_type &icv;

    void do_transformation() {
      size_type nn = gmm::vect_size(coeff_in);
      if (M.size() == 0 || icv != ctx.convex_num() || &mf != *mf_M) {
        M.base_resize(nn, nn);
        *mf_M = &mf; icv = ctx.convex_num();
        elemtrans->give_transformation(mf, icv, M);
      }
      coeff_out.resize(nn);
      gmm::mult(M, coeff_in, coeff_out); // remember: coeff == coeff_out
    }

    ga_instruction_elementary_transformation
    (const base_vector &co, pelementary_transformation e,
     const mesh_fem &mf_, const fem_interpolation_context &ctx_,
     base_matrix &M_, const mesh_fem **mf_M_, size_type &icv_)
      : coeff_in(co), elemtrans(e), mf(mf_), ctx(ctx_),
        M(M_), mf_M(mf_M_), icv(icv_) {}
    ~ga_instruction_elementary_transformation() {};
  };
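
  // The transformation matrix M is cached: do_transformation() rebuilds it
  // only when the current element (icv) or the underlying mesh_fem changes,
  // so all Gauss points of one element reuse the same M.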

  struct ga_instruction_elementary_transformation_val
    : public ga_instruction_val, ga_instruction_elementary_transformation {
    // Z(ndof,target_dim), coeff_in(Qmult,ndof) --> t(target_dim*Qmult)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: variable value with elementary "
                    "transformation");
      do_transformation();
      return ga_instruction_val::exec();
    }

    ga_instruction_elementary_transformation_val
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_val(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };

  struct ga_instruction_elementary_transformation_grad
    : public ga_instruction_grad, ga_instruction_elementary_transformation {
    // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
      do_transformation();
      return ga_instruction_grad::exec();
    }

    ga_instruction_elementary_transformation_grad
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_grad(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };

  struct ga_instruction_elementary_transformation_hess
    : public ga_instruction_hess, ga_instruction_elementary_transformation {
    // Z(ndof,target_dim,N,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
      do_transformation();
      return ga_instruction_hess::exec();
    }

    ga_instruction_elementary_transformation_hess
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_hess(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };

  struct ga_instruction_elementary_transformation_diverg
    : public ga_instruction_diverg, ga_instruction_elementary_transformation {
    // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(1)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
      do_transformation();
      return ga_instruction_diverg::exec();
    }

    ga_instruction_elementary_transformation_diverg
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_diverg(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };

  struct ga_instruction_update_group_info : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set &gis;
    ga_instruction_set::interpolate_info &inin;
    const std::string gname;
    ga_instruction_set::variable_group_info &vgi;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Update group info for "+gname);
      if (vgi.varname &&
          &(workspace.associated_mf(*(vgi.varname))->linked_mesh())==inin.m)
        return 0;
      const std::string &varname
        = inin.m ? workspace.variable_in_group(gname, *(inin.m))
                 : workspace.first_variable_of_group(gname);
      vgi.mf = workspace.associated_mf(varname);
      vgi.Ir = gis.var_intervals[varname];
      vgi.In = workspace.interval_of_variable(varname);
      vgi.alpha = workspace.factor_of_variable(varname);
      vgi.U = gis.extended_vars[varname];
      vgi.varname = &varname;
      return 0;
    }

    ga_instruction_update_group_info
    (const ga_workspace &workspace_, ga_instruction_set &gis_,
     ga_instruction_set::interpolate_info &inin_, const std::string &gname_,
     ga_instruction_set::variable_group_info &vgi_) :
      workspace(workspace_), gis(gis_), inin(inin_), gname(gname_),
      vgi(vgi_) {}
  };
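
  // For a variable group, the variable actually used depends on the mesh the
  // interpolation lands on: the instruction above re-binds the mesh_fem, the
  // dof intervals, the scaling factor and the extended vector of the group
  // member living on inin.m, and skips the update when that binding is
  // already current.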

  struct ga_instruction_interpolate_filter : public ga_instruction {
    base_tensor &t;
    const ga_instruction_set::interpolate_info &inin;
    size_type pt_type;
    int nb;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated filter");
      if ((pt_type == size_type(-1) && inin.pt_type) ||
          (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
        GA_DEBUG_INFO("Instruction: interpolated filter: pass");
        return 0;
      }
      else {
        GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
        gmm::clear(t.as_vector());
        return nb;
      }
      return 0;
    }

    ga_instruction_interpolate_filter
    (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
     size_type ind_, int nb_)
      : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
  };


  struct ga_instruction_interpolate : public ga_instruction {
    base_tensor &t;
    const mesh **m;
    const mesh_fem *mfn, **mfg;
    const base_vector *Un, **Ug;
    fem_interpolation_context &ctx;
    base_vector coeff;
    size_type qdim;
    const size_type &ipt;
    fem_precomp_pool &fp_pool;
    ga_instruction_set::interpolate_info &inin;

    virtual int exec() {
      GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
                  "transformation. Probably transformation failed");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      const base_vector &U = *(Ug ? *Ug : Un);
      GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                  "on another mesh than the one it is defined on");
      slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
      pfem pf = mf.fem_of_element(ctx.convex_num());
      GMM_ASSERT1(pf, "Undefined finite element method");
      if (ctx.have_pgp()) {
        if (ipt == 0)
          inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
        ctx.set_pfp(inin.pfps[&mf]);
      } else {
        ctx.set_pf(pf);
      }
      return 0;
    }

    ga_instruction_interpolate
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
        ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
  };

  struct ga_instruction_interpolate_val : public ga_instruction_interpolate {
    // --> t(target_dim*Qmult)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable value");
      ga_instruction_interpolate::exec();
      ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
      // cout << "interpolate " << &U << " result : " << t.as_vector() << endl;
      return 0;
    }

    ga_instruction_interpolate_val
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_)
    {}
  };

  struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {
    // --> t(target_dim*Qmult,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable grad");
      ga_instruction_interpolate::exec();
      base_matrix v(qdim, ctx.N());
      ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
      gmm::copy(v.as_vector(), t.as_vector());
      return 0;
    }

    ga_instruction_interpolate_grad
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_)
    {}
  };

  struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {
    // --> t(target_dim*Qmult,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable hessian");
      ga_instruction_interpolate::exec();
      base_matrix v(qdim, ctx.N()*ctx.N()); // To be optimized
      ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
      gmm::copy(v.as_vector(), t.as_vector());
      return 0;
    }

    ga_instruction_interpolate_hess
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_)
    {}
  };

  struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {
    // --> t(1)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable divergence");
      ga_instruction_interpolate::exec();
      ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
      return 0;
    }

    ga_instruction_interpolate_diverg
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_)
    {}
  };

  struct ga_instruction_interpolate_base {
    base_tensor ZZ;
    const mesh **m;
    const mesh_fem *mfn, **mfg;
    const size_type &ipt;
    ga_instruction_set::interpolate_info &inin;
    fem_precomp_pool &fp_pool;

    virtual int exec() {
      GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
                  "the transformation. Probably transformation failed");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                  "on another mesh than the one it is defined on");

      pfem pf = mf.fem_of_element(inin.ctx.convex_num());
      GMM_ASSERT1(pf, "Undefined finite element method");

      if (inin.ctx.have_pgp()) {
        if (ipt == 0)
          inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
        inin.ctx.set_pfp(inin.pfps[&mf]);
      } else {
        inin.ctx.set_pf(pf);
      }
      return 0;
    }

    ga_instruction_interpolate_base
    (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
     const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
     fem_precomp_pool &fp_pool_)
      : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
        fp_pool(fp_pool_) {}
  };
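
  // The fem precomputation attached to the target mesh_fem is refreshed in
  // inin.pfps only when ipt == 0, i.e. apparently at the first integration
  // point of each element; subsequent points reuse the cached pfp.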

  struct ga_instruction_interpolate_val_base
    : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
    // ctx --> Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base value");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // remember Z == ZZ
      return ga_instruction_copy_val_base::exec();
    }

    ga_instruction_interpolate_val_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const size_type &ipt_, size_type q,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_val_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };

  struct ga_instruction_interpolate_grad_base
    : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
    // ctx --> Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base grad");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
      return ga_instruction_copy_grad_base::exec();
    }

    ga_instruction_interpolate_grad_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const size_type &ipt_, size_type q,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_grad_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };

  struct ga_instruction_interpolate_hess_base
    : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
    // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base hessian");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ); // remember Z == ZZ
      return ga_instruction_copy_hess_base::exec();
    }

    ga_instruction_interpolate_hess_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const size_type &ipt_, size_type q,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_hess_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };

  struct ga_instruction_interpolate_diverg_base
    : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
    // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base divergence");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
      return ga_instruction_copy_diverg_base::exec();
    }

    ga_instruction_interpolate_diverg_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const size_type &ipt_, size_type q,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_diverg_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };


  struct ga_instruction_elementary_transformation_base {
    base_tensor t_in;
    base_tensor &t_out;
    pelementary_transformation elemtrans;
    const mesh_fem &mf;
    const fem_interpolation_context &ctx;
    base_matrix &M;
    const mesh_fem **mf_M;
    size_type &icv;

    void do_transformation(size_type n) {
      if (M.size() == 0 || icv != ctx.convex_num() || &mf != *mf_M) {
        M.base_resize(n, n);
        *mf_M = &mf; icv = ctx.convex_num();
        elemtrans->give_transformation(mf, icv, M);
      }
      t_out.mat_reduction(t_in, M, 0);
    }

    ga_instruction_elementary_transformation_base
    (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf_,
     const fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : t_out(t_), elemtrans(e), mf(mf_), ctx(ctx_),
        M(M_), mf_M(mf_M_), icv(icv_) {}
  };

  struct ga_instruction_elementary_transformation_val_base
    : public ga_instruction_copy_val_base,
      ga_instruction_elementary_transformation_base {
    // Z(ndof,target_dim) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: value of test functions with elementary "
                    "transformation");
      size_type ndof = Z.sizes()[0];
      size_type Qmult = qdim / Z.sizes()[1];
      t_in.adjust_sizes(t_out.sizes());
      ga_instruction_copy_val_base::exec();
      do_transformation(ndof*Qmult);
      return 0;
    }

    ga_instruction_elementary_transformation_val_base
    (base_tensor &t_, const base_tensor &Z_, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_copy_val_base(t_in, Z_, q),
        ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
                                                      mf_M_, icv_) {}
  };

  struct ga_instruction_elementary_transformation_grad_base
    : public ga_instruction_copy_grad_base,
      ga_instruction_elementary_transformation_base {
    // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
                    "transformation");
      size_type ndof = Z.sizes()[0];
      size_type Qmult = qdim / Z.sizes()[1];
      t_in.adjust_sizes(t_out.sizes());
      ga_instruction_copy_grad_base::exec();
      do_transformation(ndof*Qmult);
      return 0;
    }

    ga_instruction_elementary_transformation_grad_base
    (base_tensor &t_, const base_tensor &Z_, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_copy_grad_base(t_in, Z_, q),
        ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
                                                      mf_M_, icv_) {}
  };

  struct ga_instruction_elementary_transformation_hess_base
    : public ga_instruction_copy_hess_base,
      ga_instruction_elementary_transformation_base {
    // Z(ndof,target_dim,N*N) --> t_out(Qmult*ndof,Qmult*target_dim,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
                    "transformation");
      size_type ndof = Z.sizes()[0];
      size_type Qmult = qdim / Z.sizes()[1];
      t_in.adjust_sizes(t_out.sizes());
      ga_instruction_copy_hess_base::exec();
      do_transformation(ndof*Qmult);
      return 0;
    }

    ga_instruction_elementary_transformation_hess_base
    (base_tensor &t_, const base_tensor &Z_, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_copy_hess_base(t_in, Z_, q),
        ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
                                                      mf_M_, icv_) {}
  };
1535 
1536  struct ga_instruction_elementary_transformation_diverg_base
1537  : public ga_instruction_copy_diverg_base,
1538  ga_instruction_elementary_transformation_base {
 1539  // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof)
1540  virtual int exec() {
1541  GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
1542  "transformation");
1543  size_type ndof = Z.sizes()[0];
1544  size_type Qmult = qdim / Z.sizes()[1];
1545  t_in.adjust_sizes(t_out.sizes());
1546  ga_instruction_copy_diverg_base::exec();
1547  do_transformation(ndof*Qmult);
1548  return 0;
1549  }
1550 
1551  ga_instruction_elementary_transformation_diverg_base
1552  (base_tensor &t_, const base_tensor &Z_, size_type q,
1553  pelementary_transformation e, const mesh_fem &mf_,
1554  fem_interpolation_context &ctx_, base_matrix &M_,
1555  const mesh_fem **mf_M_, size_type &icv_)
1556  : ga_instruction_copy_diverg_base(t_in, Z_, q),
1557  ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
1558  mf_M_, icv_) {}
1559  };
1560 
1561 
1562  struct ga_instruction_add : public ga_instruction {
1563  base_tensor &t;
1564  const base_tensor &tc1, &tc2;
1565  virtual int exec() {
1566  GA_DEBUG_INFO("Instruction: addition");
1567  GA_DEBUG_ASSERT(t.size() == tc1.size(),
1568  "internal error " << t.size() << " != " << tc1.size());
1569  GA_DEBUG_ASSERT(t.size() == tc2.size(),
1570  "internal error " << t.size() << " != " << tc2.size());
1571  gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
1572  return 0;
1573  }
1574  ga_instruction_add(base_tensor &t_,
1575  const base_tensor &tc1_, const base_tensor &tc2_)
1576  : t(t_), tc1(tc1_), tc2(tc2_) {}
1577  };
1578 
1579  struct ga_instruction_add_to : public ga_instruction {
1580  base_tensor &t;
1581  const base_tensor &tc1;
1582  virtual int exec() {
 1583  GA_DEBUG_INFO("Instruction: addition in place");
1584  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1585  << " incompatible with " << tc1.size());
1586  gmm::add(tc1.as_vector(), t.as_vector());
1587  return 0;
1588  }
1589  ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
1590  : t(t_), tc1(tc1_) {}
1591  };
1592 
1593  struct ga_instruction_add_to_coeff : public ga_instruction {
1594  base_tensor &t;
1595  const base_tensor &tc1;
1596  scalar_type &coeff;
1597  virtual int exec() {
 1598  GA_DEBUG_INFO("Instruction: scaled addition in place");
1599  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1600  << " incompatible with " << tc1.size());
1601  gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
1602  return 0;
1603  }
1604  ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
1605  scalar_type &coeff_)
1606  : t(t_), tc1(tc1_), coeff(coeff_) {}
1607  };
1608 
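  // The three addition variants above differ only in how the result is
  // stored: ga_instruction_add overwrites t with tc1 + tc2,
  // ga_instruction_add_to accumulates t += tc1, and
  // ga_instruction_add_to_coeff accumulates t += coeff*tc1, where coeff is
  // held by reference and read at execution time, so a scaling updated
  // between two exec() calls (e.g. at each Gauss point) is honoured.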
1609  struct ga_instruction_sub : public ga_instruction {
1610  base_tensor &t;
1611  const base_tensor &tc1, &tc2;
1612  virtual int exec() {
1613  GA_DEBUG_INFO("Instruction: subtraction");
1614  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
1615  "internal error");
1616  gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
1617  t.as_vector());
1618  return 0;
1619  }
1620  ga_instruction_sub(base_tensor &t_,
1621  const base_tensor &tc1_, const base_tensor &tc2_)
1622  : t(t_), tc1(tc1_), tc2(tc2_) {}
1623  };
1624 
1625  struct ga_instruction_opposite : public ga_instruction {
1626  base_tensor &t;
1627  virtual int exec() {
1628  GA_DEBUG_INFO("Instruction: multiplication with -1");
1629  gmm::scale(t.as_vector(), scalar_type(-1));
1630  return 0;
1631  }
1632  ga_instruction_opposite(base_tensor &t_) : t(t_) {}
1633  };
1634 
1635  struct ga_instruction_print_tensor : public ga_instruction {
1636  base_tensor &t;
1637  pga_tree_node pnode;
1638  const fem_interpolation_context &ctx;
1639  size_type &nbpt, &ipt;
1640  virtual int exec() {
1641  GA_DEBUG_INFO("Instruction: tensor print");
1642  cout << "Print term "; ga_print_node(pnode, cout);
1643  cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
1644  << ctx.convex_num() << ": " << t << endl;
1645  return 0;
1646  }
1647  ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
1648  const fem_interpolation_context &ctx_,
1649  size_type &nbpt_, size_type &ipt_)
1650  : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
1651  };
1652 
1653  struct ga_instruction_copy_tensor : public ga_instruction {
1654  base_tensor &t;
1655  const base_tensor &tc1;
1656  virtual int exec() {
1657  GA_DEBUG_INFO("Instruction: tensor copy");
1658  std::copy(tc1.begin(), tc1.end(), t.begin());
1659  // gmm::copy(tc1.as_vector(), t.as_vector());
1660  return 0;
1661  }
1662  ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
1663  : t(t_), tc1(tc1_) {}
1664  };
1665 
1666  struct ga_instruction_clear_tensor : public ga_instruction {
1667  base_tensor &t;
1668  virtual int exec() {
1669  GA_DEBUG_INFO("Instruction: clear tensor");
1670  std::fill(t.begin(), t.end(), scalar_type(0));
1671  return 0;
1672  }
1673  ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
1674  };
1675 
1676  struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
1677  base_tensor &t;
1678  const base_tensor &tc1;
1679  virtual int exec() {
1680  GA_DEBUG_INFO("Instruction: tensor copy possibly void");
1681  if (tc1.size())
1682  gmm::copy(tc1.as_vector(), t.as_vector());
1683  else
1684  gmm::clear(t.as_vector());
1685  return 0;
1686  }
1687  ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
1688  const base_tensor &tc1_)
1689  : t(t_), tc1(tc1_) {}
1690  };
1691 
1692  struct ga_instruction_copy_scalar : public ga_instruction {
1693  scalar_type &t; const scalar_type &t1;
1694  virtual int exec() {
1695  GA_DEBUG_INFO("Instruction: scalar copy");
1696  t = t1;
1697  return 0;
1698  }
1699  ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
1700  : t(t_), t1(t1_) {}
1701  };
1702 
1703  struct ga_instruction_copy_vect : public ga_instruction {
1704  base_vector &t;
1705  const base_vector &t1;
1706  virtual int exec() {
 1707  GA_DEBUG_INFO("Instruction: fixed size vector copy");
1708  gmm::copy(t1, t);
1709  return 0;
1710  }
1711  ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
1712  : t(t_), t1(t1_) {}
1713  };
1714 
1715  struct ga_instruction_trace : public ga_instruction {
1716  base_tensor &t;
1717  const base_tensor &tc1;
1718  size_type n;
1719  // tc1(:,:,...,n,n) --> t(:,:,...)
1720  virtual int exec() {
1721  GA_DEBUG_INFO("Instruction: Trace");
1722  GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
1723  size_type s = t.size() * (n+1);
1724  auto it = t.begin();
1725  auto it1 = tc1.begin();
1726  for (; it != t.end(); ++it, ++it1) {
1727  auto it2 = it1;
1728  *it = *it2;
1729  for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
1730  }
1731  return 0;
1732  }
1733 
1734  ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_, size_type n_)
1735  : t(t_), tc1(tc1_), n(n_) {}
1736  };
1737 
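  // Worked example (hypothetical sizes): for t.size() == 2 and n == 3 the
  // diagonal stride is s = 2*(3+1) = 8, so with column-major storage of
  // tc1(:,3,3) the first output entry is t[0] = tc1[0] + tc1[8] + tc1[16],
  // i.e. the sum of the diagonal entries tc1(0,i,i).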
1738  struct ga_instruction_deviator : public ga_instruction {
1739  base_tensor &t;
1740  const base_tensor &tc1;
1741  size_type n;
1742  // tc1(:,:,...,n,n) --> t(:,:,...,n,n)
1743  virtual int exec() {
1744  GA_DEBUG_INFO("Instruction: Deviator");
1745  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1746 
1747  gmm::copy(tc1.as_vector(), t.as_vector());
1748 
1749  size_type nb = t.size()/(n*n);
1750  size_type s = nb * (n+1), j = 0;
1751  base_tensor::iterator it = t.begin();
1752  base_tensor::const_iterator it1 = tc1.begin();
1753  for (; j < nb; ++it, ++it1, ++j) {
1754  scalar_type tr(0);
1755  base_tensor::const_iterator it2 = it1;
1756  tr += *it2;
1757  for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
1758  tr /= scalar_type(n);
1759 
1760  base_tensor::iterator it3 = it;
1761  *it3 -= tr;
1762  for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
1763  }
1764  return 0;
1765  }
1766 
1767  ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
1768  size_type n_)
1769  : t(t_), tc1(tc1_), n(n_) {}
1770  };
1771 
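  // On the trailing n x n indices the instruction above computes the
  // classical deviator Dev(A) = A - (tr(A)/n)*Id, walking the diagonal
  // with the same stride nb*(n+1) as ga_instruction_trace.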
1772  struct ga_instruction_transpose : public ga_instruction { // To be optimized
1773  base_tensor &t;
1774  const base_tensor &tc1;
1775  size_type n1, n2, nn;
1776  virtual int exec() {
1777  GA_DEBUG_INFO("Instruction: transpose");
1778  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1779 
1780  size_type n0 = tc1.size() / (n1*n2*nn);
1781  auto it = t.begin();
1782  for (size_type i = 0; i < nn; ++i) {
1783  size_type s1 = i*n1*n2*n0;
1784  for (size_type j = 0; j < n1; ++j) {
1785  size_type s2 = s1 + j*n0;
1786  for (size_type k = 0; k < n2; ++k) {
1787  size_type s3 = s2 + k*n1*n0;
1788  for (size_type l = 0; l < n0; ++l, ++it)
1789  *it = tc1[s3+l];
1790  }
1791  }
1792  }
1793  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1794  return 0;
1795  }
1796  ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
1797  size_type n1_, size_type n2_, size_type nn_)
1798  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1799  };
1800 
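  // Index mapping (as implemented above): with tc1 of dimensions
  // (n0,n1,n2,nn) stored column-major, t(l,k,j,i) = tc1(l,j,k,i), i.e. the
  // two middle indices of lengths n1 and n2 are exchanged. For
  // n0 = nn = 1, n1 = 2, n2 = 3 this is the ordinary transpose of a 2x3
  // matrix into a 3x2 one.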
1801  struct ga_instruction_swap_indices : public ga_instruction {// To be optimized
1802  base_tensor &t;
1803  const base_tensor &tc1;
1804  size_type nn1, nn2, ii2, ii3;
1805  virtual int exec() {
1806  GA_DEBUG_INFO("Instruction: swap indices");
1807  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1808  size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);
1809 
1810  auto it = t.begin();
1811  for (size_type i = 0; i < ii3; ++i)
1812  for (size_type j = 0; j < nn1; ++j)
1813  for (size_type k = 0; k < ii2; ++k)
1814  for (size_type l = 0; l < nn2; ++l) {
1815  size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
1816  for (size_type m = 0; m < ii1; ++m, ++it)
1817  *it = tc1[m+ind];
1818  }
1819  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1820  return 0;
1821  }
1822  ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
1823  size_type n1_, size_type n2_,
1824  size_type i2_, size_type i3_)
1825  : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
1826  };
1827 
1828  struct ga_instruction_index_move_last : public ga_instruction {// To be optimized
1829  base_tensor &t;
1830  const base_tensor &tc1;
1831  size_type nn, ii2;
1832  virtual int exec() {
 1833  GA_DEBUG_INFO("Instruction: move index to last position");
1834  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1835  size_type ii1 = t.size() / (nn*ii2);
1836 
1837  auto it = t.begin();
1838  for (size_type i = 0; i < nn; ++i)
1839  for (size_type j = 0; j < ii2; ++j) {
1840  size_type ind = i*ii1+j*ii1*nn;
1841  for (size_type k = 0; k < ii1; ++k, ++it)
1842  *it = tc1[k+ind];
1843  }
1844  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1845  return 0;
1846  }
1847  ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
1848  size_type n_, size_type i2_)
1849  : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
1850  };
1851 
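  // Index mapping (as implemented above): with tc1 of dimensions
  // (ii1,nn,ii2), t(k,j,i) = tc1(k,i,j), i.e. the index of length nn is
  // moved from the middle to the last position of the output tensor.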
1852  struct ga_instruction_transpose_no_test : public ga_instruction {
1853  base_tensor &t;
1854  const base_tensor &tc1;
1855  size_type n1, n2, nn;
1856  virtual int exec() {
1857  GA_DEBUG_INFO("Instruction: transpose");
1858  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1859 
1860  auto it = t.begin();
1861  for (size_type i = 0; i < nn; ++i) {
1862  size_type s1 = i*n1*n2;
1863  for (size_type j = 0; j < n1; ++j) {
1864  size_type s2 = s1 + j;
1865  for (size_type k = 0; k < n2; ++k, ++it)
1866  *it = tc1[s2 + k*n1];
1867  }
1868  }
1869  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1870  return 0;
1871  }
1872  ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
1873  size_type n1_, size_type n2_,
1874  size_type nn_)
1875  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1876  };
1877 
1878  struct ga_instruction_transpose_test : public ga_instruction {
1879  base_tensor &t;
1880  const base_tensor &tc1;
1881  virtual int exec() {
1882  GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
1883  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1884  GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");
1885 
1886  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
1887  size_type s = t.size() / s3;
1888  base_tensor::iterator it = t.begin();
1889  for (size_type k = 0; k < s; ++k)
1890  for (size_type j = 0; j < s2; ++j)
1891  for (size_type i = 0; i < s1; ++i, ++it)
1892  *it = tc1[j+s2*i+k*s3];
1893  return 0;
1894  }
1895  ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
1896  : t(t_), tc1(tc1_) {}
1897  };
1898 
1899  struct ga_instruction_sym : public ga_instruction {
1900  base_tensor &t;
1901  const base_tensor &tc1;
1902  virtual int exec() {
1903  GA_DEBUG_INFO("Instruction: symmetric part");
1904  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1905  size_type order = t.sizes().size();
1906  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
1907  size_type s = t.size() / (s1*s2);
1908  for (size_type i = 0; i < s1; ++i)
1909  for (size_type j = 0; j < s2; ++j) {
1910  base_tensor::iterator it = t.begin() + s*(i + s1*j);
1911  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
1912  it1T = tc1.begin() + s*(j + s2*i);
1913  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
1914  }
1915  return 0;
1916  }
1917  ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
1918  : t(t_), tc1(tc1_) {}
1919  };
1920 
1921  struct ga_instruction_skew : public ga_instruction {
1922  base_tensor &t;
1923  const base_tensor &tc1;
1924  virtual int exec() {
1925  GA_DEBUG_INFO("Instruction: skew-symmetric part");
1926  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1927  size_type order = t.sizes().size();
1928  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
1929  size_type s = t.size() / (s1*s2);
1930  for (size_type i = 0; i < s1; ++i)
1931  for (size_type j = 0; j < s2; ++j) {
1932  base_tensor::iterator it = t.begin() + s*(i + s1*j);
1933  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
1934  it1T = tc1.begin() + s*(j + s2*i);
1935  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
1936  }
1937  return 0;
1938  }
1939  ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
1940  : t(t_), tc1(tc1_) {}
1941  };
1942 
1943  struct ga_instruction_scalar_add : public ga_instruction {
1944  scalar_type &t;
1945  const scalar_type &c, &d;
1946  virtual int exec() {
1947  GA_DEBUG_INFO("Instruction: scalar addition");
1948  t = c + d;
1949  return 0;
1950  }
1951  ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
1952  const scalar_type &d_)
1953  : t(t_), c(c_), d(d_) {}
1954  };
1955 
1956  struct ga_instruction_scalar_sub : public ga_instruction {
1957  scalar_type &t;
1958  const scalar_type &c, &d;
1959  virtual int exec() {
1960  GA_DEBUG_INFO("Instruction: scalar subtraction");
1961  t = c - d;
1962  return 0;
1963  }
1964  ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
1965  const scalar_type &d_)
1966  : t(t_), c(c_), d(d_) {}
1967  };
1968 
1969  struct ga_instruction_scalar_scalar_mult : public ga_instruction {
1970  scalar_type &t;
1971  const scalar_type &c, &d;
1972  virtual int exec() {
1973  GA_DEBUG_INFO("Instruction: scalar multiplication");
1974  t = c * d;
1975  return 0;
1976  }
1977  ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
1978  const scalar_type &d_)
1979  : t(t_), c(c_), d(d_) {}
1980  };
1981 
1982  struct ga_instruction_scalar_scalar_div : public ga_instruction {
1983  scalar_type &t;
1984  const scalar_type &c, &d;
1985  virtual int exec() {
1986  GA_DEBUG_INFO("Instruction: scalar division");
1987  t = c / d;
1988  return 0;
1989  }
1990  ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
1991  const scalar_type &d_)
1992  : t(t_), c(c_), d(d_) {}
1993  };
1994 
1995  struct ga_instruction_scalar_mult : public ga_instruction {
1996  base_tensor &t, &tc1;
1997  const scalar_type &c;
1998  virtual int exec() {
1999  GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
2000  gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
2001  return 0;
2002  }
2003  ga_instruction_scalar_mult(base_tensor &t_, base_tensor &tc1_,
2004  const scalar_type &c_)
2005  : t(t_), tc1(tc1_), c(c_) {}
2006  };
2007 
2008  struct ga_instruction_scalar_div : public ga_instruction {
2009  base_tensor &t, &tc1;
2010  const scalar_type &c;
2011  virtual int exec() {
2012  GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
2013  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2014 
2015  base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2016  for (; it != t.end(); ++it, ++it1) *it = *it1/c;
2017  return 0;
2018  }
2019  ga_instruction_scalar_div(base_tensor &t_, base_tensor &tc1_,
2020  const scalar_type &c_)
2021  : t(t_), tc1(tc1_), c(c_) {}
2022  };
2023 
2024  struct ga_instruction_dotmult : public ga_instruction {
2025  base_tensor &t, &tc1, &tc2;
2026  virtual int exec() {
2027  GA_DEBUG_INFO("Instruction: componentwise multiplication");
2028  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2029  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2030 
2031  base_tensor::iterator it = t.begin();
2032  for (size_type i = 0; i < s2; ++i)
2033  for (size_type m = 0; m < s1_1; ++m, ++it)
2034  *it = tc1[m+s1_1*i] * tc2[i];
2035  return 0;
2036  }
2037  ga_instruction_dotmult(base_tensor &t_, base_tensor &tc1_,
2038  base_tensor &tc2_)
2039  : t(t_), tc1(tc1_), tc2(tc2_) {}
2040  };
2041 
2042  struct ga_instruction_dotdiv : public ga_instruction {
2043  base_tensor &t, &tc1, &tc2;
2044  virtual int exec() {
2045  GA_DEBUG_INFO("Instruction: componentwise division");
2046  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2047  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2048 
2049  base_tensor::iterator it = t.begin();
2050  for (size_type i = 0; i < s2; ++i)
2051  for (size_type m = 0; m < s1_1; ++m, ++it)
2052  *it = tc1[m+s1_1*i] / tc2[i];
2053  return 0;
2054  }
2055  ga_instruction_dotdiv(base_tensor &t_, base_tensor &tc1_,
2056  base_tensor &tc2_)
2057  : t(t_), tc1(tc1_), tc2(tc2_) {}
2058  };
2059 
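  // In both componentwise instructions above, tc1 is traversed in blocks
  // of size s1_1 = tc1.size()/tc2.size() and each block is multiplied
  // (resp. divided) by one entry of tc2: t(m,i) = tc1(m,i) * tc2(i).
  // Worked example (hypothetical sizes): tc1 with 6 entries seen as (2,3)
  // and tc2 with 3 entries give t[m+2*i] = tc1[m+2*i] * tc2[i].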
2060  // Performs Ami Bni -> Cmni
2061  struct ga_instruction_dotmult_spec : public ga_instruction {
2062  base_tensor &t, &tc1, &tc2;
2063  virtual int exec() {
2064  GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
2065  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
2066  size_type s1_1 = tc1.size() / s2_2;
2067 
2068  base_tensor::iterator it = t.begin();
2069  for (size_type i = 0; i < s2_2; ++i)
2070  for (size_type n = 0; n < s2_1; ++n)
2071  for (size_type m = 0; m < s1_1; ++m, ++it)
2072  *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
2073  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2074  return 0;
2075  }
2076  ga_instruction_dotmult_spec(base_tensor &t_, base_tensor &tc1_,
2077  base_tensor &tc2_)
2078  : t(t_), tc1(tc1_), tc2(tc2_) {}
2079  };
2080 
2081  // Performs Amijik -> Cmjk. To be optimized
2082  struct ga_instruction_contract_1_1 : public ga_instruction {
2083  base_tensor &t, &tc1;
2084  size_type nn, ii2, ii3;
2085  virtual int exec() {
2086  GA_DEBUG_INFO("Instruction: single contraction on a single tensor");
2087 
2088  size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);
2089 
2090  base_tensor::iterator it = t.begin();
2091  for (size_type i = 0; i < ii3; ++i)
2092  for (size_type j = 0; j < ii2; ++j)
2093  for (size_type k = 0; k < ii1; ++k, ++it) {
2094  *it = scalar_type(0);
2095  size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
2096  for (size_type n = 0; n < nn; ++n)
2097  *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
2098  }
2099 
2100  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2101  return 0;
2102  }
2103  ga_instruction_contract_1_1(base_tensor &t_, base_tensor &tc1_,
2104  size_type n_, size_type i2_, size_type i3_)
2105  : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
2106  };
2107 
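  // Worked example (hypothetical sizes): with nn = 2 and
  // ii1 = ii2 = ii3 = 1, tc1 has dimensions (1,2,1,2,1) and the loops
  // above reduce it to the single value C = tc1[0] + tc1[3], i.e. the sum
  // of the entries where the repeated index takes the equal values (0,0)
  // and (1,1).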
2108  // Performs Amijk Bnljp -> Cmniklp. To be optimized
2109  struct ga_instruction_contract_2_1 : public ga_instruction {
2110  base_tensor &t, &tc1, &tc2;
2111  size_type nn, ii1, ii2, ii3, ii4;
2112  virtual int exec() {
2113  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2114 
2115  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2116  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2117 
2118  base_tensor::iterator it = t.begin();
2119  for (size_type i = 0; i < ii4; ++i)
2120  for (size_type j = 0; j < ii3; ++j)
2121  for (size_type k = 0; k < ii2; ++k)
2122  for (size_type l = 0; l < ii1; ++l)
2123  for (size_type p = 0; p < ift2; ++p)
2124  for (size_type q = 0; q < ift1; ++q, ++it) {
2125  *it = scalar_type(0);
2126  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2127  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2128  for (size_type n = 0; n < nn; ++n)
2129  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2130  }
2131 
2132  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2133  return 0;
2134  }
2135  ga_instruction_contract_2_1(base_tensor &t_, base_tensor &tc1_,
2136  base_tensor &tc2_,
2137  size_type n_, size_type i1_, size_type i2_,
2138  size_type i3_, size_type i4_)
2139  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2140  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2141  };
2142 
2143  // Performs Amijk Bnljp -> Cnmiklp. To be optimized
2144  struct ga_instruction_contract_2_1_rev : public ga_instruction {
2145  base_tensor &t, &tc1, &tc2;
2146  size_type nn, ii1, ii2, ii3, ii4;
2147  virtual int exec() {
2148  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2149 
2150  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2151  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2152 
2153  base_tensor::iterator it = t.begin();
2154  for (size_type i = 0; i < ii4; ++i)
2155  for (size_type j = 0; j < ii3; ++j)
2156  for (size_type k = 0; k < ii2; ++k)
2157  for (size_type l = 0; l < ii1; ++l)
2158  for (size_type q = 0; q < ift1; ++q)
2159  for (size_type p = 0; p < ift2; ++p, ++it) {
2160  *it = scalar_type(0);
2161  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2162  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2163  for (size_type n = 0; n < nn; ++n)
2164  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2165  }
2166 
2167  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2168  return 0;
2169  }
2170  ga_instruction_contract_2_1_rev(base_tensor &t_, base_tensor &tc1_,
2171  base_tensor &tc2_,
2172  size_type n_, size_type i1_, size_type i2_,
2173  size_type i3_, size_type i4_)
2174  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2175  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2176  };
2177 
2178  // Performs Amijklp Bnqjrls -> Cmnikpqrs. To be optimized
2179  struct ga_instruction_contract_2_2 : public ga_instruction {
2180  base_tensor &t, &tc1, &tc2;
2181  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2182  bool inv_tc2;
2183  virtual int exec() {
 2184  GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2185 
2186  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2187  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2188 
2189  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2190  if (inv_tc2) std::swap(sn1, sn2);
2191 
2192  base_tensor::iterator it = t.begin();
2193  for (size_type i = 0; i < ii6; ++i)
2194  for (size_type j = 0; j < ii5; ++j)
2195  for (size_type k = 0; k < ii4; ++k)
2196  for (size_type l = 0; l < ii3; ++l)
2197  for (size_type p = 0; p < ii2; ++p)
2198  for (size_type q = 0; q < ii1; ++q)
2199  for (size_type r = 0; r < ift2; ++r)
2200  for (size_type s = 0; s < ift1; ++s, ++it) {
2201  *it = scalar_type(0);
2202  size_type ind1
2203  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2204  size_type ind2
2205  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2206  for (size_type n1 = 0; n1 < nn1; ++n1)
2207  for (size_type n2 = 0; n2 < nn2; ++n2)
2208  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2209  * tc2[ind2+n1*sn1+n2*sn2];
2210  }
2211 
2212  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2213  return 0;
2214  }
2215  ga_instruction_contract_2_2(base_tensor &t_, base_tensor &tc1_,
2216  base_tensor &tc2_,
2217  size_type n1_, size_type n2_,
2218  size_type i1_, size_type i2_, size_type i3_,
2219  size_type i4_, size_type i5_, size_type i6_,
2220  bool intc2)
2221  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2222  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2223  inv_tc2(intc2) {}
2224  };
2225 
2226  // Performs Amijklp Bnqjrls -> Cnmikpqrs. To be optimized
2227  struct ga_instruction_contract_2_2_rev : public ga_instruction {
2228  base_tensor &t, &tc1, &tc2;
2229  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2230  bool inv_tc2;
2231  virtual int exec() {
 2232  GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2233 
2234  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2235  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2236 
2237  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2238  if (inv_tc2) std::swap(sn1, sn2);
2239 
2240  base_tensor::iterator it = t.begin();
2241  for (size_type i = 0; i < ii6; ++i)
2242  for (size_type j = 0; j < ii5; ++j)
2243  for (size_type k = 0; k < ii4; ++k)
2244  for (size_type l = 0; l < ii3; ++l)
2245  for (size_type p = 0; p < ii2; ++p)
2246  for (size_type q = 0; q < ii1; ++q)
2247  for (size_type s = 0; s < ift1; ++s)
2248  for (size_type r = 0; r < ift2; ++r, ++it) {
2249  *it = scalar_type(0);
2250  size_type ind1
2251  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2252  size_type ind2
2253  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2254  for (size_type n1 = 0; n1 < nn1; ++n1)
2255  for (size_type n2 = 0; n2 < nn2; ++n2)
2256  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2257  * tc2[ind2+n1*sn1+n2*sn2];
2258  }
2259 
2260  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2261  return 0;
2262  }
2263  ga_instruction_contract_2_2_rev(base_tensor &t_, base_tensor &tc1_,
2264  base_tensor &tc2_,
2265  size_type n1_, size_type n2_,
2266  size_type i1_, size_type i2_, size_type i3_,
2267  size_type i4_, size_type i5_, size_type i6_,
2268  bool intc2)
2269  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2270  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2271  inv_tc2(intc2) {}
2272  };
2273 
2274 
2275  // Performs Amj Bjk -> Cmk. To be optimized
2276  struct ga_instruction_matrix_mult : public ga_instruction {
2277  base_tensor &t, &tc1, &tc2;
2278  size_type n;
2279  virtual int exec() {
2280  GA_DEBUG_INFO("Instruction: order one contraction "
2281  "(dot product or matrix multiplication)");
2282 
2283  size_type s1 = tc1.size() / n;
2284  size_type s2 = tc2.size() / n;
2285 
2286  base_tensor::iterator it = t.begin();
2287  for (size_type k = 0; k < s2; ++k)
2288  for (size_type i = 0; i < s1; ++i, ++it) {
2289  *it = scalar_type(0);
2290  for (size_type j = 0; j < n; ++j)
2291  *it += tc1[i+j*s1] * tc2[j+k*n];
2292  }
2293  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2294  return 0;
2295  }
2296  ga_instruction_matrix_mult(base_tensor &t_, base_tensor &tc1_,
2297  base_tensor &tc2_, size_type n_)
2298  : t(t_), tc1(tc1_), tc2(tc2_), n(n_) {}
2299  };
2300 
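  // Everything is stored column-major, so for n = 2 with tc1 a 3x2 matrix
  // A and tc2 a 2x4 matrix B the loops above compute the plain GEMM kernel
  // C(i,k) = sum_j A(i,j)*B(j,k), i.e.
  // t[i+3*k] = sum_j tc1[i+3*j] * tc2[j+2*k].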
2301  // Performs Amij Bnjk -> Cmnik. To be optimized
2302  struct ga_instruction_matrix_mult_spec : public ga_instruction {
2303  base_tensor &t, &tc1, &tc2;
2304  size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
2305  // t of size q*l*m*p
2306  virtual int exec() {
2307  GA_DEBUG_INFO("Instruction: specific order one contraction "
2308  "(dot product or matrix multiplication)");
2309  size_type q = tc1.size() / (m * n);
2310  size_type l = tc2.size() / (p * n);
2311 
2312  base_tensor::iterator it = t.begin();
2313  for (size_type r = 0; r < p; ++r)
2314  for (size_type k = 0; k < m; ++k)
2315  for (size_type j = 0; j < l; ++j)
2316  for (size_type i = 0; i < q; ++i, ++it) {
2317  *it = scalar_type(0);
2318  for (size_type s = 0; s < n; ++s)
2319  *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2320  }
2321  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2322  return 0;
2323  }
2324  ga_instruction_matrix_mult_spec(base_tensor &t_, base_tensor &tc1_,
2325  base_tensor &tc2_, size_type n_,
2326  size_type m_, size_type p_)
2327  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
2328  };
2329 
2330  // Performs Amij Bnjk -> Cnmik. To be optimized
2331  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
2332  base_tensor &t, &tc1, &tc2;
2333  size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
2334  // t of size l*q*m*p
2335  virtual int exec() {
2336  GA_DEBUG_INFO("Instruction: specific order one contraction "
2337  "(dot product or matrix multiplication)");
2338  size_type q = tc1.size() / (m * n);
2339  size_type l = tc2.size() / (p * n);
2340 
2341  base_tensor::iterator it = t.begin();
2342  for (size_type r = 0; r < p; ++r)
2343  for (size_type k = 0; k < m; ++k)
2344  for (size_type i = 0; i < q; ++i)
2345  for (size_type j = 0; j < l; ++j, ++it) {
2346  *it = scalar_type(0);
2347  for (size_type s = 0; s < n; ++s)
2348  *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2349  }
2350  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2351  return 0;
2352  }
2353  ga_instruction_matrix_mult_spec2(base_tensor &t_, base_tensor &tc1_,
2354  base_tensor &tc2_, size_type n_,
2355  size_type m_, size_type p_)
2356  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
2357  };
2358 
2359  // Performs Ani Bmi -> Cmn
2360  struct ga_instruction_contraction : public ga_instruction {
2361  base_tensor &t, &tc1, &tc2;
2362  size_type nn;
2363  virtual int exec() {
2364  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn);
 2365 #ifdef GA_USES_BLAS
 2366  long m = int(tc2.size()/nn), k = int(nn), n = int(tc1.size()/nn);
 2367  long lda = m, ldb = n, ldc = m;
 2368  char T = 'T', N = 'N';
 2369  scalar_type alpha(1), beta(0);
 2370  gmm::dgemm_(&N, &T, &m, &n, &k, &alpha, &(tc2[0]), &lda, &(tc1[0]), &ldb,
 2371  &beta, &(t[0]), &ldc);
2372 #else
2373  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2374  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2375 
2376  auto it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2377  for (auto it = t.begin(); it != t.end(); ++it) {
2378  auto it11 = it1, it22 = it2;
2379  scalar_type a = (*it11) * (*it22);
2380  for (size_type i = 1; i < nn; ++i)
2381  { it11 += s1; it22 += s2; a += (*it11) * (*it22); }
2382  *it = a;
2383  ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2384  }
2385  // auto it = t.begin(); // Unoptimized version.
2386  // for (size_type i = 0; i < s1; ++i)
2387  // for (size_type j = 0; j < s2; ++j, ++it) {
2388  // *it = scalar_type(0);
2389  // for (size_type k = 0; k < nn; ++k)
2390  // *it += tc1[i+k*s1] * tc2[j+k*s2];
2391  // }
2392 #endif
2393  return 0;
2394  }
2395  ga_instruction_contraction(base_tensor &t_, base_tensor &tc1_,
2396  base_tensor &tc2_, size_type n_)
2397  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2398  };
2399 
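  // Viewing tc1 and tc2 as column-major matrices of sizes (s1 x nn) and
  // (s2 x nn), the instruction above computes t = tc2 * tc1^T with the
  // tc2 index varying fastest: t[j+i*s2] = sum_k tc2[j+k*s2]*tc1[i+k*s1].
  // The optional BLAS branch produces the same layout.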
2400  // Performs Ani Bmi -> Cmn
2401  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
2402  base_tensor &t, &tc1, &tc2;
2403  size_type n, q;
2404  virtual int exec() {
2405  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2406  " optimized for vectorized second tensor of type 2");
2407  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2408  size_type s1_qq = s1*q, s2_qq = s2*q;
2409  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2410 
2411  auto it = t.begin(), it1 = tc1.begin();
2412  for (size_type i = 0; i < s1; ++i, ++it1) {
2413  auto it2 = tc2.begin();
2414  for (size_type j = 0; j < s2_q; ++j) {
2415  if (j) it2+=q;
2416  auto itt1 = it1;
2417  for (size_type l = 0; l < q; ++l, ++it) {
2418  if (l) itt1 += s1;
2419  auto ittt1 = itt1, ittt2 = it2;
2420  *it = *ittt1 * (*ittt2);
2421  for (size_type m = 1; m < n; ++m) {
2422  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2423  }
2424  }
2425  }
2426  }
2427  // base_tensor u = t;
2428  // ga_instruction_contraction toto(t, tc1, tc2, n*q);
2429  // toto.exec();
2430  // GMM_ASSERT1(gmm::vect_dist2(t.as_vector(), u.as_vector()) < 1E-9, "Erroneous");
2431  return 0;
2432  }
2433  ga_instruction_contraction_opt0_2(base_tensor &t_, base_tensor &tc1_,
2434  base_tensor &tc2_, size_type n_,
2435  size_type q_)
2436  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
2437  };
2438 
2439  // Performs Ani Bmi -> Cmn
2440  template <int N>
2441  struct ga_instruction_contraction_opt0_2_unrolled : public ga_instruction {
2442  base_tensor &t, &tc1, &tc2;
2443  size_type q;
2444  virtual int exec() {
2445  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*q
2446  << " optimized for vectorized second tensor of type 2");
2447  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2448  size_type s1_qq = s1*q, s2_qq = s2*q;
2449  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2450 
2451  auto it = t.begin(), it1 = tc1.begin();
2452  for (size_type i = 0; i < s1; ++i, ++it1) {
2453  auto it2 = tc2.begin();
2454  for (size_type j = 0; j < s2_q; ++j) {
2455  if (j) it2+=q;
2456  auto itt1 = it1;
2457  for (size_type l = 0; l < q; ++l, ++it) {
2458  if (l) itt1 += s1;
2459  auto ittt1 = itt1, ittt2 = it2;
2460  *it = *ittt1 * (*ittt2);
2461  for (size_type m = 1; m < N; ++m) {
2462  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2463  }
2464  }
2465  }
2466  }
2467  return 0;
2468  }
2469  ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_, base_tensor &tc1_,
2470  base_tensor &tc2_, size_type q_)
2471  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2472  };
2473 
2474  // Performs Ani Bmi -> Cmn
2475  template <int N, int Q>
2476  struct ga_instruction_contraction_opt0_2_dunrolled : public ga_instruction {
2477  base_tensor &t, &tc1, &tc2;
2478  virtual int exec() {
2479  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*Q
2480  << " optimized for vectorized second tensor of type 2");
2481  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
2482  size_type s1_qq = s1*Q, s2_qq = s2*Q;
2483  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2484 
2485  auto it = t.begin(), it1 = tc1.begin();
2486  for (size_type i = 0; i < s1; ++i, ++it1) {
2487  auto it2 = tc2.begin();
2488  for (size_type j = 0; j < s2_q; ++j) {
2489  if (j) it2+=Q;
2490  auto itt1 = it1;
2491  for (size_type l = 0; l < Q; ++l, ++it) {
2492  if (l) itt1 += s1;
2493  auto ittt1 = itt1, ittt2 = it2;
2494  *it = *ittt1 * (*ittt2);
2495  for (size_type m = 1; m < N; ++m) {
2496  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2497  }
2498  }
2499  }
2500  }
2501  return 0;
2502  }
2503  ga_instruction_contraction_opt0_2_dunrolled
2504  (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2505  : t(t_), tc1(tc1_), tc2(tc2_) {}
2506  };
2507 
2508  // Performs Ani Bmi -> Cmn
2509  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
2510  base_tensor &t, &tc1, &tc2;
2511  size_type n, q;
2512  virtual int exec() {
2513  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
 2514  " optimized for vectorized first tensor of type 2");
2515  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2516  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2517  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2518 
2519  auto it = t.begin();
2520  for (size_type i = 0; i < s1_q; ++i) {
2521  auto it1 = tc1.begin() + i*q;
2522  for (size_type l = 0; l < q; ++l) {
2523  auto it2 = tc2.begin() + l*s2;
2524  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2525  auto itt1 = it1, itt2 = it2;
2526  *it = *itt1 * (*itt2);
2527  for (size_type m = 1; m < n; ++m) {
2528  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2529  }
2530  }
2531  }
2532  }
2533  return 0;
2534  }
2535  ga_instruction_contraction_opt2_0(base_tensor &t_, base_tensor &tc1_,
2536  base_tensor &tc2_, size_type n_,
2537  size_type q_)
2538  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
2539  };
2540 
2541  // Performs Ani Bmi -> Cmn
2542  template <int N>
2543  struct ga_instruction_contraction_opt2_0_unrolled : public ga_instruction {
2544  base_tensor &t, &tc1, &tc2;
2545  size_type q;
2546  virtual int exec() {
2547  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*q
 2548  << " optimized for vectorized first tensor of type 2");
2549  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2550  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2551  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2552 
2553  auto it = t.begin(), it1 = tc1.begin();
2554  for (size_type i = 0; i < s1_q; ++i, it1 += q) {
2555  for (size_type l = 0; l < q; ++l) {
2556  auto it2 = tc2.begin() + l*s2;
2557  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2558  auto itt1 = it1, itt2 = it2;
2559  *it = *itt1 * (*itt2);
2560  for (size_type m = 1; m < N; ++m) {
2561  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2562  }
2563  }
2564  }
2565  }
2566  return 0;
2567  }
2568  ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_, base_tensor &tc1_,
2569  base_tensor &tc2_, size_type q_)
2570  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2571  };
2572 
2573  // Performs Ani Bmi -> Cmn
2574  template <int N, int Q>
2575  struct ga_instruction_contraction_opt2_0_dunrolled : public ga_instruction {
2576  base_tensor &t, &tc1, &tc2;
2577  virtual int exec() {
2578  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N*Q
 2579  << " optimized for vectorized first tensor of type 2");
2580  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
2581  size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
2582  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2583 
2584  auto it = t.begin(), it1 = tc1.begin();
2585  for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
2586  for (size_type l = 0; l < Q; ++l) {
2587  auto it2 = tc2.begin() + l*s2;
2588  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2589  auto itt1 = it1, itt2 = it2;
2590  *it = *itt1 * (*itt2);
2591  for (size_type m = 1; m < N; ++m) {
2592  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2593  }
2594  }
2595  }
2596  }
2597  return 0;
2598  }
2599  ga_instruction_contraction_opt2_0_dunrolled
2600  (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2601  : t(t_), tc1(tc1_), tc2(tc2_) {}
2602  };
2603 
2604  // Performs Ani Bmi -> Cmn
2605  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
2606  base_tensor &t, &tc1, &tc2;
2607  size_type nn;
2608  virtual int exec() {
2609  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
2610  " optimized for vectorized second tensor of type 1");
2611  size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;
2612 
2613  auto it = t.begin(), it1 = tc1.begin();
2614  for (size_type i = 0; i < s1; ++i, ++it1) {
2615  auto it2 = tc2.begin();
2616  for (size_type j = 0; j < s2_n; ++j) {
2617  if (j) it2 += nn;
2618  auto itt1 = it1;
2619  *it++ = (*itt1) * (*it2);
2620  for (size_type k = 1; k < nn; ++k)
2621  { itt1 += s1; *it++ = (*itt1) * (*it2); }
2622  }
2623  }
2624  return 0;
2625  }
2626  ga_instruction_contraction_opt0_1(base_tensor &t_, base_tensor &tc1_,
2627  base_tensor &tc2_, size_type n_)
2628  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2629  };
2630 
2631  template<int N> inline void reduc_elem_unrolled_opt1_
2632  (const base_vector::iterator &it, const base_vector::iterator &it1,
2633  scalar_type a, size_type s1) {
2634  it[N-1] = it1[(N-1)*s1] * a;
2635  reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
2636  }
2637  template<> inline void reduc_elem_unrolled_opt1_<1>
2638  (const base_vector::iterator &it, const base_vector::iterator &it1,
2639  scalar_type a, size_type /* s1 */)
2640  { *it = (*it1) * a; }
2641 
2642  // Performs Ani Bmi -> Cmn
2643  template <int N>
2644  struct ga_instruction_contraction_opt0_1_unrolled : public ga_instruction {
2645  base_tensor &t, &tc1, &tc2;
2646  virtual int exec() {
2647  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N
2648  << " optimized for vectorized second tensor of type 1");
2649  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2650  auto it = t.begin(), it1 = tc1.begin();
2651  for (size_type i = 0; i < s1; ++i, ++it1) {
2652  auto it2 = tc2.begin(), it2e = it2 + s2;
2653  for (; it2 != it2e; it2 += N, it += N)
2654  reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
2655  }
2656  return 0;
2657  }
2658  ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_, base_tensor &tc1_,
2659  base_tensor &tc2_)
2660  : t(t_), tc1(tc1_), tc2(tc2_) {}
2661  };
2662 
2663  // Performs Ani Bmi -> Cmn
2664  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
2665  base_tensor &t, &tc1, &tc2;
2666  size_type nn;
2667  virtual int exec() {
2668  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
 2669  " optimized for two vectorized tensors of type 1");
2670  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
2671  GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
2672  size_type ss1 = s1/nn, ss2 = s2/nn;
2673 
2674  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
2675  auto it2 = tc2.begin();
2676  for (size_type j = 0; j < ss2; ++j) {
2677  if (j) it2 += nn;
2678  auto it1 = tc1.begin(), it = t.begin() + j*nn;
2679  for (size_type i = 0; i < ss1; ++i) {
2680  if (i) { it1 += nn, it += s2*nn; }
2681  scalar_type a = (*it1) * (*it2);
2682  auto itt = it;
2683  *itt = a; itt += s2_1; *itt = a;
2684  for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
2685  }
2686  }
2687  return 0;
2688  }
2689  ga_instruction_contraction_opt1_1(base_tensor &t_, base_tensor &tc1_,
2690  base_tensor &tc2_, size_type n_)
2691  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2692  };
2693 
2694 
2695 
2696  template<int N> inline scalar_type reduc_elem_unrolled__
2697  (base_tensor::iterator &it1, base_tensor::iterator &it2,
2698  size_type s1, size_type s2) {
2699  return (it1[(N-1)*s1])*(it2[(N-1)*s2])
2700  + reduc_elem_unrolled__<N-1>(it1, it2, s1, s2);
2701  }
2702  template<> inline scalar_type reduc_elem_unrolled__<1>
2703  (base_tensor::iterator &it1, base_tensor::iterator &it2,
2704  size_type /*s1*/, size_type /*s2*/)
2705  { return (*it1)*(*it2); }
2706 
2707  // Performs Ani Bmi -> Cmn. Unrolled operation.
2708  template<int N> struct ga_instruction_contraction_unrolled
2709  : public ga_instruction {
2710  base_tensor &t, &tc1, &tc2;
2711  virtual int exec() {
2712  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N);
2713  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2714  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
2715  << " != " << s1 << "*" << s2);
2716  base_tensor::iterator it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2717  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
2718  *it = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
2719  ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2720  }
2721  return 0;
2722  }
2723  ga_instruction_contraction_unrolled(base_tensor &t_, base_tensor &tc1_,
2724  base_tensor &tc2_)
2725  : t(t_), tc1(tc1_), tc2(tc2_) {}
2726  };
2727 
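  // Worked expansion (illustration): ga_instruction_contraction_unrolled<3>
  // lets the compiler inline each entry as
  //   *it = it1[0]*it2[0] + it1[s1]*it2[s2] + it1[2*s1]*it2[2*s2];
  // the template recursion of reduc_elem_unrolled__ bottoms out at N == 1.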
2728  template<int N, int S2> inline void reduc_elem_d_unrolled__
2729  (base_tensor::iterator &it, base_tensor::iterator &it1,
2730  base_tensor::iterator &it2, size_type s1, size_type s2) {
2731  *it++ = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
2732  reduc_elem_d_unrolled__<N, S2-1>(it, it1, ++it2, s1, s2);
2733  }
 2734  // Repeated terminal definitions follow, because C++ does not allow
 2735  // partial specialization of function templates (class templates only).
 2736  // The gain in assembly time is small compared to the simply unrolled version.
2737  template<> inline void reduc_elem_d_unrolled__<1, 0>
2738  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2739  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2740  template<> inline void reduc_elem_d_unrolled__<2, 0>
2741  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2742  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2743  template<> inline void reduc_elem_d_unrolled__<3, 0>
2744  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2745  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2746  template<> inline void reduc_elem_d_unrolled__<4, 0>
2747  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2748  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2749  template<> inline void reduc_elem_d_unrolled__<5, 0>
2750  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2751  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2752  template<> inline void reduc_elem_d_unrolled__<6, 0>
2753  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2754  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2755  template<> inline void reduc_elem_d_unrolled__<7, 0>
2756  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2757  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2758  template<> inline void reduc_elem_d_unrolled__<8, 0>
2759  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2760  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2761  template<> inline void reduc_elem_d_unrolled__<9, 0>
2762  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2763  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2764  template<> inline void reduc_elem_d_unrolled__<10, 0>
2765  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2766  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2767  template<> inline void reduc_elem_d_unrolled__<11, 0>
2768  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2769  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2770  template<> inline void reduc_elem_d_unrolled__<12, 0>
2771  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2772  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2773  template<> inline void reduc_elem_d_unrolled__<13, 0>
2774  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2775  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2776  template<> inline void reduc_elem_d_unrolled__<14, 0>
2777  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2778  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2779  template<> inline void reduc_elem_d_unrolled__<15, 0>
2780  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2781  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2782  template<> inline void reduc_elem_d_unrolled__<16, 0>
2783  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2784  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2785 
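  // A hedged alternative sketch (not used by this file): class templates,
  // unlike function templates, do allow partial specialization, so the
  // sixteen terminal specializations above could be collapsed into one:
  //
  //   template<int N, int S2> struct reduc_elem_d {
  //     static void exec(base_tensor::iterator &it, base_tensor::iterator &it1,
  //                      base_tensor::iterator &it2, size_type s1, size_type s2) {
  //       *it++ = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
  //       reduc_elem_d<N, S2-1>::exec(it, it1, ++it2, s1, s2);
  //     }
  //   };
  //   template<int N> struct reduc_elem_d<N, 0> { // one terminal case for all N
  //     static void exec(base_tensor::iterator &, base_tensor::iterator &,
  //                      base_tensor::iterator &, size_type, size_type) {}
  //   };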
2786  // Performs Ani Bmi -> Cmn. Automatically doubly unrolled operation
2787  // (for uniform meshes).
2788  template<int N, int S2> struct ga_ins_red_d_unrolled
2789  : public ga_instruction {
2790  base_tensor &t, &tc1, &tc2;
2791  virtual int exec() {
2792  GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of size "
2793  << S2 << "x" << N);
2794  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2795  GA_DEBUG_ASSERT(s2 == S2, "Internal error");
2796  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
2797  << " != " << s1 << "*" << s2);
2798  base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2799  for (size_type ii = 0; ii < s1; ++ii, ++it1) {
2800  base_tensor::iterator it2 = tc2.begin();
2801  reduc_elem_d_unrolled__<N, S2>(it, it1, it2, s1, s2);
2802  }
2803  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
2804  return 0;
2805  }
2806  ga_ins_red_d_unrolled(base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2807  : t(t_), tc1(tc1_), tc2(tc2_) {}
2808  };
2809 
2810 
2811  pga_instruction ga_instruction_contraction_switch
2812  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
2813  size_type n, bool &to_clear) {
2814  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
2815 
2816  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
2817  tc1_.qdim() == n && tc2_.qdim() == n) {
2818  to_clear = true;
2819  t_.set_sparsity(10, tc1_.qdim());
2820  return std::make_shared<ga_instruction_contraction_opt1_1>(t, tc1, tc2, n);
2821  }
2822 
2823  if (tc2_.sparsity() == 1) {
2824  switch(n) {
2825  case 2:
2826  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
2827  (t, tc1, tc2);
2828  case 3:
2829  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
2830  (t, tc1, tc2);
2831  case 4:
2832  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
2833  (t, tc1, tc2);
2834  case 5:
2835  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
2836  (t, tc1, tc2);
2837  default:
2838  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
2839  }
2840  }
2841  if (tc2_.sparsity() == 2) {
2842  size_type q2 = tc2.sizes()[1];
 2843  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
2844  if (n2*q2 == n) {
2845  switch (n2) {
2846  case 1:
2847  switch (q2) {
2848  case 2:
2849  return
2850  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
2851  (t, tc1, tc2);
2852  case 3:
2853  return
2854  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
2855  (t, tc1, tc2);
2856  case 4:
2857  return
2858  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
2859  (t, tc1, tc2);
2860  default :
2861  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
2862  (t, tc1, tc2, q2);
2863  }
2864  case 2:
2865  switch (q2) {
2866  case 2:
2867  return
2868  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
2869  (t, tc1, tc2);
2870  case 3:
2871  return
2872  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
2873  (t, tc1, tc2);
2874  case 4:
2875  return
2876  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
2877  (t, tc1, tc2);
2878  default :
2879  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
2880  (t, tc1, tc2, q2);
2881  }
2882  case 3:
2883  switch (q2) {
2884  case 2:
2885  return
2886  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
2887  (t, tc1, tc2);
2888  case 3:
2889  return
2890  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
2891  (t, tc1, tc2);
2892  case 4:
2893  return
2894  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
2895  (t, tc1, tc2);
2896  default :
2897  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
2898  (t, tc1, tc2, q2);
2899  }
2900  case 4:
2901  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
2902  (t, tc1, tc2, q2);
2903  case 5:
2904  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
2905  (t, tc1, tc2, q2);
2906  default:
2907  return std::make_shared<ga_instruction_contraction_opt0_2>
2908  (t,tc1,tc2,n2,q2);
2909  }
2910  }
2911  }
2912  if (tc1_.sparsity() == 2) {
2913  size_type q1 = tc1.sizes()[1];
 2914  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
2915  if (n1*q1 == n) {
2916  switch (n1) {
2917  case 1:
2918  switch (q1) {
2919  case 2:
2920  return
2921  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
2922  (t, tc1, tc2);
2923  case 3:
2924  return
2925  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
2926  (t, tc1, tc2);
2927  case 4:
2928  return
2929  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
2930  (t, tc1, tc2);
2931  default :
2932  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
2933  (t, tc1, tc2, q1);
2934  }
2935  case 2:
2936  switch (q1) {
2937  case 2:
2938  return
2939  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
2940  (t, tc1, tc2);
2941  case 3:
2942  return
2943  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
2944  (t, tc1, tc2);
2945  case 4:
2946  return
2947  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
2948  (t, tc1, tc2);
2949  default :
2950  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
2951  (t, tc1, tc2, q1);
2952  }
2953  case 3:
2954  switch (q1) {
2955  case 2:
2956  return
2957  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
2958  (t, tc1, tc2);
2959  case 3:
2960  return
2961  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
2962  (t, tc1, tc2);
2963  case 4:
2964  return
2965  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
2966  (t, tc1, tc2);
2967  default :
2968  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
2969  (t, tc1, tc2, q1);
2970  }
2973  case 4:
2974  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
2975  (t, tc1, tc2, q1);
2976  case 5:
2977  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
2978  (t, tc1, tc2, q1);
2979  default:
2980  return std::make_shared<ga_instruction_contraction_opt2_0>
2981  (t,tc1,tc2, n1, q1);
2982  }
2983  }
2984  }
2985 
2986  switch(n) {
2987  case 2 : return std::make_shared<ga_instruction_contraction_unrolled< 2>>
2988  (t, tc1, tc2);
2989  case 3 : return std::make_shared<ga_instruction_contraction_unrolled< 3>>
2990  (t, tc1, tc2);
2991  case 4 : return std::make_shared<ga_instruction_contraction_unrolled< 4>>
2992  (t, tc1, tc2);
2993  case 5 : return std::make_shared<ga_instruction_contraction_unrolled< 5>>
2994  (t, tc1, tc2);
2995  case 6 : return std::make_shared<ga_instruction_contraction_unrolled< 6>>
2996  (t, tc1, tc2);
2997  case 7 : return std::make_shared<ga_instruction_contraction_unrolled< 7>>
2998  (t, tc1, tc2);
2999  case 8 : return std::make_shared<ga_instruction_contraction_unrolled< 8>>
3000  (t, tc1, tc2);
3001  case 9 : return std::make_shared<ga_instruction_contraction_unrolled< 9>>
3002  (t, tc1, tc2);
3003  case 10 : return std::make_shared<ga_instruction_contraction_unrolled<10>>
3004  (t, tc1, tc2);
3005  case 11 : return std::make_shared<ga_instruction_contraction_unrolled<11>>
3006  (t, tc1, tc2);
3007  case 12 : return std::make_shared<ga_instruction_contraction_unrolled<12>>
3008  (t, tc1, tc2);
3009  case 13 : return std::make_shared<ga_instruction_contraction_unrolled<13>>
3010  (t, tc1, tc2);
3011  case 14 : return std::make_shared<ga_instruction_contraction_unrolled<14>>
3012  (t, tc1, tc2);
3013  case 15 : return std::make_shared<ga_instruction_contraction_unrolled<15>>
3014  (t, tc1, tc2);
3015  case 16 : return std::make_shared<ga_instruction_contraction_unrolled<16>>
3016  (t, tc1, tc2);
3017  default : return std::make_shared<ga_instruction_contraction>
3018  (t, tc1, tc2, n);
3019  }
3020  }
3021 
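  // Dispatch example (hypothetical case): a contraction of two
  // non-vectorized tensors with n = 3 falls through all the sparsity tests
  // above and returns ga_instruction_contraction_unrolled<3>; sizes above
  // 16 fall back to the generic ga_instruction_contraction.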
3022  pga_instruction ga_uniform_instruction_contraction_switch
3023  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3024  size_type n, bool &to_clear) {
3025  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
3026 
3027  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3028  tc1_.qdim() == n && tc2_.qdim() == n) {
3029  to_clear = true;
3030  t_.set_sparsity(10, tc1_.qdim());
3031  return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
3032  }
3033  if (tc2_.sparsity() == 1) {
3034  switch(n) {
3035  case 2:
3036  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3037  (t, tc1, tc2);
3038  case 3:
3039  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3040  (t, tc1, tc2);
3041  case 4:
3042  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3043  (t, tc1, tc2);
3044  case 5:
3045  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3046  (t, tc1, tc2);
3047  default:
3048  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
3049  }
3050  }
3051  if (tc2_.sparsity() == 2) {
3052  size_type q2 = tc2.sizes()[1];
 3053  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3054  if (n2*q2 == n) {
3055  switch (n2) {
3056  case 1:
3057  switch (q2) {
3058  case 2:
3059  return
3060  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3061  (t, tc1, tc2);
3062  case 3:
3063  return
3064  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3065  (t, tc1, tc2);
3066  case 4:
3067  return
3068  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3069  (t, tc1, tc2);
3070  default :
3071  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3072  (t, tc1, tc2, q2);
3073  }
3074  case 2:
3075  switch (q2) {
3076  case 2:
3077  return
3078  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3079  (t, tc1, tc2);
3080  case 3:
3081  return
3082  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3083  (t, tc1, tc2);
3084  case 4:
3085  return
3086  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3087  (t, tc1, tc2);
3088  default :
3089  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3090  (t, tc1, tc2, q2);
3091  }
3092  case 3:
3093  switch (q2) {
3094  case 2:
3095  return
3096  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3097  (t, tc1, tc2);
3098  case 3:
3099  return
3100  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3101  (t, tc1, tc2);
3102  case 4:
3103  return
3104  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3105  (t, tc1, tc2);
3106  default :
3107  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3108  (t, tc1, tc2, q2);
3109  }
3110  case 4:
3111  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3112  (t, tc1, tc2, q2);
3113  case 5:
3114  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3115  (t, tc1, tc2, q2);
3116  default:
3117  return std::make_shared<ga_instruction_contraction_opt0_2>
3118  (t,tc1,tc2,n2,q2);
3119  }
3120  }
3121  }
3122  if (tc1_.sparsity() == 2) {
3123  size_type q1 = tc1.sizes()[1];
3124  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3125  if (n1*q1 == n) {
3126  switch (n1) {
3127  case 1:
3128  switch (q1) {
3129  case 2:
3130  return
3131  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3132  (t, tc1, tc2);
3133  case 3:
3134  return
3135  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3136  (t, tc1, tc2);
3137  case 4:
3138  return
3139  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3140  (t, tc1, tc2);
3141  default :
3142  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3143  (t, tc1, tc2, q1);
3144  }
3145  case 2:
3146  switch (q1) {
3147  case 2:
3148  return
3149  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3150  (t, tc1, tc2);
3151  case 3:
3152  return
3153  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3154  (t, tc1, tc2);
3155  case 4:
3156  return
3157  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3158  (t, tc1, tc2);
3159  default :
3160  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3161  (t, tc1, tc2, q1);
3162  }
3163  case 3:
3164  switch (q1) {
3165  case 2:
3166  return
3167  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3168  (t, tc1, tc2);
3169  case 3:
3170  return
3171  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3172  (t, tc1, tc2);
3173  case 4:
3174  return
3175  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3176  (t, tc1, tc2);
3177  default :
3178  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3179  (t, tc1, tc2, q1);
3180  }
3183  case 4:
3184  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3185  (t, tc1, tc2, q1);
3186  case 5:
3187  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3188  (t, tc1, tc2, q1);
3189  default:
3190  return std::make_shared<ga_instruction_contraction_opt2_0>
3191  (t,tc1,tc2, n1, q1);
3192  }
3193  }
3194  }
3195 
3196  // Only specialized for certain values
3197  size_type s2 = tc2.size()/n;
3198  switch(s2) {
3199  case 1 :
3200  switch(n) {
3201  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
3202  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
3203  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
3204  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3205  }
3206  case 2 :
3207  switch(n) {
3208  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
3209  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
3210  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
3211  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3212  }
3213  case 3 :
3214  switch(n) {
3215  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
3216  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
3217  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
3218  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3219  }
3220  case 4 :
3221  switch(n) {
3222  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
3223  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
3224  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
3225  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3226  }
3227  case 5 :
3228  switch(n) {
3229  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
3230  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
3231  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
3232  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3233  }
3234  case 6 :
3235  switch(n) {
3236  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
3237  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
3238  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
3239  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3240  }
3241  case 7 :
3242  switch(n) {
3243  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
3244  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
3245  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
3246  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3247  }
3248  case 8 :
3249  switch(n) {
3250  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
3251  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
3252  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
3253  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3254  }
3255  case 9 :
3256  switch(n) {
3257  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
3258  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
3259  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
3260  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3261  }
3262  case 10:
3263  switch(n) {
3264  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
3265  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
3266  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
3267  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3268  }
3269  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3270  }
3271  }
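  // Note on the dispatch pattern used above: the contraction size is only
  // known at run time, so the most frequent sizes are mapped onto
  // instructions whose loops are unrolled at compile time, with a generic
  // run-time loop as fallback. Schematically (hypothetical names, for
  // illustration only):
  //
  //   template<int N> struct contract_unrolled : ga_instruction
  //   { /* loops with fixed trip count N, unrollable by the compiler */ };
  //
  //   pga_instruction make_contraction(size_type n) {
  //     switch (n) {
  //     case 2:  return std::make_shared<contract_unrolled<2>>();
  //     case 3:  return std::make_shared<contract_unrolled<3>>();
  //     default: return std::make_shared<contract_generic>(n);
  //     }
  //   }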
3272 
3273 
3274  // Performs Amij Bnj -> Cmni. To be optimized.
3275  struct ga_instruction_spec_contraction : public ga_instruction {
3276  base_tensor &t, &tc1, &tc2;
3277  size_type nn;
3278  virtual int exec() {
3279  GA_DEBUG_INFO("Instruction: specific contraction operation of "
3280  "size " << nn);
3281  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3282  size_type s2 = tc2.sizes()[0];
3283  base_tensor::iterator it = t.begin();
3284  for (size_type i = 0; i < s11; ++i)
3285  for (size_type n = 0; n < s2; ++n)
3286  for (size_type m = 0; m < s1; ++m, ++it) {
3287  *it = scalar_type(0);
3288  for (size_type j = 0; j < nn; ++j)
3289  *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
3290  }
3291  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3292  return 0;
3293  }
3294  ga_instruction_spec_contraction(base_tensor &t_, base_tensor &tc1_,
3295  base_tensor &tc2_, size_type n_)
3296  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3297  };
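  // Worked form of the indexing above: base_tensor stores entries with the
  // first index running fastest, so for A of dimensions (s1, s11, nn) and
  // B of dimensions (s2, nn) the loops compute
  //   C(m, n, i) = sum_j A(m, i, j) * B(n, j),
  // A(m,i,j) being read at linear index m + i*s1 + j*s1*s11 and B(n,j) at
  // n + j*s2, exactly the offsets used in the inner loop.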
3298 
3299  // Performs Amik Bnjk -> Cmnij. To be optimized.
3300  struct ga_instruction_spec2_contraction : public ga_instruction {
3301  base_tensor &t, &tc1, &tc2;
3302  size_type nn;
3303  virtual int exec() {
3304  GA_DEBUG_INFO("Instruction: second specific contraction operation of "
3305  "size " << nn);
3306  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3307  size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
3308  base_tensor::iterator it = t.begin();
3309  for (size_type j = 0; j < s22; ++j)
3310  for (size_type i = 0; i < s11; ++i)
3311  for (size_type n = 0; n < s2; ++n)
3312  for (size_type m = 0; m < s1; ++m, ++it) {
3313  *it = scalar_type(0);
3314  for (size_type k = 0; k < nn; ++k)
3315  *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
3316  }
3317  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3318  return 0;
3319  }
3320  ga_instruction_spec2_contraction(base_tensor &t_, base_tensor &tc1_,
3321  base_tensor &tc2_, size_type n_)
3322  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3323  };
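  // Worked form: with the same storage convention, for A of dimensions
  // (s1, s11, nn) and B of dimensions (s2, s22, nn) the loops compute
  //   C(m, n, i, j) = sum_k A(m, i, k) * B(n, j, k).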
3324 
3325  // Performs Aij Bkl -> Cijkl
3326  struct ga_instruction_simple_tmult : public ga_instruction {
3327  base_tensor &t, &tc1, &tc2;
3328  virtual int exec() {
3329  GA_DEBUG_INFO("Instruction: simple tensor product");
3330  size_type s1 = tc1.size();
3331  GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
3332  base_tensor::iterator it2=tc2.begin(), it1=tc1.begin(), it1end=it1 + s1;
3333  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
3334  *it = *(it2) * (*it1);
3335  ++it1; if (it1 == it1end) { it1 = tc1.begin(); ++it2; }
3336  }
3337  return 0;
3338  }
3339  ga_instruction_simple_tmult(base_tensor &t_, base_tensor &tc1_,
3340  base_tensor &tc2_)
3341  : t(t_), tc1(tc1_), tc2(tc2_) {}
3342  };
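  // Worked form: the single pass above fills C in storage order, i.e.
  //   C[p + q*s1] = A[p] * B[q]  for p < tc1.size(), q < tc2.size(),
  // which is the plain tensor product C(i,j,k,l) = A(i,j) * B(k,l) once
  // the multi-indices of A and B are flattened.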
3343 
3344  template<int S1> inline void tmult_elem_unrolled__
3345  (base_tensor::iterator &it, base_tensor::iterator &it1,
3346  base_tensor::iterator &it2) {
3347  *it++ = (*it1++)*(*it2);
3348  tmult_elem_unrolled__<S1-1>(it, it1, it2);
3349  }
3350  template<> inline void tmult_elem_unrolled__<0>
3351  (base_tensor::iterator &/*it*/, base_tensor::iterator &/*it1*/,
3352  base_tensor::iterator &/*it2*/) { }
3353 
3354  // Performs Aij Bkl -> Cijkl, partially unrolled version
3355  template<int S1> struct ga_instruction_simple_tmult_unrolled
3356  : public ga_instruction {
3357  base_tensor &t, &tc1, &tc2;
3358  virtual int exec() {
3359  size_type s2 = tc2.size();
3360  GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
3361  << tc1.size() << " operations");
3362  GA_DEBUG_ASSERT(t.size() == tc1.size() * s2, "Wrong sizes");
3363  GA_DEBUG_ASSERT(tc1.size() == S1, "Wrong sizes");
3364 
3365  base_tensor::iterator it = t.begin(), it2 = tc2.begin();
3366  for (size_type ii = 0; ii < s2; ++ii, ++it2) {
3367  base_tensor::iterator it1 = tc1.begin();
3368  tmult_elem_unrolled__<S1>(it, it1, it2);
3369  }
3370  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
3371  return 0;
3372  }
3373  ga_instruction_simple_tmult_unrolled(base_tensor &t_, base_tensor &tc1_,
3374  base_tensor &tc2_)
3375  : t(t_), tc1(tc1_), tc2(tc2_) {}
3376  };
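  // Note: tmult_elem_unrolled__<S1> is compile-time recursion; for S1 = 3,
  // say, it expands to the straight-line sequence
  //   *it++ = (*it1++)*(*it2);  // S1 = 3
  //   *it++ = (*it1++)*(*it2);  // S1 = 2
  //   *it++ = (*it1++)*(*it2);  // S1 = 1, then the <0> specialization stops
  // so each column of the result is produced without loop overhead.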
3377 
3378  pga_instruction ga_uniform_instruction_simple_tmult
3379  (base_tensor &t, base_tensor &tc1, base_tensor &tc2) {
3380  switch(tc1.size()) {
3381  case 2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
3382  (t, tc1, tc2);
3383  case 3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
3384  (t, tc1, tc2);
3385  case 4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
3386  (t, tc1, tc2);
3387  case 5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
3388  (t, tc1, tc2);
3389  case 6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
3390  (t, tc1, tc2);
3391  case 7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
3392  (t, tc1, tc2);
3393  case 8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
3394  (t, tc1, tc2);
3395  case 9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
3396  (t, tc1, tc2);
3397  case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
3398  (t, tc1, tc2);
3399  case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
3400  (t, tc1, tc2);
3401  case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
3402  (t, tc1, tc2);
3403  case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
3404  (t, tc1, tc2);
3405  case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
3406  (t, tc1, tc2);
3407  case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
3408  (t, tc1, tc2);
3409  case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
3410  (t, tc1, tc2);
3411  default : return std::make_shared<ga_instruction_simple_tmult>
3412  (t, tc1, tc2);
3413  }
3414  }
3415 
3416 
3417  // Performs Ami Bnj -> Cmnij. To be optimized.
3418  struct ga_instruction_spec_tmult : public ga_instruction {
3419  base_tensor &t, &tc1, &tc2;
3420  size_type s1_2, s2_2;
3421  virtual int exec() {
3422  GA_DEBUG_INFO("Instruction: specific tensor product");
3423  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3424  size_type s1_1 = tc1.size() / s1_2;
3425  size_type s2_1 = tc2.size() / s2_2;
3426 
3427  base_tensor::iterator it = t.begin();
3428  for (size_type j = 0; j < s2_2; ++j)
3429  for (size_type i = 0; i < s1_2; ++i)
3430  for (size_type n = 0; n < s2_1; ++n)
3431  for (size_type m = 0; m < s1_1; ++m, ++it)
3432  *it = tc1[m+i*s1_1] * tc2[n+j*s2_1];
3433  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3434  return 0;
3435  }
3436  ga_instruction_spec_tmult(base_tensor &t_, base_tensor &tc1_,
3437  base_tensor &tc2_, size_type s1_2_,
3438  size_type s2_2_)
3439  : t(t_), tc1(tc1_), tc2(tc2_), s1_2(s1_2_), s2_2(s2_2_) {}
3440  };
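  // Worked form: for A of dimensions (s1_1, s1_2) and B of dimensions
  // (s2_1, s2_2), the loops fill C(m, n, i, j) = A(m, i) * B(n, j) in
  // storage order (m fastest).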
3441 
3442  // Performs Ai Bmj -> Cmij. To be optimized.
3443  struct ga_instruction_spec2_tmult : public ga_instruction {
3444  base_tensor &t, &tc1, &tc2;
3445  virtual int exec() {
3446  GA_DEBUG_INFO("Instruction: second specific tensor product");
3447  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3448  size_type s1 = tc1.size();
3449  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
3450 
3451  base_tensor::iterator it = t.begin();
3452  for (size_type j = 0; j < s2_2; ++j)
3453  for (size_type i = 0; i < s1; ++i)
3454  for (size_type m = 0; m < s2_1; ++m, ++it)
3455  *it = tc1[i] * tc2[m+j*s2_1];
3456  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3457  return 0;
3458  }
3459  ga_instruction_spec2_tmult(base_tensor &t_, base_tensor &tc1_,
3460  base_tensor &tc2_)
3461  : t(t_), tc1(tc1_), tc2(tc2_) {}
3462  };
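  // Worked form: for A of size s1 and B of dimensions (s2_1, s2_2), the
  // loops fill C(m, i, j) = A(i) * B(m, j) in storage order (m fastest).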
3463 
3464 
3465 
3466  struct ga_instruction_simple_c_matrix : public ga_instruction {
3467  base_tensor &t;
3468  std::vector<scalar_type *> components;
3469  virtual int exec() {
3470  GA_DEBUG_INFO("Instruction: gathering components for explicit "
3471  "matrix");
3472  GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
3473  for (size_type i = 0; i < components.size(); ++i)
3474  t[i] = *(components[i]);
3475  return 0;
3476  }
3477  ga_instruction_simple_c_matrix(base_tensor &t_,
3478  std::vector<scalar_type *> &components_)
3479  : t(t_), components(components_) {}
3480  };
3481 
3482  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
3483  base_tensor &t;
3484  const std::vector<const base_tensor *> components;
3485  virtual int exec() {
3486  GA_DEBUG_INFO("Instruction: gathering components for explicit "
3487  "matrix with tests functions");
3488  size_type s = t.size() / components.size();
3489  GA_DEBUG_ASSERT(s, "Wrong sizes");
3490  base_tensor::iterator it = t.begin();
3491  for (size_type i = 0; i < components.size(); ++i) {
3492  const base_tensor &t1 = *(components[i]);
3493  if (t1.size() > 1) {
3494  GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
3495  << " != " << s);
3496  for (size_type j = 0; j < s; ++j) *it++ = t1[j];
3497  } else {
3498  for (size_type j = 0; j < s; ++j) *it++ = t1[0];
3499  }
3500  }
3501  return 0;
3502  }
3503  ga_instruction_c_matrix_with_tests
3504  (base_tensor &t_, const std::vector<const base_tensor *> &components_)
3505  : t(t_), components(components_) {}
3506  };
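  // Note: each component of an explicit matrix involving test functions is
  // a tensor of either the full test-function size s or size 1 (a constant
  // entry); size-1 components are replicated s times above so that the
  // result keeps the uniform layout t[component*s + test_dof].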
3507 
3508  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
3509  scalar_type &t;
3510  const scalar_type &c;
3511  pscalar_func_onearg f1;
3512  virtual int exec() {
3513  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3514  "predefined function on a scalar");
3515  t = (*f1)(c);
3516  return 0;
3517  }
3518  ga_instruction_eval_func_1arg_1res(scalar_type &t_, const scalar_type &c_,
3519  pscalar_func_onearg f1_)
3520  : t(t_), c(c_), f1(f1_) {}
3521  };
3522 
3523  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
3524  scalar_type &t;
3525  const scalar_type &c;
3526  const ga_predef_function &F;
3527  virtual int exec() {
3528  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3529  "predefined function on a scalar");
3530  t = F(c);
3531  return 0;
3532  }
3533  ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
3534  const scalar_type &c_,
3535  const ga_predef_function &F_)
3536  : t(t_), c(c_), F(F_) {}
3537  };
3538 
3539  struct ga_instruction_eval_func_1arg : public ga_instruction {
3540  base_tensor &t, &tc1;
3541  pscalar_func_onearg f1;
3542  virtual int exec() {
3543  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3544  "predefined function on tensor");
3545  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3546  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f1)(tc1[i]);
3547  return 0;
3548  }
3549  ga_instruction_eval_func_1arg(base_tensor &t_, base_tensor &c_,
3550  pscalar_func_onearg f1_)
3551  : t(t_), tc1(c_), f1(f1_) {}
3552  };
3553 
3554  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
3555  base_tensor &t, &tc1;
3556  const ga_predef_function &F;
3557  virtual int exec() {
3558  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3559  "predefined function on tensor");
3560  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3561  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i]);
3562  return 0;
3563  }
3564  ga_instruction_eval_func_1arg_expr(base_tensor &t_, base_tensor &c_,
3565  const ga_predef_function &F_)
3566  : t(t_), tc1(c_), F(F_) {}
3567  };
3568 
3569  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
3570  scalar_type &t;
3571  const scalar_type &c, &d;
3572  pscalar_func_twoargs f2;
3573  virtual int exec() {
3574  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3575  "predefined function on two scalar");
3576  t = (*f2)(c, d);
3577  return 0;
3578  }
3579  ga_instruction_eval_func_2arg_1res(scalar_type &t_, const scalar_type &c_,
3580  const scalar_type &d_,
3581  pscalar_func_twoargs f2_)
3582  : t(t_), c(c_), d(d_), f2(f2_) {}
3583  };
3584 
3585  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
3586  scalar_type &t;
3587  const scalar_type &c, &d;
3588  const ga_predef_function &F;
3589  virtual int exec() {
3590  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3591  "predefined function on two scalar");
3592  t = F(c, d);
3593  return 0;
3594  }
3595  ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
3596  const scalar_type &c_,
3597  const scalar_type &d_,
3598  const ga_predef_function &F_)
3599  : t(t_), c(c_), d(d_), F(F_) {}
3600  };
3601 
3602  struct ga_instruction_eval_func_2arg_first_scalar : public ga_instruction {
3603  base_tensor &t, &tc1, &tc2;
3604  pscalar_func_twoargs f2;
3605  virtual int exec() {
3606  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3607  "predefined function on one scalar and one tensor");
3608  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
3609  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[0], tc2[i]);
3610  return 0;
3611  }
3612  ga_instruction_eval_func_2arg_first_scalar
3613  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3614  pscalar_func_twoargs f2_)
3615  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3616  };
3617 
3618  struct ga_instruction_eval_func_2arg_first_scalar_expr
3619  : public ga_instruction {
3620  base_tensor &t, &tc1, &tc2;
3621  const ga_predef_function &F;
3622  virtual int exec() {
3623  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3624  "predefined function on one scalar and one tensor");
3625  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
3626  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[0], tc2[i]);
3627  return 0;
3628  }
3629  ga_instruction_eval_func_2arg_first_scalar_expr
3630  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3631  const ga_predef_function &F_)
3632  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3633  };
3634 
3635  struct ga_instruction_eval_func_2arg_second_scalar : public ga_instruction {
3636  base_tensor &t, &tc1, &tc2;
3637  pscalar_func_twoargs f2;
3638  virtual int exec() {
3639  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3640  "predefined function on one tensor and one scalar");
3641  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3642  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[0]);
3643  return 0;
3644  }
3645  ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
3646  base_tensor &c_,
3647  base_tensor &d_,
3648  pscalar_func_twoargs f2_)
3649  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3650  };
3651 
3652  struct ga_instruction_eval_func_2arg_second_scalar_expr
3653  : public ga_instruction {
3654  base_tensor &t, &tc1, &tc2;
3655  const ga_predef_function &F;
3656  virtual int exec() {
3657  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3658  "predefined function on one tensor and one scalar");
3659  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3660  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[0]);
3661  return 0;
3662  }
3663  ga_instruction_eval_func_2arg_second_scalar_expr
3664  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3665  const ga_predef_function &F_)
3666  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3667  };
3668 
3669  struct ga_instruction_eval_func_2arg : public ga_instruction {
3670  base_tensor &t, &tc1, &tc2;
3671  pscalar_func_twoargs f2;
3672  virtual int exec() {
3673  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3674  "predefined function on two tensors");
3675  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3676  "Wrong sizes");
3677 
3678  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[i]);
3679  return 0;
3680  }
3681  ga_instruction_eval_func_2arg(base_tensor &t_, base_tensor &c_,
3682  base_tensor &d_, pscalar_func_twoargs f2_)
3683  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3684  };
3685 
3686  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
3687  base_tensor &t, &tc1, &tc2;
3688  const ga_predef_function &F;
3689  virtual int exec() {
3690  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3691  "predefined function on two tensors");
3692  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3693  "Wrong sizes");
3694 
3695  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[i]);
3696  return 0;
3697  }
3698  ga_instruction_eval_func_2arg_expr(base_tensor &t_, base_tensor &c_,
3699  base_tensor &d_,
3700  const ga_predef_function &F_)
3701  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3702  };
3703 
3704  struct ga_instruction_eval_OP : public ga_instruction {
3705  base_tensor &t;
3706  const ga_nonlinear_operator &OP;
3707  ga_nonlinear_operator::arg_list args;
3708  virtual int exec() {
3709  GA_DEBUG_INFO("Instruction: operator evaluation");
3710  OP.value(args, t);
3711  return 0;
3712  }
3713  ga_instruction_eval_OP(base_tensor &t_, const ga_nonlinear_operator &OP_,
3714  ga_nonlinear_operator::arg_list &args_)
3715  : t(t_), OP(OP_), args(args_) {}
3716  };
3717 
3718  struct ga_instruction_eval_derivative_OP : public ga_instruction {
3719  base_tensor &t;
3720  const ga_nonlinear_operator &OP;
3721  ga_nonlinear_operator::arg_list args;
3722  size_type der1;
3723  virtual int exec() {
3724  GA_DEBUG_INFO("Instruction: operator derivative evaluation");
3725  OP.derivative(args, der1, t);
3726  return 0;
3727  }
3728  ga_instruction_eval_derivative_OP(base_tensor &t_,
3729  const ga_nonlinear_operator &OP_,
3730  ga_nonlinear_operator::arg_list &args_,
3731  size_type der1_)
3732  : t(t_), OP(OP_), args(args_), der1(der1_) {}
3733  };
3734 
3735  struct ga_instruction_eval_second_derivative_OP : public ga_instruction {
3736  base_tensor &t;
3737  const ga_nonlinear_operator &OP;
3738  ga_nonlinear_operator::arg_list args;
3739  size_type der1, der2;
3740  virtual int exec() {
3741  GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
3742  OP.second_derivative(args, der1, der2, t);
3743  return 0;
3744  }
3745  ga_instruction_eval_second_derivative_OP
3746  (base_tensor &t_, const ga_nonlinear_operator &OP_,
3747  ga_nonlinear_operator::arg_list &args_, size_type der1_, size_type der2_)
3748  : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
3749  };
3750 
3751  struct ga_instruction_tensor_slice : public ga_instruction {
3752  base_tensor &t, &tc1;
3753  bgeot::multi_index mi, indices;
3754  virtual int exec() {
3755  GA_DEBUG_INFO("Instruction: tensor slice");
3756  size_type order = t.sizes().size();
3757  for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
3758  mi3.incrementation(t.sizes())) {
3759  for (size_type j = 0; j < order; ++j)
3760  mi[indices[j]] = mi3[j];
3761  t(mi3) = tc1(mi);
3762  }
3763  return 0;
3764  }
3765  ga_instruction_tensor_slice(base_tensor &t_, base_tensor &tc1_,
3766  bgeot::multi_index &mi_,
3767  bgeot::multi_index &indices_)
3768  : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
3769  };
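  // Illustration: `mi` holds the fixed indices of the slice and
  // `indices[j]` gives the position, inside tc1, of the j-th index of the
  // result. For instance, slicing the second index of a third-order tensor
  // at value c (indices = {0, 2}, mi[1] = c) makes the loop copy
  //   t(i, k) = tc1(i, c, k)  for all i, k.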
3770 
3771  struct ga_instruction_transformation_call : public ga_instruction {
3772  const ga_workspace &workspace;
3773  ga_instruction_set::interpolate_info &inin;
3774  pinterpolate_transformation trans;
3775  fem_interpolation_context &ctx;
3776  const base_small_vector &Normal;
3777  const mesh &m;
3778  bool compute_der;
3779 
3780  virtual int exec() {
3781  GA_DEBUG_INFO("Instruction: call interpolate transformation");
3782  base_node P_ref;
3783  size_type cv;
3784  short_type face_num;
3785  gmm::clear(inin.Normal);
3786  inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
3787  face_num, P_ref, inin.Normal,
3788  inin.derivatives, compute_der);
3789  if (inin.pt_type) {
3790  if (cv != size_type(-1)) {
3791  inin.m->points_of_convex(cv, inin.G);
3792  inin.ctx.change((inin.m)->trans_of_convex(cv),
3793  0, P_ref, inin.G, cv, face_num);
3794  inin.has_ctx = true;
3795  if (face_num != short_type(-1)) {
3796  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
3797  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
3798  } else
3799  inin.Normal.resize(0);
3800  inin.pt_y = inin.ctx.xreal();
3801  } else {
3802  inin.ctx.invalid_convex_num();
3803  inin.pt_y = P_ref;
3804  inin.has_ctx = false;
3805  }
3806  } else {
3807  inin.ctx.invalid_convex_num();
3808  inin.Normal.resize(0);
3809  inin.pt_y.resize(0);
3810  inin.has_ctx = false;
3811  }
3812  GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
3813  return 0;
3814  }
3815  ga_instruction_transformation_call
3816  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
3817  pinterpolate_transformation t, fem_interpolation_context &ctxx,
3818  const base_small_vector &No, const mesh &mm, bool compute_der_)
3819  : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
3820  compute_der(compute_der_) {}
3821  };
3822 
3823  struct ga_instruction_neighbour_transformation_call : public ga_instruction {
3824  const ga_workspace &workspace;
3825  ga_instruction_set::interpolate_info &inin;
3826  pinterpolate_transformation trans;
3827  fem_interpolation_context &ctx;
3828  base_small_vector &Normal;
3829  const mesh &m;
3830  size_type &ipt;
3831  papprox_integration &pai;
3832  bgeot::geotrans_precomp_pool &gp_pool;
3833  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbour_corresp;
3834 
3835  virtual int exec() {
3836  bool cancel_optimization = false;
3837  GA_DEBUG_INFO("Instruction: call interpolate neighbour transformation");
3838  if (ipt == 0) {
3839  if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
3840  || cancel_optimization) {
3841  inin.ctx.invalid_convex_num();
3842  } else {
3843  // Test if the situation has already been encountered
3844  size_type cv = ctx.convex_num();
3845  short_type f = ctx.face_num();
3846  auto adj_face = m.adjacent_face(cv, f);
3847  if (adj_face.cv == size_type(-1)) {
3848  inin.ctx.invalid_convex_num();
3849  } else {
3850  gauss_pt_corresp gpc;
3851  gpc.pgt1 = m.trans_of_convex(cv);
3852  gpc.pgt2 = m.trans_of_convex(adj_face.cv);
3853  gpc.pai = pai;
3854  auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
3855  auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
3856  adj_face.f);
3857  auto str1 = gpc.pgt1->structure();
3858  auto str2 = gpc.pgt2->structure();
3859  size_type nbptf1 = str1->nb_points_of_face(f);
3860  size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
3861  gpc.nodes.resize(nbptf1*2);
3862  for (size_type i = 0; i < nbptf1; ++i) {
3863  gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
3864  bool found = false;
3865  for (size_type j = 0; j < nbptf2; ++j) {
3866  if (inds_pt2[j] == inds_pt1[i]) {
3867  gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
3868  found = true;
3869  break;
3870  }
3871  }
3872  GMM_ASSERT1(found, "Internal error");
3873  }
3874  bgeot::pstored_point_tab pspt = 0;
3875  auto itm = neighbour_corresp.find(gpc);
3876  if (itm != neighbour_corresp.end()) {
3877  pspt = itm->second;
3878  } else {
3879  size_type nbpt = pai->nb_points_on_face(f);
3880  bgeot::geotrans_inv_convex gic;
3881  gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
3882  size_type first_ind = pai->ind_first_point_on_face(f);
3883  const bgeot::stored_point_tab
3884  &spt = *(pai->pintegration_points());
3885  base_matrix G;
3886  m.points_of_convex(cv, G);
3887  fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
3888  std::vector<base_node> P_ref(nbpt);
3889 
3890  for (size_type i = 0; i < nbpt; ++i) {
3891  ctx_x.set_xref(spt[first_ind+i]);
3892  bool converged = true;
3893  gic.invert(ctx_x.xreal(), P_ref[i], converged);
3894  bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
3895  GMM_ASSERT1(is_in && converged,"Geometric transformation "
3896  "inversion has failed in neighbour transformation");
3897  }
3898  pspt = store_point_tab(P_ref);
3899  neighbour_corresp[gpc] = pspt;
3900  }
3901  m.points_of_convex(adj_face.cv, inin.G);
3902  bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
3903  inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
3904  }
3905  }
3906  }
3907 
3908  if (inin.ctx.have_pgp()) {
3909  inin.ctx.set_ii(ipt);
3910  inin.pt_type = 1;
3911  inin.has_ctx = true;
3912  inin.pt_y = inin.ctx.xreal();
3913  inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
3914  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
3915  inin.m = &m;
3916  } else {
3917  base_node P_ref;
3918  size_type cv;
3919  short_type face_num;
3920  gmm::clear(inin.Normal);
3921  inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m),
3922  cv, face_num, P_ref, inin.Normal,
3923  inin.derivatives, false);
3924  if (inin.pt_type) {
3925  if (cv != size_type(-1)) {
3926  inin.m->points_of_convex(cv, inin.G);
3927  inin.ctx.change((inin.m)->trans_of_convex(cv),
3928  0, P_ref, inin.G, cv, face_num);
3929  inin.has_ctx = true;
3930  if (face_num != short_type(-1)) {
3931  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
3932  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
3933  } else
3934  inin.Normal.resize(0);
3935  inin.pt_y = inin.ctx.xreal();
3936  } else {
3937  inin.ctx.invalid_convex_num();
3938  inin.pt_y = P_ref;
3939  inin.has_ctx = false;
3940  }
3941  } else {
3942  inin.ctx.invalid_convex_num();
3943  inin.Normal.resize(0);
3944  inin.pt_y.resize(0);
3945  inin.has_ctx = false;
3946  }
3947  }
3948  GA_DEBUG_INFO("Instruction: end of call interpolate neighbour "
3949  "transformation");
3950  return 0;
3951  }
3952  ga_instruction_neighbour_transformation_call
3953  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
3954  pinterpolate_transformation t, fem_interpolation_context &ctxx,
3955  base_small_vector &No, const mesh &mm, size_type &ipt_,
3956  papprox_integration &pai_, bgeot::geotrans_precomp_pool &gp_pool_,
3957  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbour_corresp_)
3958  : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
3959  ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
3960  neighbour_corresp(neighbour_corresp_) {}
3961  };
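  // Note: neighbour_corresp caches, for each distinct key (integration
  // method, pair of geometric transformations, node correspondence), the
  // images of the face integration points in the reference element of the
  // neighbour element. The geometric transformation inversion is thus paid
  // only once per configuration, at the first integration point (ipt == 0);
  // subsequent points merely call inin.ctx.set_ii(ipt) on the precomputed
  // context.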
3962 
3963 
3964  struct ga_instruction_scalar_assembly : public ga_instruction {
3965  base_tensor &t;
3966  scalar_type &E, &coeff;
3967  virtual int exec() {
3968  GA_DEBUG_INFO("Instruction: scalar term assembly");
3969  E += t[0] * coeff;
3970  return 0;
3971  }
3972  ga_instruction_scalar_assembly(base_tensor &t_, scalar_type &E_,
3973  scalar_type &coeff_)
3974  : t(t_), E(E_), coeff(coeff_) {}
3975  };
3976 
3977  struct ga_instruction_fem_vector_assembly : public ga_instruction {
3978  base_tensor &t;
3979  base_vector &Vr, &Vn;
3980  const fem_interpolation_context &ctx;
3981  const gmm::sub_interval &Ir, &In;
3982  const mesh_fem *mfn, **mfg;
3983  scalar_type &coeff;
3984  const size_type &nbpt, &ipt;
3985  base_vector elem;
3986  bool interpolate;
3987  virtual int exec() {
3988  GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
3989  bool empty_weight = (coeff == scalar_type(0));
3990  if (ipt == 0 || interpolate) {
3991  if (empty_weight) elem.resize(0);
3992  elem.resize(t.size());
3993  if (!empty_weight) {
3994  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
3995  size_type nd = ((t.size()) >> 2);
3996  for (size_type i = 0; i < nd; ++i) {
3997  *it++ = (*itt++) * coeff; *it++ = (*itt++) * coeff;
3998  *it++ = (*itt++) * coeff; *it++ = (*itt++) * coeff;
3999  }
4000  for (; it != ite;) *it++ = (*itt++) * coeff;
4001  }
4002  } else if (!empty_weight) {
4003  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
4004  size_type nd = ((t.size()) >> 2);
4005  for (size_type i = 0; i < nd; ++i) {
4006  *it++ += (*itt++) * coeff; *it++ += (*itt++) * coeff;
4007  *it++ += (*itt++) * coeff; *it++ += (*itt++) * coeff;
4008  }
4009  for (; it != ite;) *it++ += (*itt++) * coeff;
4010  // gmm::add(gmm::scaled(t.as_vector(), coeff), elem);
4011  }
4012  if (ipt == nbpt-1 || interpolate) {
4013  const mesh_fem &mf = *(mfg ? *mfg : mfn);
4014  GMM_ASSERT1(mfg ? *mfg : mfn, "Internal error");
4015  const gmm::sub_interval &I = mf.is_reduced() ? Ir : In;
4016  base_vector &V = mf.is_reduced() ? Vr : Vn;
4017  if (!(ctx.is_convex_num_valid())) return 0;
4018  size_type cv_1 = ctx.convex_num();
4019  // size_type cv_1 = ctx.is_convex_num_valid()
4020  // ? ctx.convex_num() : mf.convex_index().first_true();
4021  GA_DEBUG_ASSERT(V.size() >= I.first() + mf.nb_basic_dof(),
4022  "Bad assembly vector size");
4023  auto &ct = mf.ind_scalar_basic_dof_of_element(cv_1);
4024  size_type qmult = mf.get_qdim();
4025  if (qmult > 1) qmult /= mf.fem_of_element(cv_1)->target_dim();
4026  size_type ifirst = I.first();
4027  auto ite = elem.begin();
4028  for (auto itc = ct.begin(); itc != ct.end(); ++itc)
4029  for (size_type q = 0; q < qmult; ++q)
4030  V[ifirst+(*itc)+q] += *ite++;
4031  GMM_ASSERT1(ite == elem.end(), "Internal error");
4032  }
4033  return 0;
4034  }
4035  ga_instruction_fem_vector_assembly
4036  (base_tensor &t_, base_vector &Vr_, base_vector &Vn_,
4037  const fem_interpolation_context &ctx_,
4038  const gmm::sub_interval &Ir_, const gmm::sub_interval &In_,
4039  const mesh_fem *mfn_, const mesh_fem **mfg_,
4040  scalar_type &coeff_,
4041  const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
4042  : t(t_), Vr(Vr_), Vn(Vn_), ctx(ctx_), Ir(Ir_), In(In_), mfn(mfn_),
4043  mfg(mfg_), coeff(coeff_), nbpt(nbpt_), ipt(ipt_),
4044  interpolate(interpolate_) {}
4045  };
4046 
4047  struct ga_instruction_vector_assembly : public ga_instruction {
4048  base_tensor &t;
4049  base_vector &V;
4050  const gmm::sub_interval &I;
4051  scalar_type &coeff;
4052  virtual int exec() {
4053  GA_DEBUG_INFO("Instruction: vector term assembly for "
4054  "fixed size variable");
4055  gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
4056  return 0;
4057  }
4058  ga_instruction_vector_assembly(base_tensor &t_, base_vector &V_,
4059  const gmm::sub_interval &I_,
4060  scalar_type &coeff_)
4061  : t(t_), V(V_), I(I_), coeff(coeff_) {}
4062  };
4063 
4064  struct ga_instruction_assignment : public ga_instruction {
4065  base_tensor &t;
4066  base_vector &V;
4067  const fem_interpolation_context &ctx;
4068  const im_data *imd;
4069  virtual int exec() {
4070  GA_DEBUG_INFO("Instruction: Assignment to im_data");
4071  imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
4072  return 0;
4073  }
4074  ga_instruction_assignment(base_tensor &t_, base_vector &V_,
4075  const fem_interpolation_context &ctx_,
4076  const im_data *imd_)
4077  : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
4078  };
4079 
4080  template <class MAT>
4081  inline void add_elem_matrix_
4082  (MAT &K, const std::vector<size_type> &dofs1,
4083  const std::vector<size_type> &dofs2, std::vector<size_type> &/*dofs1_sort*/,
4084  base_vector &elem, scalar_type threshold, size_type /* N */) {
4085  base_vector::const_iterator it = elem.cbegin();
4086  for (const size_type &dof2 : dofs2)
4087  for (const size_type &dof1 : dofs1) {
4088  if (gmm::abs(*it) > threshold)
4089  K(dof1, dof2) += *it;
4090  ++it;
4091  }
4092  }
4093 
4094  // static const std::vector<size_type> *the_indto_sort;
4095  // int compare_my_indices(const void *a, const void *b) {
4096  // size_type aa = *((const size_type *)(a));
4097  // size_type bb = *((const size_type *)(b));
4098  // return int((*the_indto_sort)[aa]) - int((*the_indto_sort)[bb]);
4099  // }
4100 
4101  inline void add_elem_matrix_
4102  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4103  const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
4104  std::vector<size_type> &dofs1_sort,
4105  base_vector &elem, scalar_type threshold, size_type N) {
4106  size_type maxest = (N+1) * std::max(dofs1.size(), dofs2.size());
4107  size_type s1 = dofs1.size(), s2 = dofs2.size();
4108  gmm::elt_rsvector_<scalar_type> ev;
4109 
4110  dofs1_sort.resize(s1);
4111  for (size_type i = 0; i < s1; ++i) { // insertion sort
4112  size_type j = i, k = j-1;
4113  while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
4114  { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
4115  dofs1_sort[j] = i;
4116  }
4117 
4118  // dofs1_sort.resize(s1); // test with qsort: not faster in the tested cases
4119  // for (size_type i = 0; i < s1; ++i) dofs1_sort[i] = i;
4120  // the_indto_sort = &dofs1;
4121  // qsort(&(dofs1_sort[0]), s1, sizeof(size_type), compare_my_indices);
4122 
4123  base_vector::const_iterator it = elem.cbegin();
4124  for (size_type j = 0; j < s2; ++j) { // Iteration on columns
4125  if (j) it += s1;
4126  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dofs2[j]];
4127  size_type nb = col.size();
4128 
4129  if (nb == 0) {
4130  col.reserve(maxest);
4131  for (size_type i = 0; i < s1; ++i) {
4132  size_type k = dofs1_sort[i]; ev.e = *(it+k);
4133  if (gmm::abs(ev.e) > threshold) { ev.c=dofs1[k]; col.push_back(ev); }
4134  }
4135  } else { // column merge
4136  size_type ind = 0;
4137  for (size_type i = 0; i < s1; ++i) {
4138  size_type k = dofs1_sort[i]; ev.e = *(it+k);
4139  if (gmm::abs(ev.e) > threshold) {
4140  ev.c = dofs1[k];
4141 
4142  size_type count = nb - ind, step, l;
4143  while (count > 0) {
4144  step = count / 2; l = ind + step;
4145  if (col[l].c < ev.c) { ind = ++l; count -= step + 1; }
4146  else count = step;
4147  }
4148 
4149  auto itc = col.begin() + ind;
4150  if (ind != nb && itc->c == ev.c) itc->e += ev.e;
4151  else {
4152  if (nb - ind > 1300)
4153  GMM_WARNING2("Inefficient addition of element in rsvector with "
4154  << col.size() - ind << " non-zero entries");
4155  col.push_back(ev);
4156  if (ind != nb) {
4157  itc = col.begin() + ind;
4158  auto ite = col.end(); --ite; auto itee = ite;
4159  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4160  *itc = ev;
4161  }
4162  ++nb;
4163  }
4164  ++ind;
4165  }
4166  }
4167  }
4168  }
4169  }
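  // Note: K is stored column-wise, each column being a vector of
  // (row, value) entries sorted by row index. Because the local rows are
  // pre-sorted through dofs1_sort, the insertion position `ind` only moves
  // forward along a column, so each binary search is restricted to the
  // remaining tail col[ind..nb) and a fresh column is filled by plain
  // push_back calls.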
4170 
4171 
4172  template <class MAT = model_real_sparse_matrix>
4173  struct ga_instruction_matrix_assembly : public ga_instruction {
4174  const base_tensor &t;
4175  MAT &Kr, &Kn;
4176  const fem_interpolation_context &ctx1, &ctx2;
4177  const gmm::sub_interval &Ir1, &Ir2;
4178  const gmm::sub_interval &In1, &In2;
4179  const mesh_fem *mfn1, *mfn2;
4180  const mesh_fem **mfg1, **mfg2;
4181  const scalar_type &coeff, &alpha1, &alpha2;
4182  const size_type &nbpt, &ipt;
4183  base_vector elem;
4184  bool interpolate;
4185  std::vector<size_type> dofs1, dofs2, dofs1_sort;
4186  virtual int exec() {
4187  GA_DEBUG_INFO("Instruction: matrix term assembly");
4188  bool empty_weight = (coeff == scalar_type(0));
4189  if (ipt == 0 || interpolate) {
4190  if (empty_weight) elem.resize(0);
4191  elem.resize(t.size());
4192  if (!empty_weight) {
4193  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
4194  scalar_type e = coeff*alpha1*alpha2;
4195  size_type nd = ((t.size()) >> 2);
4196  for (size_type i = 0; i < nd; ++i) {
4197  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4198  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4199  }
4200  for (; it != ite;) *it++ = (*itt++) * e;
4201  }
4202  } else if (!empty_weight){
4203  // Faster than a daxpy blas call on my config
4204  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
4205  scalar_type e = coeff*alpha1*alpha2;
4206  size_type nd = ((t.size()) >> 2);
4207  for (size_type i = 0; i < nd; ++i) {
4208  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4209  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4210  }
4211  for (; it != ite;) *it++ += (*itt++) * e;
4212  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4213  }
4214  if (ipt == nbpt-1 || interpolate) {
4215  const mesh_fem *pmf1 = mfg1 ? *mfg1 : mfn1;
4216  const mesh_fem *pmf2 = mfg2 ? *mfg2 : mfn2;
4217  bool reduced = (pmf1 && pmf1->is_reduced())
4218  || (pmf2 && pmf2->is_reduced());
4219  MAT &K = reduced ? Kr : Kn;
4220  const gmm::sub_interval &I1 = reduced ? Ir1 : In1;
4221  const gmm::sub_interval &I2 = reduced ? Ir2 : In2;
4222  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4223 
4224  scalar_type ninf = gmm::vect_norminf(elem);
4225  if (ninf == scalar_type(0)) return 0;
4226 
4227  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4228  size_type cv1 = pmf1 ? ctx1.convex_num() : s1;
4229  size_type cv2 = pmf2 ? ctx2.convex_num() : s2;
4230  size_type N = 1;
4231 
4232  dofs1.assign(s1, I1.first());
4233  if (pmf1) {
4234  if (!(ctx1.is_convex_num_valid())) return 0;
4235  N = ctx1.N();
4236  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4237  size_type qmult1 = pmf1->get_qdim();
4238  if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
4239  auto itd = dofs1.begin();
4240  if (qmult1 == 1) {
4241  for (auto itt = ct1.begin(); itt != ct1.end(); ++itt)
4242  *itd++ += *itt;
4243  } else {
4244  for (auto itt = ct1.begin(); itt != ct1.end(); ++itt)
4245  for (size_type q = 0; q < qmult1; ++q)
4246  *itd++ += *itt + q;
4247  }
4248  } else
4249  for (size_type i=0; i < s1; ++i) dofs1[i] += i;
4250 
4251  if (pmf1 == pmf2 && cv1 == cv2) {
4252  if (I1.first() == I2.first()) {
4253  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4254  } else {
4255  dofs2.resize(dofs1.size());
4256  for (size_type i = 0; i < dofs1.size(); ++i)
4257  dofs2[i] = dofs1[i] + I2.first() - I1.first();
4258  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4259  }
4260  } else {
4261  dofs2.assign(s2, I2.first());
4262  if (pmf2) {
4263  if (!(ctx2.is_convex_num_valid())) return 0;
4264  N = std::max(N, ctx2.N());
4265  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4266  size_type qmult2 = pmf2->get_qdim();
4267  if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
4268  auto itd = dofs2.begin();
4269  if (qmult2 == 1) {
4270  for (auto itt = ct2.begin(); itt != ct2.end(); ++itt)
4271  *itd++ += *itt;
4272  } else {
4273  for (auto itt = ct2.begin(); itt != ct2.end(); ++itt)
4274  for (size_type q = 0; q < qmult2; ++q)
4275  *itd++ += *itt + q;
4276  }
4277  } else
4278  for (size_type i=0; i < s2; ++i) dofs2[i] += i;
4279 
4280  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4281  }
4282  }
4283  return 0;
4284  }
4285  ga_instruction_matrix_assembly
4286  (const base_tensor &t_, MAT &Kr_, MAT &Kn_,
4287  const fem_interpolation_context &ctx1_,
4288  const fem_interpolation_context &ctx2_,
4289  const gmm::sub_interval &Ir1_, const gmm::sub_interval &In1_,
4290  const gmm::sub_interval &Ir2_, const gmm::sub_interval &In2_,
4291  const mesh_fem *mfn1_, const mesh_fem **mfg1_,
4292  const mesh_fem *mfn2_, const mesh_fem **mfg2_,
4293  const scalar_type &coeff_,
4294  const scalar_type &alpha2_, const scalar_type &alpha1_,
4295  const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
4296  : t(t_), Kr(Kr_), Kn(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4297  Ir1(Ir1_), Ir2(Ir2_), In1(In1_), In2(In2_),
4298  mfn1(mfn1_), mfn2(mfn2_), mfg1(mfg1_), mfg2(mfg2_),
4299  coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4300  nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
4301  dofs1(0), dofs2(0) {}
4302  };
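  // Worked form of the accumulation above: over one element the
  // instruction builds
  //   elem = sum_ipt coeff_ipt * alpha1 * alpha2 * t_ipt,
  // and scatters elem into K(dofs1, dofs2) only once, at ipt == nbpt-1,
  // unless an interpolate transformation forces per-point scattering.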
4303 
4304  template <class MAT = model_real_sparse_matrix>
4305  struct ga_instruction_matrix_assembly_standard_scalar: public ga_instruction {
4306  const base_tensor &t;
4307  MAT &K;
4308  const fem_interpolation_context &ctx1, &ctx2;
4309  const gmm::sub_interval &I1, &I2;
4310  const mesh_fem *pmf1, *pmf2;
4311  const scalar_type &coeff, &alpha1, &alpha2;
4312  const size_type &nbpt, &ipt;
4313  base_vector elem;
4314  std::vector<size_type> dofs1, dofs2, dofs1_sort;
4315  virtual int exec() {
4316  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4317  "scalar fems");
4318  if (ipt == 0) {
4319  elem.resize(t.size());
4320  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
4321  scalar_type e = coeff*alpha1*alpha2;
4322  size_type nd = ((t.size()) >> 2);
4323  for (size_type i = 0; i < nd; ++i) {
4324  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4325  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4326  }
4327  for (; it != ite;) *it++ = (*itt++) * e;
4328  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4329  } else {
4330  // Faster than a daxpy blas call on my config
4331  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
4332  scalar_type e = coeff*alpha1*alpha2;
4333  size_type nd = ((t.size()) >> 2);
4334  for (size_type i = 0; i < nd; ++i) {
4335  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4336  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4337  }
4338  for (; it != ite;) *it++ += (*itt++) * e;
4339  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4340  }
4341  if (ipt == nbpt-1) {
4342  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4343 
4344  scalar_type ninf = gmm::vect_norminf(elem);
4345  if (ninf == scalar_type(0)) return 0;
4346 
4347  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
4348  if (cv1 == size_type(-1)) return 0;
4349  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4350  GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0], "Internal error");
4351  dofs1.resize(ct1.size());
4352  for (size_type i = 0; i < ct1.size(); ++i)
4353  dofs1[i] = ct1[i] + I1.first();
4354 
4355  if (pmf2 == pmf1 && cv1 == cv2) {
4356  if (I1.first() == I2.first()) {
4357  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4358  } else {
4359  dofs2.resize(dofs1.size());
4360  for (size_type i = 0; i < dofs1.size(); ++i)
4361  dofs2[i] = dofs1[i] + I2.first() - I1.first();
4362  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4363  }
4364  } else {
4365  if (cv2 == size_type(-1)) return 0;
4366  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4367  GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1], "Internal error");
4368  dofs2.resize(ct2.size());
4369  for (size_type i = 0; i < ct2.size(); ++i)
4370  dofs2[i] = ct2[i] + I2.first();
4371  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4372  }
4373  }
4374  return 0;
4375  }
4376  ga_instruction_matrix_assembly_standard_scalar
4377  (const base_tensor &t_, MAT &Kn_,
4378  const fem_interpolation_context &ctx1_,
4379  const fem_interpolation_context &ctx2_,
4380  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
4381  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4382  const scalar_type &coeff_, const scalar_type &alpha2_,
4383  const scalar_type &alpha1_,
4384  const size_type &nbpt_, const size_type &ipt_)
4385  : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4386  I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4387  coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4388  nbpt(nbpt_), ipt(ipt_) {}
4389  };
4390 
4391  template <class MAT = model_real_sparse_matrix>
4392  struct ga_instruction_matrix_assembly_standard_vector: public ga_instruction {
4393  const base_tensor &t;
4394  MAT &K;
4395  const fem_interpolation_context &ctx1, &ctx2;
4396  const gmm::sub_interval &I1, &I2;
4397  const mesh_fem *pmf1, *pmf2;
4398  const scalar_type &coeff, &alpha1, &alpha2;
4399  const size_type &nbpt, &ipt;
4400  mutable base_vector elem;
4401  mutable std::vector<size_type> dofs1, dofs2, dofs1_sort;
4402  virtual int exec() {
4403  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4404  "vector fems");
4405  if (ipt == 0) {
4406  elem.resize(t.size());
4407  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
4408  scalar_type e = coeff*alpha1*alpha2;
4409  size_type nd = ((t.size()) >> 3);
4410  for (size_type i = 0; i < nd; ++i) {
4411  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4412  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4413  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4414  *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4415  }
4416  for (; it != ite;) *it++ = (*itt++) * e;
4417  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4418  } else {
4419  // (Far) faster than a daxpy blas call on my config.
4420  auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
4421  scalar_type e = coeff*alpha1*alpha2;
4422  size_type nd = ((t.size()) >> 3);
4423  for (size_type i = 0; i < nd; ++i) {
4424  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4425  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4426  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4427  *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4428  }
4429  for (; it != ite;) *it++ += (*itt++) * e;
4430  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4431  }
4432  if (ipt == nbpt-1) {
4433  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4434 
4435  scalar_type ninf = gmm::vect_norminf(elem);
4436  if (ninf == scalar_type(0)) return 0;
4437  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
4438 
4439  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4440  if (cv1 == size_type(-1)) return 0;
4441  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4442  size_type qmult1 = pmf1->get_qdim();
4443  if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
4444  dofs1.assign(s1, I1.first());
4445  auto itd = dofs1.begin();
4446  for (auto itt = ct1.begin(); itt != ct1.end(); ++itt)
4447  for (size_type q = 0; q < qmult1; ++q)
4448  *itd++ += *itt + q;
4449 
4450  if (pmf2 == pmf1 && cv1 == cv2) {
4451  if (I1.first() == I2.first()) {
4452  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4453  } else {
4454  dofs2.resize(dofs1.size());
4455  for (size_type i = 0; i < dofs1.size(); ++i)
4456  dofs2[i] = dofs1[i] + I2.first() - I1.first();
4457  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4458  }
4459  } else {
4460  if (cv2 == size_type(-1)) return 0;
4461  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4462  size_type qmult2 = pmf2->get_qdim();
4463  if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
4464  dofs2.assign(s2, I2.first());
4465  itd = dofs2.begin();
4466  for (auto itt = ct2.begin(); itt != ct2.end(); ++itt)
4467  for (size_type q = 0; q < qmult2; ++q)
4468  *itd++ += *itt + q;
4469 
4470  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4471  }
4472  }
4473  return 0;
4474  }
4475  ga_instruction_matrix_assembly_standard_vector
4476  (const base_tensor &t_, MAT &Kn_,
4477  const fem_interpolation_context &ctx1_,
4478  const fem_interpolation_context &ctx2_,
4479  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
4480  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4481  const scalar_type &coeff_, const scalar_type &alpha2_,
4482  const scalar_type &alpha1_, const size_type &nbpt_,
4483  const size_type &ipt_)
4484  : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4485  I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4486  coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4487  nbpt(nbpt_), ipt(ipt_), dofs1(0), dofs2(0) {}
4488  };
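  // Note: for a vector fem obtained by replicating a scalar fem qdim
  // times, the qmult loops above expand each scalar basic dof d into the
  // consecutive global dofs I.first() + d + q, q = 0, ..., qmult-1,
  // matching the component numbering of the fem.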
4489 
4490  struct ga_instruction_matrix_assembly_standard_vector_opt10_2
4491  : public ga_instruction {
4492  const base_tensor &t;
4493  model_real_sparse_matrix &K;
4494  const fem_interpolation_context &ctx1, &ctx2;
4495  const gmm::sub_interval &I1, &I2;
4496  const mesh_fem *pmf1, *pmf2;
4497  const scalar_type &coeff, &alpha1, &alpha2;
4498  const size_type &nbpt, &ipt;
4499  mutable base_vector elem;
4500  mutable std::vector<size_type> dofs1, dofs2, dofs1_sort;
4501  virtual int exec() {
4502  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4503  "vector fems optimized for format 10 qdim 2");
4504  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s1_q = 2*s1;
4505  size_type ss1 = s1/2, ss2 = s2/2;
4506  scalar_type e = coeff*alpha1*alpha2;
4507  if (ipt == 0) {
4508  elem.resize(ss1*ss2);
4509  auto itel = elem.begin();
4510  for (size_type j = 0; j < ss2; ++j) {
4511  auto it = t.begin() + j*s1_q;
4512  for (size_type i = 0; i < ss1; ++i, it += 2)
4513  *itel++ = (*it) * e;
4514  }
4515  } else {
4516  auto itel = elem.begin();
4517  for (size_type j = 0; j < ss2; ++j) {
4518  auto it = t.begin() + j*s1_q;
4519  for (size_type i = 0; i < ss1; ++i, it += 2)
4520  *itel++ += (*it) * e;
4521  }
4522  }
4523  if (ipt == nbpt-1) {
4524  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4525 
4526  scalar_type ninf = gmm::vect_norminf(elem) * 1E-14;
4527  if (ninf == scalar_type(0)) return 0;
4528  size_type N = ctx1.N();
4529  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4530  size_type i1 = I1.first(), i2 = I2.first();
4531  if (cv1 == size_type(-1)) return 0;
4532  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4533  dofs1.resize(ss1);
4534  for (size_type i = 0; i < ss1; ++i) dofs1[i] = i1 + ct1[i];
4535 
4536  if (pmf2 == pmf1 && cv1 == cv2) {
4537  if (i1 == i2) {
4538  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4539  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4540  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4541  } else {
4542  dofs2.resize(ss2);
4543  for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct1[i];
4544  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4545  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4546  for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4547  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4548  }
4549  } else {
4550  if (cv2 == size_type(-1)) return 0;
4551  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4552  dofs2.resize(ss2);
4553  for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct2[i];
4554  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4555  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4556  for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4557  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4558  }
4559  }
4560  return 0;
4561  }
4562  ga_instruction_matrix_assembly_standard_vector_opt10_2
4563  (const base_tensor &t_, model_real_sparse_matrix &Kn_,
4564  const fem_interpolation_context &ctx1_,
4565  const fem_interpolation_context &ctx2_,
4566  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
4567  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4568  const scalar_type &coeff_, const scalar_type &alpha2_,
4569  const scalar_type &alpha1_, const size_type &nbpt_,
4570  const size_type &ipt_)
4571  : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4572  I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4573  coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4574  nbpt(nbpt_), ipt(ipt_), dofs1(0), dofs2(0) {}
4575  };
4576 
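  // Same optimization as opt10_2 for qdim 3: one nonzero entry per 3x3
  // block of t, extracted with stride 3 and scattered three times with the
  // dof indices shifted by 0, 1 and 2.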
4577  struct ga_instruction_matrix_assembly_standard_vector_opt10_3
4578  : public ga_instruction {
4579  const base_tensor &t;
4580  model_real_sparse_matrix &K;
4581  const fem_interpolation_context &ctx1, &ctx2;
4582  const gmm::sub_interval &I1, &I2;
4583  const mesh_fem *pmf1, *pmf2;
4584  const scalar_type &coeff, &alpha1, &alpha2;
4585  const size_type &nbpt, &ipt;
4586  mutable base_vector elem;
4587  mutable std::vector<size_type> dofs1, dofs2, dofs1_sort;
4588  virtual int exec() {
4589  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4590  "vector fems optimized for format 10 qdim 3");
4591  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s1_q = 3*s1;
4592  size_type ss1 = s1/3, ss2 = s2/3;
4593  scalar_type e = coeff*alpha1*alpha2;
4594  if (ipt == 0) {
4595  elem.resize(ss1*ss2);
4596  auto itel = elem.begin();
4597  for (size_type j = 0; j < ss2; ++j) {
4598  auto it = t.begin() + j*s1_q;
4599  for (size_type i = 0; i < ss1; ++i, it += 3)
4600  *itel++ = (*it) * e;
4601  }
4602  } else {
4603  auto itel = elem.begin();
4604  for (size_type j = 0; j < ss2; ++j) {
4605  auto it = t.begin() + j*s1_q;
4606  for (size_type i = 0; i < ss1; ++i, it += 3)
4607  *itel++ += (*it) * e;
4608  }
4609  }
4610  if (ipt == nbpt-1) {
4611  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4612 
4613  scalar_type ninf = gmm::vect_norminf(elem)*1E-14;
4614  if (ninf == scalar_type(0)) return 0;
4615  size_type N = ctx1.N();
4616  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4617  size_type i1 = I1.first(), i2 = I2.first();
4618  if (cv1 == size_type(-1)) return 0;
4619  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4620  dofs1.resize(ss1);
4621  for (size_type i = 0; i < ss1; ++i) dofs1[i] = i1 + ct1[i];
4622 
4623  if (pmf2 == pmf1 && cv1 == cv2) {
4624  if (i1 == i2) {
4625  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4626  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4627  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4628  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4629  add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4630  } else {
4631  dofs2.resize(ss2);
4632  for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct1[i];
4633  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4634  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4635  for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4636  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4637  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4638  for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4639  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4640  }
4641  } else {
4642  if (cv2 == size_type(-1)) return 0;
4643  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4644  dofs2.resize(ss2);
4645  for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct2[i];
4646  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4647  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4648  for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4649  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4650  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4651  for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4652  add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4653  }
4654  }
4655  return 0;
4656  }
4657  ga_instruction_matrix_assembly_standard_vector_opt10_3
4658  (const base_tensor &t_, model_real_sparse_matrix &Kn_,
4659  const fem_interpolation_context &ctx1_,
4660  const fem_interpolation_context &ctx2_,
4661  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
4662  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4663  const scalar_type &coeff_, const scalar_type &alpha2_,
4664  const scalar_type &alpha1_, const size_type &nbpt_,
4665  const size_type &ipt_)
4666  : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4667  I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4668  coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4669  nbpt(nbpt_), ipt(ipt_), dofs1(0), dofs2(0) {}
4670  };
4671 
4672 
4673 
4674 
4675 
4676 
4677  //=========================================================================
4678  // Compilation of assembly trees into a list of basic instructions
4679  //=========================================================================
4680 
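  // The routines below translate a semantically analysed assembly tree into
  // the flat lists of basic instructions executed per element and per Gauss
  // point. The two helpers that follow register the dof interval of each
  // variable in the global system and, for reduced mesh_fems, cache a
  // version of the variable extended to the basic (unreduced) dofs.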
4681  static void add_interval_to_gis(const ga_workspace &workspace,
4682  const std::string &varname,
4683  ga_instruction_set &gis) {
4684  if (workspace.variable_group_exists(varname)) {
4685  for (const std::string &v : workspace.variable_group(varname))
4686  add_interval_to_gis(workspace, v, gis);
4687  } else {
4688  if (gis.var_intervals.find(varname) == gis.var_intervals.end()) {
4689  const mesh_fem *mf = workspace.associated_mf(varname);
4690  size_type nd = mf ? mf->nb_basic_dof() :
4691  gmm::vect_size(workspace.value(varname));
4692  gis.var_intervals[varname]=gmm::sub_interval(gis.nb_dof, nd);
4693  gis.nb_dof += nd;
4694  }
4695  gis.max_dof = std::max(gis.max_dof,
4696  workspace.interval_of_variable(varname).last());
4697  }
4698  }
4699 
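  // For a reduced mesh_fem, assembly is performed on the basic dof
  // numbering, so the reduced vector of unknowns is expanded once here
  // through the extension matrix of the mesh_fem (roughly U_basic =
  // E*U_reduced) and cached in gis.really_extended_vars; non-reduced
  // variables are referenced in place. Variable groups are expanded
  // recursively, as in add_interval_to_gis above.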
4700  static void extend_variable_in_gis(const ga_workspace &workspace,
4701  const std::string &varname,
4702  ga_instruction_set &gis) {
4703  if (workspace.variable_group_exists(varname)) {
4704  for (const std::string &v : workspace.variable_group(varname))
4705  extend_variable_in_gis(workspace, v, gis);
4706  } else if (gis.extended_vars.find(varname)==gis.extended_vars.end()) {
4707  const mesh_fem *mf = workspace.associated_mf(varname);
4708  if (mf->is_reduced()) {
4709  auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
4710  base_vector U(mf->nb_basic_dof() * n);
4711  mf->extend_vector(workspace.value(varname), U);
4712  gis.really_extended_vars[varname] = U;
4713  gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
4714  } else {
4715  gis.extended_vars[varname] = &(workspace.value(varname));
4716  }
4717  }
4718  }
4719 
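  // Removes pnode and all of its descendants from the map of already
  // compiled nodes (indexed by hash value), so that subsequent subtrees can
  // no longer be short-circuited to tensors whose content is only valid
  // when the enclosing interpolate filter is not skipped.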
4720  static void ga_clear_node_list
4721  (pga_tree_node pnode, std::map<scalar_type,
4722  std::list<pga_tree_node> > &node_list) {
4723  std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
4724  for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
4725  it != loc_node_list.end(); ) {
4726  if (*it == pnode) it = loc_node_list.erase(it); else ++it;
4727  }
4728  for (size_type i = 0; i < pnode->children.size(); ++i)
4729  ga_clear_node_list(pnode->children[i], node_list);
4730  }
4731 
4732  static void ga_compile_node(const pga_tree_node pnode,
4733  const ga_workspace &workspace,
4734  ga_instruction_set &gis,
4735  ga_instruction_set::region_mim_instructions &rmi,
4736  const mesh &m, bool function_case,
4737  ga_if_hierarchy &if_hierarchy) {
4738 
4739  if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
4740  pnode->node_type == GA_NODE_OPERATOR ||
4741  pnode->node_type == GA_NODE_SPEC_FUNC ||
4742  pnode->node_type == GA_NODE_CONSTANT ||
4743  pnode->node_type == GA_NODE_ALLINDICES ||
4744  pnode->node_type == GA_NODE_RESHAPE ||
4745  pnode->node_type == GA_NODE_SWAP_IND ||
4746  pnode->node_type == GA_NODE_IND_MOVE_LAST ||
4747  pnode->node_type == GA_NODE_CONTRACT) return;
4748 
4749  // cout << "compiling "; ga_print_node(pnode, cout); cout << endl;
4750 
4751  pga_instruction pgai;
4752  ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
4753  ga_if_hierarchy new_if_hierarchy;
4754 
4755  const mesh_fem *mf1 = 0, *mf2 = 0;
4756  const mesh_fem **mfg1 = 0, **mfg2 = 0;
4757  fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
4758  bool tensor_to_clear = false;
4759  bool tensor_to_adapt = false;
4760 
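    // Determine, for each of the two possible test functions, the mesh_fem
    // (mf1/mf2) and the interpolation context to be used. When a test
    // function is reached through an interpolate transformation and belongs
    // to a variable group, the actual mesh_fem is only known at execution
    // time, hence the double pointers mfg1/mfg2.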
4761  if (pnode->test_function_type) {
4762  if (pnode->name_test1.size())
4763  mf1 = workspace.associated_mf(pnode->name_test1);
4764  if (mf1) {
4765  pctx1 = &(gis.ctx);
4766  const std::string &intn1 = pnode->interpolate_name_test1;
4767  if (intn1.size()) {
4768  tensor_to_adapt = true;
4769  pctx1 = &(rmi.interpolate_infos[intn1].ctx);
4770  if (workspace.variable_group_exists(pnode->name_test1)) {
4771  ga_instruction_set::variable_group_info &vgi =
4772  rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
4773  mfg1 = &(vgi.mf);
4774  mf1 = 0;
4775  }
4776  }
4777  }
4778  if (pnode->name_test2.size())
4779  mf2 = workspace.associated_mf(pnode->name_test2);
4780  if (mf2) {
4781  pctx2 = &(gis.ctx);
4782  const std::string &intn2 = pnode->interpolate_name_test2;
4783  if (intn2.size()) {
4784  tensor_to_adapt = true;
4785  pctx2 = &(rmi.interpolate_infos[intn2].ctx);
4786  if (workspace.variable_group_exists(pnode->name_test2)) {
4787  ga_instruction_set::variable_group_info &vgi =
4788  rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
4789  mfg2 = &(vgi.mf);
4790  mf2 = 0;
4791  }
4792  }
4793  }
4794  }
4795 
4796  // Produce a resize instruction which is stored if no equivalent node is
4797  // detected and if the mesh is not uniform.
4798  pnode->t.set_to_original(); pnode->t.set_sparsity(0, 0);
4799  bool is_uniform = false;
4800  if (pnode->test_function_type == 1) {
4801  if (mf1 || mfg1)
4802  pgai = std::make_shared<ga_instruction_first_ind_tensor>
4803  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
4804  if (mf1 && mf1->is_uniform())
4805  { is_uniform = true; pctx1->invalid_convex_num(); }
4806  } else if (pnode->test_function_type == 2) {
4807  if (mf2 || mfg2)
4808  pgai = std::make_shared<ga_instruction_first_ind_tensor>
4809  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
4810  if (mf2 && mf2->is_uniform())
4811  { is_uniform = true; pctx2->invalid_convex_num(); }
4812  } else if (pnode->test_function_type == 3) {
4813  if ((mf1 || mfg1) && (mf2 || mfg2)) {
4814  pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
4815  (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
4816  pnode->qdim2, mf2, mfg2);
4817  if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
4818  is_uniform = true;
4819  pctx1->invalid_convex_num();
4820  pctx2->invalid_convex_num();
4821  }
4822  } else if (mf1 || mfg1) {
4823  pgai = std::make_shared<ga_instruction_first_ind_tensor>
4824  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
4825  if (mf1 && mf1->is_uniform())
4826  { is_uniform = true; pctx1->invalid_convex_num(); }
4827  } else if (mf2 || mfg2) {
4828  pgai = std::make_shared<ga_instruction_second_ind_tensor>
4829  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
4830  if (mf2 && mf2->is_uniform())
4831  { is_uniform = true; pctx2->invalid_convex_num(); }
4832  }
4833  }
4834 
4835  // Optimization: detects if an equivalent node has already been compiled
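    // For instance, if Grad_u appears several times in the assembled
    // expression, only its first occurrence generates instructions; the
    // other nodes simply alias its tensor. A subtree which is equal up to
    // the swap of the two test functions is recovered through a
    // ga_instruction_transpose_test.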
4836  pnode->t.set_to_original();
4837  if (rmi.node_list.find(pnode->hash_value) != rmi.node_list.end()) {
4838  std::list<pga_tree_node> &node_list = rmi.node_list[pnode->hash_value];
4839  for (std::list<pga_tree_node>::iterator it = node_list.begin();
4840  it != node_list.end(); ++it) {
4841  // cout << "found potential equivalent nodes ";
4842  // ga_print_node(pnode, cout);
4843  // cout << " and "; ga_print_node(*it, cout); cout << endl;
4844  if (sub_tree_are_equal(pnode, *it, workspace, 1)) {
4845  pnode->t.set_to_copy((*it)->t);
4846  return;
4847  }
4848  if (sub_tree_are_equal(pnode, *it, workspace, 2)) {
4849  // cout << "confirmed with transpose" << endl;
4850  if (pnode->nb_test_functions() == 2) {
4851  if (pgai) { // resize instruction if needed
4852  if (is_uniform)
4853  { pgai->exec(); }
4854  else { rmi.instructions.push_back(std::move(pgai)); }
4855  }
4856  pgai = std::make_shared<ga_instruction_transpose_test>
4857  (pnode->tensor(), (*it)->tensor());
4858  rmi.instructions.push_back(std::move(pgai));
4861  } else {
4862  pnode->t.set_to_copy((*it)->t);
4863  }
4864  return;
4865  }
4866  std::stringstream ss;
 4867  ss << "Detected spurious equivalent nodes: ";
4868  ga_print_node(pnode, ss);
4869  ss << " and "; ga_print_node(*it, ss);
 4870  ss << " (not a problem, but the hash code should be adapted)" << endl;
4871  GMM_TRACE2(ss.str());
4872  }
4873  }
4874 
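    // On a uniform mesh the tensor sizes cannot change from one element to
    // the other, so the resize is executed once and for all at compile
    // time. Otherwise it is executed per Gauss point (instructions) when an
    // interpolate transformation makes the sizes point-dependent, and per
    // element (elt_instructions) in the remaining cases.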
4875  if (pgai) { // resize instruction if needed and no equivalent node detected
4876  if (is_uniform) { pgai->exec(); }
4877  else {
4878  if (tensor_to_adapt)
4879  rmi.instructions.push_back(std::move(pgai));
4880  else
4881  rmi.elt_instructions.push_back(std::move(pgai));
4882  }
4883  }
4884 
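    // For an interpolate filter, a placeholder instruction is reserved here,
    // before the children are compiled; once the number of child
    // instructions is known, it is swapped for the actual filter instruction
    // (see below), which can jump over them when the interpolated point
    // falls outside the filtered region.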
4885  size_type interpolate_filter_inst = rmi.instructions.size();
4886  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
4887  pgai = pga_instruction();
4888  rmi.instructions.push_back(std::move(pgai));
4889  if_hierarchy.increment();
4890  new_if_hierarchy.child_of(if_hierarchy);
4891  pif_hierarchy = &new_if_hierarchy;
4892  }
4893 
4894  for (size_type i = 0; i < pnode->children.size(); ++i)
4895  ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
4896  function_case, *pif_hierarchy);
4897 
4898  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
4899  const std::string &intn = pnode->interpolate_name;
4900  ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
4901  pgai = std::make_shared<ga_instruction_interpolate_filter>
4902  (pnode->tensor(), inin, pnode->nbc1,
4903  int(rmi.instructions.size() - interpolate_filter_inst));
4904  rmi.instructions[interpolate_filter_inst].swap(pgai);
4905  pgai = std::make_shared<ga_instruction_copy_tensor>
4906  (pnode->tensor(), pnode->children[0]->tensor());
4907  rmi.instructions.push_back(std::move(pgai));
4908  ga_clear_node_list(pnode->children[0], rmi.node_list);
4909  }
4910 
4911  static scalar_type minus = -scalar_type(1);
4912  size_type nbch = pnode->children.size();
4913  pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
4914  pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
4915  bgeot::multi_index mi;
4916  const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
4917  // const bgeot::multi_index &size1 = child1 ? child1->t.sizes() : mi;
4918  size_type dim0 = child0 ? child0->tensor_order() : 0;
4919  size_type dim1 = child1 ? child1->tensor_order() : 0;
4920 
4921  switch (pnode->node_type) {
4922 
4923  case GA_NODE_PREDEF_FUNC: case GA_NODE_OPERATOR: case GA_NODE_SPEC_FUNC:
4924  case GA_NODE_CONSTANT: case GA_NODE_ALLINDICES: case GA_NODE_ZERO:
4925  case GA_NODE_RESHAPE: case GA_NODE_SWAP_IND: case GA_NODE_IND_MOVE_LAST:
4926  case GA_NODE_CONTRACT: case GA_NODE_INTERPOLATE_FILTER:
4927  break;
4928 
4929  case GA_NODE_X:
4930  GMM_ASSERT1(!function_case,
4931  "No use of X is allowed in scalar functions");
4932  if (pnode->nbc1) {
4933  GA_DEBUG_ASSERT(pnode->tensor().size() == 1, "dimensions mismatch");
4934  GMM_ASSERT1(pnode->nbc1 <= m.dim(),
4935  "Bad index for X in expression");
4936  pgai = std::make_shared<ga_instruction_X_component>
4937  (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
4938  } else {
4939  if (pnode->tensor().size() != m.dim())
4940  pnode->init_vector_tensor(m.dim());
4941  pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
4942  }
4943  rmi.instructions.push_back(std::move(pgai));
4944  break;
4945 
4946  case GA_NODE_ELT_SIZE:
4947  GMM_ASSERT1(!function_case,
4948  "No use of element_size is allowed in functions");
4949  if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
4950  pgai = std::make_shared<ga_instruction_element_size>
4951  (pnode->tensor(), gis.elt_size);
4952  gis.need_elt_size = true;
4953  rmi.instructions.push_back(std::move(pgai));
4954  break;
4955 
4956  case GA_NODE_ELT_K:
4957  GMM_ASSERT1(!function_case,
4958  "No use of element_K is allowed in functions");
4959  pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
4960  gis.ctx);
4961  rmi.instructions.push_back(std::move(pgai));
4962  break;
4963 
4964  case GA_NODE_ELT_B:
4965  GMM_ASSERT1(!function_case,
4966  "No use of element_B is allowed in functions");
4967  pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
4968  gis.ctx);
4969  rmi.instructions.push_back(std::move(pgai));
4970  break;
4971 
4972  case GA_NODE_NORMAL:
4973  {
4974  GMM_ASSERT1(!function_case,
4975  "No use of Normal is allowed in functions");
4976  if (pnode->tensor().size() != m.dim())
4977  pnode->init_vector_tensor(m.dim());
4978  const mesh_im_level_set *mimls
4979  = dynamic_cast<const mesh_im_level_set *>(rmi.im);
4980  if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
 4981  // Call with ctx (Gauss point)
4982  pgai = std::make_shared<ga_instruction_level_set_normal_vector>
4983  (pnode->tensor(), mimls, gis.ctx);
4984  rmi.instructions.push_back(std::move(pgai));
4985  } else {
4986  pgai = std::make_shared<ga_instruction_copy_Normal>
4987  (pnode->tensor(), gis.Normal);
4988  rmi.instructions.push_back(std::move(pgai));
4989  }
4990  }
4991  break;
4992 
4993  case GA_NODE_INTERPOLATE_X:
4994  case GA_NODE_INTERPOLATE_NORMAL:
4995  GMM_ASSERT1(!function_case,
4996  "No use of Interpolate is allowed in functions");
4997  if (pnode->tensor().size() != m.dim())
4998  pnode->init_vector_tensor(m.dim());
4999  if (pnode->node_type == GA_NODE_INTERPOLATE_X)
5000  pgai = std::make_shared<ga_instruction_copy_small_vect>
5001  (pnode->tensor(),
5002  rmi.interpolate_infos[pnode->interpolate_name].pt_y);
5003  else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
5004  pgai = std::make_shared<ga_instruction_copy_Normal>
5005  (pnode->tensor(),
5006  rmi.interpolate_infos[pnode->interpolate_name].Normal);
5007  rmi.instructions.push_back(std::move(pgai));
5008  break;
5009 
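    // Value/derivative nodes of a fem variable compile into a small
    // pipeline: slice the local dofs of the element (once per element),
    // update the precomputations on the reference element (pfp), evaluate
    // the base (or grad/hess) functions at the current Gauss point, then
    // contract them with the local dofs. Sketch for the Val case of a
    // scalar fem (illustrative only):
    //   t[k] = sum_i base(i,k) * local_dofs[i];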
5010  case GA_NODE_VAL: case GA_NODE_GRAD:
5011  case GA_NODE_HESS: case GA_NODE_DIVERG:
5012  case GA_NODE_ELEMENTARY_VAL: case GA_NODE_ELEMENTARY_GRAD:
5013  case GA_NODE_ELEMENTARY_HESS: case GA_NODE_ELEMENTARY_DIVERG:
5014  case GA_NODE_XFEM_PLUS_VAL: case GA_NODE_XFEM_PLUS_GRAD:
5015  case GA_NODE_XFEM_PLUS_HESS: case GA_NODE_XFEM_PLUS_DIVERG:
5016  case GA_NODE_XFEM_MINUS_VAL: case GA_NODE_XFEM_MINUS_GRAD:
5017  case GA_NODE_XFEM_MINUS_HESS: case GA_NODE_XFEM_MINUS_DIVERG:
5018  if (function_case) {
5019  GMM_ASSERT1(pnode->node_type != GA_NODE_ELEMENTARY_VAL &&
5020  pnode->node_type != GA_NODE_ELEMENTARY_GRAD &&
5021  pnode->node_type != GA_NODE_ELEMENTARY_HESS &&
5022  pnode->node_type != GA_NODE_ELEMENTARY_DIVERG,
5023  "No elementary transformation is allowed in functions");
5024  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
5025  pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
5026  pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
5027  pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
5028  "Xfem_plus not allowed in functions");
5029  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
5030  pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
5031  pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
5032  pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
 5033  "Xfem_minus not allowed in functions");
5034  const mesh_fem *mf = workspace.associated_mf(pnode->name);
5035  const im_data *imd = workspace.associated_im_data(pnode->name);
 5036  GMM_ASSERT1(!mf, "No fem expression is allowed in a function expression");
5037  GMM_ASSERT1(!imd, "No integration method data is allowed in "
 5038  "a function expression");
5039  if (gmm::vect_size(workspace.value(pnode->name)) == 1)
5040  pgai = std::make_shared<ga_instruction_copy_scalar>
5041  (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
5042  else
5043  pgai = std::make_shared<ga_instruction_copy_vect>
5044  (pnode->tensor().as_vector(), workspace.value(pnode->name));
5045  rmi.instructions.push_back(std::move(pgai));
5046  } else {
5047  const mesh_fem *mf = workspace.associated_mf(pnode->name);
5048  const im_data *imd = workspace.associated_im_data(pnode->name);
5049 
5050  if (imd) {
5051  pgai = std::make_shared<ga_instruction_extract_local_im_data>
5052  (pnode->tensor(), *imd, workspace.value(pnode->name),
5053  gis.pai, gis.ctx, workspace.qdim(pnode->name));
5054  rmi.instructions.push_back(std::move(pgai));
5055  } else {
5056  GMM_ASSERT1(mf, "Internal error");
5057 
5058  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
5059  "The finite element of variable " << pnode->name <<
 5060  " has to be defined on the same mesh as the "
5061  "integration method or interpolation used");
5062 
5063  // An instruction for extracting local dofs of the variable.
5064  if (rmi.local_dofs.count(pnode->name) == 0) {
5065  rmi.local_dofs[pnode->name] = base_vector(1);
5066  extend_variable_in_gis(workspace, pnode->name, gis);
5067  // cout << "local dof of " << pnode->name << endl;
5068  size_type qmult2 = mf->get_qdim();
5069  if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
5070  qmult2 = size_type(-1);
5071  pgai = std::make_shared<ga_instruction_slice_local_dofs>
5072  (*mf, *(gis.extended_vars[pnode->name]), gis.ctx,
5073  rmi.local_dofs[pnode->name],
5074  workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
5075  rmi.elt_instructions.push_back(std::move(pgai));
5076  }
5077 
5078  // An instruction for pfp update
5079  if (rmi.pfps.count(mf) == 0) {
5080  rmi.pfps[mf] = 0;
5081  pgai = std::make_shared<ga_instruction_update_pfp>
5082  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5083  if (mf->is_uniform())
5084  rmi.begin_instructions.push_back(std::move(pgai));
5085  else
5086  rmi.instructions.push_back(std::move(pgai));
5087  }
5088 
5089  // An instruction for the base value
5090  pgai = pga_instruction();
5091  switch (pnode->node_type) {
5092  case GA_NODE_VAL: case GA_NODE_ELEMENTARY_VAL:
5093  if (rmi.base.count(mf) == 0 ||
5094  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
5095  rmi.base_hierarchy[mf].push_back(if_hierarchy);
5096  pgai = std::make_shared<ga_instruction_val_base>
5097  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5098  }
5099  break;
5100  case GA_NODE_XFEM_PLUS_VAL:
5101  if (rmi.xfem_plus_base.count(mf) == 0 ||
5102  !(if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))) {
5103  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
5104  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
5105  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5106  }
5107  break;
5108  case GA_NODE_XFEM_MINUS_VAL:
5109  if (rmi.xfem_minus_base.count(mf) == 0 ||
5110  !(if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))) {
5111  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
5112  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
5113  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5114  }
5115  break;
5116  case GA_NODE_GRAD: case GA_NODE_DIVERG:
5117  case GA_NODE_ELEMENTARY_GRAD: case GA_NODE_ELEMENTARY_DIVERG:
5118  if (rmi.grad.count(mf) == 0 ||
5119  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
5120  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5121  pgai = std::make_shared<ga_instruction_grad_base>
5122  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5123  }
5124  break;
5125  case GA_NODE_XFEM_PLUS_GRAD: case GA_NODE_XFEM_PLUS_DIVERG:
5126  if (rmi.xfem_plus_grad.count(mf) == 0 ||
5127  !(if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))) {
5128  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
5129  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
5130  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5131  }
5132  break;
5133  case GA_NODE_XFEM_MINUS_GRAD: case GA_NODE_XFEM_MINUS_DIVERG:
5134  if (rmi.xfem_minus_grad.count(mf) == 0 ||
5135  !(if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))) {
5136  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
5137  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
5138  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5139  }
5140  break;
5141  case GA_NODE_HESS: case GA_NODE_ELEMENTARY_HESS:
5142  if (rmi.hess.count(mf) == 0 ||
5143  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
5144  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5145  pgai = std::make_shared<ga_instruction_hess_base>
5146  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5147  }
5148  break;
5149  case GA_NODE_XFEM_PLUS_HESS:
5150  if (rmi.xfem_plus_hess.count(mf) == 0 ||
5151  !(if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))) {
5152  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
5153  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
5154  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5155  }
5156  break;
5157  case GA_NODE_XFEM_MINUS_HESS:
5158  if (rmi.xfem_minus_hess.count(mf) == 0 ||
5159  !(if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))) {
5160  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
5161  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
5162  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5163  }
5164  break;
5165 
5166  default : GMM_ASSERT1(false, "Internal error");
5167  }
5168  if (pgai) rmi.instructions.push_back(std::move(pgai));
5169 
5170  // The eval instruction
5171  switch (pnode->node_type) {
5172  case GA_NODE_VAL: // --> t(target_dim*Qmult)
5173  pgai = std::make_shared<ga_instruction_val>
5174  (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
5175  workspace.qdim(pnode->name));
5176  break;
5177  case GA_NODE_GRAD: // --> t(target_dim*Qmult,N)
5178  pgai = std::make_shared<ga_instruction_grad>
5179  (pnode->tensor(), rmi.grad[mf],
5180  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5181  break;
5182  case GA_NODE_HESS: // --> t(target_dim*Qmult,N,N)
5183  pgai = std::make_shared<ga_instruction_hess>
5184  (pnode->tensor(), rmi.hess[mf],
5185  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5186  break;
5187  case GA_NODE_DIVERG: // --> t(1)
5188  pgai = std::make_shared<ga_instruction_diverg>
5189  (pnode->tensor(), rmi.grad[mf],
5190  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5191  break;
5192  case GA_NODE_XFEM_PLUS_VAL: // --> t(target_dim*Qmult)
5193  pgai = std::make_shared<ga_instruction_val>
5194  (pnode->tensor(), rmi.xfem_plus_base[mf],
5195  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5196  break;
5197  case GA_NODE_XFEM_PLUS_GRAD: // --> t(target_dim*Qmult,N)
5198  pgai = std::make_shared<ga_instruction_grad>
5199  (pnode->tensor(), rmi.xfem_plus_grad[mf],
5200  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5201  break;
5202  case GA_NODE_XFEM_PLUS_HESS: // --> t(target_dim*Qmult,N,N)
5203  pgai = std::make_shared<ga_instruction_hess>
5204  (pnode->tensor(), rmi.xfem_plus_hess[mf],
5205  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5206  break;
5207  case GA_NODE_XFEM_PLUS_DIVERG: // --> t(1)
5208  pgai = std::make_shared<ga_instruction_diverg>
5209  (pnode->tensor(), rmi.xfem_plus_grad[mf],
5210  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5211  break;
5212  case GA_NODE_XFEM_MINUS_VAL: // --> t(target_dim*Qmult)
5213  pgai = std::make_shared<ga_instruction_val>
5214  (pnode->tensor(), rmi.xfem_minus_base[mf],
5215  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5216  break;
5217  case GA_NODE_XFEM_MINUS_GRAD: // --> t(target_dim*Qmult,N)
5218  pgai = std::make_shared<ga_instruction_grad>
5219  (pnode->tensor(), rmi.xfem_minus_grad[mf],
5220  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5221  break;
5222  case GA_NODE_XFEM_MINUS_HESS: // --> t(target_dim*Qmult,N,N)
5223  pgai = std::make_shared<ga_instruction_hess>
5224  (pnode->tensor(), rmi.xfem_minus_hess[mf],
5225  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5226  break;
5227  case GA_NODE_XFEM_MINUS_DIVERG: // --> t(1)
5228  pgai = std::make_shared<ga_instruction_diverg>
5229  (pnode->tensor(), rmi.xfem_minus_grad[mf],
5230  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5231  break;
5232  case GA_NODE_ELEMENTARY_VAL:
5233  { // --> t(target_dim*Qmult)
5234  ga_instruction_set::elementary_trans_info &eti
5235  = rmi.elementary_trans_infos[pnode->elementary_name];
5236  pgai =
5237  std::make_shared<ga_instruction_elementary_transformation_val>
5238  (pnode->tensor(), rmi.base[mf],
5239  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5240  workspace.elementary_transformation(pnode->elementary_name),
5241  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5242  }
5243  break;
5244  case GA_NODE_ELEMENTARY_GRAD:
5245  { // --> t(target_dim*Qmult,N)
5246  ga_instruction_set::elementary_trans_info &eti
5247  = rmi.elementary_trans_infos[pnode->elementary_name];
5248  pgai =
5249  std::make_shared<ga_instruction_elementary_transformation_grad>
5250  (pnode->tensor(), rmi.grad[mf],
5251  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5252  workspace.elementary_transformation(pnode->elementary_name),
5253  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5254  }
5255  break;
5256  case GA_NODE_ELEMENTARY_HESS:
5257  { // --> t(target_dim*Qmult,N,N)
5258  ga_instruction_set::elementary_trans_info &eti
5259  = rmi.elementary_trans_infos[pnode->elementary_name];
5260  pgai =
5261  std::make_shared<ga_instruction_elementary_transformation_hess>
5262  (pnode->tensor(), rmi.hess[mf],
5263  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5264  workspace.elementary_transformation(pnode->elementary_name),
5265  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5266  }
5267  break;
5268  case GA_NODE_ELEMENTARY_DIVERG:
5269  { // --> t(1)
5270  ga_instruction_set::elementary_trans_info &eti
5271  = rmi.elementary_trans_infos[pnode->elementary_name];
5272  pgai =
5273  std::make_shared<ga_instruction_elementary_transformation_diverg>
5274  (pnode->tensor(), rmi.grad[mf],
5275  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5276  workspace.elementary_transformation(pnode->elementary_name),
5277  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5278  }
5279  break;
5280  default: break;
5281  }
5282  rmi.instructions.push_back(std::move(pgai));
5283  }
5284  }
5285  break;
5286 
5287  case GA_NODE_INTERPOLATE_VAL: case GA_NODE_INTERPOLATE_GRAD:
5288  case GA_NODE_INTERPOLATE_HESS: case GA_NODE_INTERPOLATE_DIVERG:
5289  {
5290  extend_variable_in_gis(workspace, pnode->name, gis);
5291 
5292  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
5293  const std::string &intn = pnode->interpolate_name;
5294  const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
5295  fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
5296  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
5297  if (workspace.variable_group_exists(pnode->name)) {
5298  ga_instruction_set::variable_group_info &vgi =
5299  rmi.interpolate_infos[intn].groups_info[pnode->name];
5300  mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
5301  }
5302 
5303  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
5304  // --> t(target_dim*Qmult)
5305  pgai = std::make_shared<ga_instruction_interpolate_val>
5306  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5307  workspace.qdim(pnode->name),
5308  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5309  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
5310  // --> t(target_dim*Qmult,N)
5311  pgai = std::make_shared<ga_instruction_interpolate_grad>
5312  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5313  workspace.qdim(pnode->name),
5314  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5315  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
5316  // --> t(target_dim*Qmult,N,N)
5317  pgai = std::make_shared<ga_instruction_interpolate_hess>
5318  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5319  workspace.qdim(pnode->name),
5320  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5321  } else { // --> t(1)
5322  pgai = std::make_shared<ga_instruction_interpolate_diverg>
5323  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5324  workspace.qdim(pnode->name),
5325  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5326  }
5327  rmi.instructions.push_back(std::move(pgai));
5328  }
5329  break;
5330 
5331  case GA_NODE_INTERPOLATE_DERIVATIVE:
5332  GMM_ASSERT1(!function_case,
5333  "No use of Interpolate is allowed in functions");
5334  pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
5335  (pnode->tensor(),
5336  rmi.interpolate_infos[pnode->interpolate_name_der]
5337  .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
5338  rmi.instructions.push_back(std::move(pgai));
5339  break;
5340 
5341  case GA_NODE_VAL_TEST: case GA_NODE_GRAD_TEST:
5342  case GA_NODE_HESS_TEST: case GA_NODE_DIVERG_TEST:
5343  case GA_NODE_ELEMENTARY_VAL_TEST: case GA_NODE_ELEMENTARY_GRAD_TEST:
5344  case GA_NODE_ELEMENTARY_HESS_TEST: case GA_NODE_ELEMENTARY_DIVERG_TEST:
5345  case GA_NODE_XFEM_PLUS_VAL_TEST: case GA_NODE_XFEM_PLUS_GRAD_TEST:
5346  case GA_NODE_XFEM_PLUS_HESS_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
5347  case GA_NODE_XFEM_MINUS_VAL_TEST: case GA_NODE_XFEM_MINUS_GRAD_TEST:
5348  case GA_NODE_XFEM_MINUS_HESS_TEST: case GA_NODE_XFEM_MINUS_DIVERG_TEST:
5349  // GMM_ASSERT1(!function_case,
5350  // "Test functions not allowed in functions");
5351  {
5352  const mesh_fem *mf = workspace.associated_mf(pnode->name);
5353  if (mf) {
5354  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
5355  "The finite element of variable " << pnode->name <<
5356  " and the applied integration method have to be"
5357  " defined on the same mesh");
5358 
5359  // An instruction for pfp update
5360  if (rmi.pfps.count(mf) == 0) {
5361  rmi.pfps[mf] = 0;
5362  pgai = std::make_shared<ga_instruction_update_pfp>
5363  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5364  if (is_uniform)
5365  rmi.begin_instructions.push_back(std::move(pgai));
5366  else
5367  rmi.instructions.push_back(std::move(pgai));
5368  }
5369 
5370  // An instruction for the base value
5371  pgai = pga_instruction();
5372  switch (pnode->node_type) {
5373  case GA_NODE_VAL_TEST: case GA_NODE_ELEMENTARY_VAL_TEST:
5374  if (rmi.base.find(mf) == rmi.base.end() ||
5375  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
5376  rmi.base_hierarchy[mf].push_back(if_hierarchy);
5377  pgai = std::make_shared<ga_instruction_val_base>
5378  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5379  }
5380  break;
5381  case GA_NODE_XFEM_PLUS_VAL_TEST:
5382  if (rmi.xfem_plus_base.find(mf) == rmi.xfem_plus_base.end() ||
5383  !(if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))) {
5384  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
5385  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
5386  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5387  }
5388  break;
5389  case GA_NODE_XFEM_MINUS_VAL_TEST:
5390  if (rmi.xfem_minus_base.find(mf) == rmi.xfem_minus_base.end() ||
5391  !(if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))) {
5392  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
5393  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
5394  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5395  }
5396  break;
5397  case GA_NODE_GRAD_TEST: case GA_NODE_DIVERG_TEST:
5398  case GA_NODE_ELEMENTARY_GRAD_TEST:
5399  case GA_NODE_ELEMENTARY_DIVERG_TEST:
5400  if (rmi.grad.find(mf) == rmi.grad.end() ||
5401  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
5402  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5403  pgai = std::make_shared<ga_instruction_grad_base>
5404  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5405  }
5406  break;
5407  case GA_NODE_XFEM_PLUS_GRAD_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
5408  if (rmi.xfem_plus_grad.find(mf) == rmi.xfem_plus_grad.end() ||
5409  !(if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))) {
5410  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
5411  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
5412  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5413  }
5414  break;
5415  case GA_NODE_XFEM_MINUS_GRAD_TEST:
5416  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
5417  if (rmi.xfem_minus_grad.find(mf) == rmi.xfem_minus_grad.end() ||
5418  !(if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))) {
5419  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
5420  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
5421  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5422  }
5423  break;
5424  case GA_NODE_HESS_TEST: case GA_NODE_ELEMENTARY_HESS_TEST:
5425  if (rmi.hess.count(mf) == 0 ||
5426  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
5427  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5428  pgai = std::make_shared<ga_instruction_hess_base>
5429  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5430  }
5431  break;
5432  case GA_NODE_XFEM_PLUS_HESS_TEST:
5433  if (rmi.xfem_plus_hess.count(mf) == 0 ||
5434  !(if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
5435  ) {
5436  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
5437  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
5438  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5439  }
5440  break;
5441  case GA_NODE_XFEM_MINUS_HESS_TEST:
5442  if (rmi.xfem_minus_hess.find(mf) == rmi.xfem_minus_hess.end() ||
5443  !(if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))) {
5444  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
5445  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
5446  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5447  }
5448  break;
5449 
5450  default : GMM_ASSERT1(false, "Internal error");
5451  }
5452  if (pgai) rmi.instructions.push_back(std::move(pgai));
5453 
5454  // The copy of the real_base_value
5455  switch(pnode->node_type) {
5456  case GA_NODE_VAL_TEST:
5457  // --> t(Qmult*ndof,Qmult*target_dim)
5458  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
5459  pnode->t.set_sparsity(1, mf->get_qdim());
5460  tensor_to_clear = true;
5461  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
5462  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
5463  } else {
5464  pgai = std::make_shared<ga_instruction_copy_val_base>
5465  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
5466  }
5467  break;
5468  case GA_NODE_GRAD_TEST:
5469  // --> t(Qmult*ndof,Qmult*target_dim,N)
5470  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
5471  pnode->t.set_sparsity(2, mf->get_qdim());
5472  tensor_to_clear = true;
5473  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
5474  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
5475  } else {
5476  pgai = std::make_shared<ga_instruction_copy_grad_base>
5477  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
5478  }
5479  break;
5480  case GA_NODE_HESS_TEST:
5481  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
5482  pgai = std::make_shared<ga_instruction_copy_hess_base>
5483  (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
5484  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5485  pnode->t.set_sparsity(3, mf->get_qdim());
5486  break;
5487  case GA_NODE_DIVERG_TEST:
5488  // --> t(Qmult*ndof)
5489  pgai = std::make_shared<ga_instruction_copy_diverg_base>
5490  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
5491  break;
5492  case GA_NODE_XFEM_PLUS_VAL_TEST:
5493  // -->t(Qmult*ndof,Qmult*target_dim)
5494  pgai = std::make_shared<ga_instruction_copy_val_base>
5495  (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
5496  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5497  pnode->t.set_sparsity(1, mf->get_qdim());
5498  break;
5499  case GA_NODE_XFEM_PLUS_GRAD_TEST:
5500  // --> t(Qmult*ndof,Qmult*target_dim,N)
5501  pgai = std::make_shared<ga_instruction_copy_grad_base>
5502  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
5503  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5504  pnode->t.set_sparsity(2, mf->get_qdim());
5505  break;
5506  case GA_NODE_XFEM_PLUS_HESS_TEST:
5507  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
5508  pgai = std::make_shared<ga_instruction_copy_hess_base>
5509  (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
5510  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5511  pnode->t.set_sparsity(3, mf->get_qdim());
5512  break;
5513  case GA_NODE_XFEM_PLUS_DIVERG_TEST:
5514  // --> t(Qmult*ndof)
5515  pgai = std::make_shared<ga_instruction_copy_diverg_base>
5516  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
5517  break;
5518  case GA_NODE_XFEM_MINUS_VAL_TEST:
5519  // -->t(Qmult*ndof,Qmult*target_dim)
5520  pgai = std::make_shared<ga_instruction_copy_val_base>
5521  (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
5522  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5523  pnode->t.set_sparsity(1, mf->get_qdim());
5524  break;
5525  case GA_NODE_XFEM_MINUS_GRAD_TEST:
5526  // --> t(Qmult*ndof,Qmult*target_dim,N)
5527  pgai = std::make_shared<ga_instruction_copy_grad_base>
5528  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
5529  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5530  pnode->t.set_sparsity(2, mf->get_qdim());
5531  break;
5532  case GA_NODE_XFEM_MINUS_HESS_TEST:
5533  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
5534  pgai = std::make_shared<ga_instruction_copy_hess_base>
5535  (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
5536  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5537  pnode->t.set_sparsity(3, mf->get_qdim());
5538  break;
5539  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
5540  // --> t(Qmult*ndof)
5541  pgai = std::make_shared<ga_instruction_copy_diverg_base>
5542  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
5543  break;
5544  case GA_NODE_ELEMENTARY_VAL_TEST:
5545  { // --> t(Qmult*ndof,Qmult*target_dim)
5546  ga_instruction_set::elementary_trans_info &eti
5547  = rmi.elementary_trans_infos[pnode->elementary_name];
5548  pgai =
5549  std::make_shared<ga_instruction_elementary_transformation_val_base>
5550  (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
5551  workspace.elementary_transformation(pnode->elementary_name),
5552  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5553  }
5554  break;
5555  case GA_NODE_ELEMENTARY_GRAD_TEST:
5556  { // --> t(Qmult*ndof,Qmult*target_dim,N)
5557  ga_instruction_set::elementary_trans_info &eti
5558  = rmi.elementary_trans_infos[pnode->elementary_name];
5559  pgai =
5560  std::make_shared<ga_instruction_elementary_transformation_grad_base>
5561  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
5562  workspace.elementary_transformation(pnode->elementary_name),
5563  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5564  }
5565  break;
5566  case GA_NODE_ELEMENTARY_HESS_TEST:
5567  { // --> t(Qmult*ndof,Qmult*target_dim,N,N)
5568  ga_instruction_set::elementary_trans_info &eti
5569  = rmi.elementary_trans_infos[pnode->elementary_name];
5570  pgai =
5571  std::make_shared<ga_instruction_elementary_transformation_hess_base>
5572  (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
5573  workspace.elementary_transformation(pnode->elementary_name),
5574  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5575  }
5576  break;
5577  case GA_NODE_ELEMENTARY_DIVERG_TEST:
5578  { // --> t(Qmult*ndof)
5579  ga_instruction_set::elementary_trans_info &eti
5580  = rmi.elementary_trans_infos[pnode->elementary_name];
5581  pgai =
5582  std::make_shared<ga_instruction_elementary_transformation_diverg_base>
5583  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
5584  workspace.elementary_transformation(pnode->elementary_name),
5585  *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5586  }
5587  break;
5588  default: break;
5589  }
5590  if (pgai) rmi.instructions.push_back(std::move(pgai));
5591  }
5592  add_interval_to_gis(workspace, pnode->name, gis);
5593  }
5594  break;
5595 
5596  case GA_NODE_INTERPOLATE_VAL_TEST: case GA_NODE_INTERPOLATE_GRAD_TEST:
5597  case GA_NODE_INTERPOLATE_HESS_TEST: case GA_NODE_INTERPOLATE_DIVERG_TEST:
5598  {
5599  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
5600  const std::string &intn = pnode->interpolate_name;
5601  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
5602  if (workspace.variable_group_exists(pnode->name)) {
5603  ga_instruction_set::variable_group_info &vgi =
5604  rmi.interpolate_infos[intn].groups_info[pnode->name];
5605  mfg = &(vgi.mf); mfn = 0;
5606  }
5607 
5608  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
5609  // --> t(Qmult*ndof,Qmult*target_dim)
5610  pgai = std::make_shared<ga_instruction_interpolate_val_base>
5611  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5612  workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
5613  gis.fp_pool);
5614  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
5615  // --> t(Qmult*ndof,Qmult*target_dim,N)
5616  pgai = std::make_shared<ga_instruction_interpolate_grad_base>
5617  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5618  workspace.qdim(pnode->name),
5619  rmi.interpolate_infos[intn], gis.fp_pool);
5620  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
5621  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
5622  pgai = std::make_shared<ga_instruction_interpolate_hess_base>
5623  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5624  workspace.qdim(pnode->name),
5625  rmi.interpolate_infos[intn], gis.fp_pool);
 5626  } else { // GA_NODE_INTERPOLATE_DIVERG_TEST
5627  // --> t(Qmult*ndof)
5628  pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
5629  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5630  workspace.qdim(pnode->name),
5631  rmi.interpolate_infos[intn], gis.fp_pool);
5632  }
5633  rmi.instructions.push_back(std::move(pgai));
5634  add_interval_to_gis(workspace, pnode->name, gis);
5635  }
5636  break;
5637 
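    // Operator nodes dispatch on op_type. Whenever both operands are
    // scalar, a dedicated scalar instruction bypasses the tensor machinery,
    // e.g. a sum of two scalars compiles to a single
    // ga_instruction_scalar_add; on uniform meshes, size-dependent tensor
    // operations can additionally be replaced by unrolled instructions.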
5638  case GA_NODE_OP:
5639  switch(pnode->op_type) {
5640 
5641  case GA_PLUS:
5642  if (pnode->tensor().size() == 1) {
5643  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
5644  "Internal error: child0 not scalar");
5645  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
5646  "Internal error: child1 not scalar");
5647  pgai = std::make_shared<ga_instruction_scalar_add>
5648  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5649  } else {
5650  pgai = std::make_shared<ga_instruction_add>
5651  (pnode->tensor(), child0->tensor(), child1->tensor());
5652  }
5653  if (child0->t.sparsity() == child1->t.sparsity()
5654  && child0->t.qdim() == child1->t.qdim())
5655  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5656  rmi.instructions.push_back(std::move(pgai));
5657  break;
5658 
5659  case GA_MINUS:
5660  if (pnode->tensor().size() == 1) {
5661  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
5662  "Internal error: child0 not scalar");
5663  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
5664  "Internal error: child1 not scalar");
5665  pgai = std::make_shared<ga_instruction_scalar_sub>
5666  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5667  } else {
5668  pgai = std::make_shared<ga_instruction_sub>
5669  (pnode->tensor(), child0->tensor(), child1->tensor());
5670  }
5671  if (child0->t.sparsity() == child1->t.sparsity()
5672  && child0->t.qdim() == child1->t.qdim())
5673  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5674  rmi.instructions.push_back(std::move(pgai));
5675  break;
5676 
5677  case GA_UNARY_MINUS:
5678  if (pnode->tensor().size() == 1) {
5679  GA_DEBUG_ASSERT(child0->tensor().size() == 1, "Internal error");
5680  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5681  (pnode->tensor()[0], child0->tensor()[0], minus);
5682  } else {
5683  pgai = std::make_shared<ga_instruction_scalar_mult>
5684  (pnode->tensor(), child0->tensor(), minus);
5685  }
5686  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5687  rmi.instructions.push_back(std::move(pgai));
5688  break;
5689 
5690 
5691  case GA_DOT: case GA_COLON: case GA_MULT:
5692  {
5693  size_type tps0 = child0->tensor_proper_size();
5694  size_type tps1 = child1->tensor_proper_size();
5695  size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
5696  size_type s2 = size_type(round(sqrt(scalar_type(s1))));
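        // s2 is the contracted dimension k: the k indices summed on each
        // operand satisfy tps0*tps1 = result_size*k^2, hence s2 = sqrt(s1).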
5697 
5698  pgai = pga_instruction();
5699  if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
5700  pnode->op_type == GA_COLON ||
5701  (pnode->op_type == GA_MULT && dim0 == 4) ||
5702  (pnode->op_type == GA_MULT && dim1 <= 1) ||
5703  child0->tensor().size() == 1 || tps1 == 1) {
5704 
5705  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5706  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5707  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5708  }
5709  else if (child0->tensor().size() == 1) {
5710  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
5711  pgai = std::make_shared<ga_instruction_scalar_mult>
5712  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
5713  }
5714  else if (child1->tensor().size() == 1) {
5715  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5716  pgai = std::make_shared<ga_instruction_scalar_mult>
5717  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5718  }
5719  else if (pnode->test_function_type < 3) {
5720  if (tps0 == 1) {
5721  if (is_uniform) // Unrolled instruction
5722  pgai = ga_uniform_instruction_simple_tmult
5723  (pnode->tensor(), child0->tensor(), child1->tensor());
5724  else
5725  pgai = std::make_shared<ga_instruction_simple_tmult>
5726  (pnode->tensor(), child0->tensor(), child1->tensor());
5727  } else {
5728  if (tps1 == 1) {
5729  if (is_uniform) // Unrolled instruction
5730  pgai = ga_uniform_instruction_simple_tmult
5731  (pnode->tensor(), child1->tensor(), child0->tensor());
5732  else
5733  pgai = std::make_shared<ga_instruction_simple_tmult>
5734  (pnode->tensor(), child1->tensor(), child0->tensor());
5735  } else if (is_uniform) // Unrolled instruction
5736  pgai = ga_uniform_instruction_contraction_switch
5737  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
 5738  else // Generic instruction
5739  pgai = ga_instruction_contraction_switch
5740  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
5741  }
5742  } else {
5743  if (child1->test_function_type == 1 ||
5744  child1->test_function_type == 3) {
5745  if (child1->test_function_type == 3 ||
5746  child1->tensor_proper_size() <= s2) {
5747  if (tps0 == 1) {
5748  if (is_uniform) { // Unrolled instruction
5749  pgai = ga_uniform_instruction_simple_tmult
5750  (pnode->tensor(), child1->tensor(), child0->tensor());
5751  } else
5752  pgai = std::make_shared<ga_instruction_simple_tmult>
5753  (pnode->tensor(), child1->tensor(), child0->tensor());
5754  } else if (is_uniform) // Unrolled instruction
5755  pgai = ga_uniform_instruction_contraction_switch
5756  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
 5757  else // Generic instruction
5758  pgai = ga_instruction_contraction_switch
5759  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
5760  } else
5761  pgai = std::make_shared<ga_instruction_spec_contraction>
5762  (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
5763  } else if (child1->test_function_type == 0 ||
5764  (child0->tensor_proper_size() == s2 &&
5765  child1->tensor_proper_size() == s2)) {
5766  if (tps0 == 1) {
5767  if (is_uniform) { // Unrolled instruction
5768  pgai = ga_uniform_instruction_simple_tmult
5769  (pnode->tensor(), child0->tensor(), child1->tensor());
5770  } else
5771  pgai = std::make_shared<ga_instruction_simple_tmult>
5772  (pnode->tensor(), child0->tensor(), child1->tensor());
5773  } else {
5774  if (is_uniform) // Unrolled instruction
5775  pgai = ga_uniform_instruction_contraction_switch
5776  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
 5777  else // Generic instruction
5778  pgai = ga_instruction_contraction_switch
5779  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
5780  }
5781  } else {
5782  if (child0->tensor_proper_size() == s2)
5783  pgai = ga_uniform_instruction_contraction_switch
5784  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
5785  else if (child1->tensor_proper_size() == s2)
5786  pgai = std::make_shared<ga_instruction_spec_contraction>
5787  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5788  else
5789  pgai = std::make_shared<ga_instruction_spec2_contraction>
5790  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5791  }
5792  }
5793  } else { // GA_MULT or GA_DOT for dim1 > 1
5794  // and child1->tensor_proper_size() > 1
5795  if (pnode->test_function_type < 3) {
5796  if (tps0 == 1) {
5797  if (is_uniform) // Unrolled instruction
5798  pgai = ga_uniform_instruction_simple_tmult
5799  (pnode->tensor(), child0->tensor(), child1->tensor());
5800  else
5801  pgai = std::make_shared<ga_instruction_simple_tmult>
5802  (pnode->tensor(), child0->tensor(), child1->tensor());
5803  } else {
5804  if (child1->test_function_type == 0)
5805  pgai = std::make_shared<ga_instruction_matrix_mult>
5806  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5807  else
5808  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
5809  (pnode->tensor(), child0->tensor(), child1->tensor(),
5810  s2, tps0/s2, tps1/s2);
5811  }
5812  } else {
5813  if (child0->tensor_proper_size() == 1) {
5814  if (child0->test_function_type == 0 ||
5815  child0->test_function_type == 1) {
5816  if (is_uniform) // Unrolled instruction
5817  pgai = ga_uniform_instruction_simple_tmult
5818  (pnode->tensor(), child0->tensor(), child1->tensor());
5819  else
5820  pgai = std::make_shared<ga_instruction_simple_tmult>
5821  (pnode->tensor(), child0->tensor(), child1->tensor());
5822  } else
5823  pgai = std::make_shared<ga_instruction_spec_tmult>
5824  (pnode->tensor(), child1->tensor(), child0->tensor(),
5825  tps1, tps0);
5826  } else {
5827  if (child1->test_function_type == 0)
5828  pgai = std::make_shared<ga_instruction_matrix_mult>
5829  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5830  else if (child1->test_function_type == 2)
5831  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
5832  (pnode->tensor(), child0->tensor(), child1->tensor(),
5833  s2, tps0/s2, tps1/s2);
5834  else
5835  pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
5836  (pnode->tensor(), child0->tensor(), child1->tensor(),
5837  s2, tps0/s2, tps1/s2);
5838  }
5839  }
5840  }
5841  rmi.instructions.push_back(std::move(pgai));
5842  }
5843  break;
5844 
5845  case GA_DIV:
5846  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5847  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
5848  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5849  } else if (child1->tensor().size() == 1) {
5850  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5851  pgai = std::make_shared<ga_instruction_scalar_div>
5852  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5853  } else GMM_ASSERT1(false, "Internal error");
5854  rmi.instructions.push_back(std::move(pgai));
5855  break;
5856 
5857  case GA_PRINT:
5858  pnode->t.set_to_copy(child0->t);
5859  pgai = std::make_shared<ga_instruction_print_tensor>
5860  (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
5861  rmi.instructions.push_back(std::move(pgai));
5862  break;
5863 
5864  case GA_QUOTE:
5865  if (pnode->tensor_proper_size() > 1) {
5866  size_type n1 = child0->tensor_proper_size(0);
5867  size_type n2 = (child0->tensor_order() > 1) ?
5868  child0->tensor_proper_size(1) : 1;
5869  size_type nn = 1;
5870  for (size_type i = 2; i < child0->tensor_order(); ++i)
5871  nn *= child0->tensor_proper_size(i);
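// Informal picture: the operand is viewed as an n1 x n2 x nn array (nn
// collecting all trailing proper dimensions) and the first two proper
// indices are swapped, i.e. t'(j,i,k) = t(i,j,k).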
5872  if (child0->nb_test_functions() == 0)
5873  pgai = std::make_shared<ga_instruction_transpose_no_test>
5874  (pnode->tensor(), child0->tensor(), n1, n2, nn);
5875  else
5876  pgai = std::make_shared<ga_instruction_transpose>
5877  (pnode->tensor(), child0->tensor(), n1, n2, nn);
5878  rmi.instructions.push_back(std::move(pgai));
5879  } else {
5880  pnode->t.set_to_copy(child0->t);
5881  }
5882  break;
5883 
5884  case GA_SYM:
5885  if (pnode->tensor_proper_size() != 1) {
5886  pgai = std::make_shared<ga_instruction_sym>
5887  (pnode->tensor(), child0->tensor());
5888  rmi.instructions.push_back(std::move(pgai));
5889  } else {
5890  pnode->t.set_to_copy(child0->t);
5891  }
5892  break;
5893 
5894  case GA_SKEW:
5895  {
5896  pgai = std::make_shared<ga_instruction_skew>
5897  (pnode->tensor(), child0->tensor());
5898  rmi.instructions.push_back(std::move(pgai));
5899  }
5900  break;
5901 
5902  case GA_TRACE:
5903  {
5904  size_type N = (child0->tensor_proper_size() == 1) ? 1 : size0.back();
5905  if (N == 1) {
5906  pnode->t.set_to_copy(child0->t);
5907  } else {
5908  pgai = std::make_shared<ga_instruction_trace>
5909  (pnode->tensor(), child0->tensor(), N);
5910  rmi.instructions.push_back(std::move(pgai));
5911  }
5912  }
5913  break;
5914 
5915  case GA_DEVIATOR:
5916  {
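// Deviator(A) = A - (Trace(A)/N)*Id, where N is the matrix dimension,
// taken, as for GA_TRACE, from the last proper size.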
5917  size_type N = (child0->tensor_proper_size() == 1) ? 1 : size0.back();
5918  pgai = std::make_shared<ga_instruction_deviator>
5919  (pnode->tensor(), child0->tensor(), N);
5920  rmi.instructions.push_back(std::move(pgai));
5921  }
5922  break;
5923 
5924  case GA_DOTMULT:
5925 
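// Component-wise (Hadamard) product. Scalar operands reduce to plain
// scalar multiplications; the *_spec variant is selected when both
// operands carry test functions.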
5926  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5927  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5928  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5929  } else if (child0->tensor().size() == 1) {
5930  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
5931  pgai = std::make_shared<ga_instruction_scalar_mult>
5932  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
5933  }
5934  else if (child1->tensor().size() == 1) {
5935  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5936  pgai = std::make_shared<ga_instruction_scalar_mult>
5937  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5938  }
5939  else if (child1->test_function_type == 0)
5940  pgai = std::make_shared<ga_instruction_dotmult>
5941  (pnode->tensor(), child0->tensor(), child1->tensor());
5942  else if (child0->test_function_type == 0)
5943  pgai = std::make_shared<ga_instruction_dotmult>
5944  (pnode->tensor(), child1->tensor(), child0->tensor());
5945  else if (child0->test_function_type == 1)
5946  pgai = std::make_shared<ga_instruction_dotmult_spec>
5947  (pnode->tensor(), child0->tensor(), child1->tensor());
5948  else
5949  pgai = std::make_shared<ga_instruction_dotmult_spec>
5950  (pnode->tensor(), child1->tensor(), child0->tensor());
5951 
5952  rmi.instructions.push_back(std::move(pgai));
5953  break;
5954 
5955 
5956  case GA_DOTDIV:
5957  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5958  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
5959  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5960  } else if (child1->tensor().size() == 1) {
5961  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5962  pgai = std::make_shared<ga_instruction_scalar_div>
5963  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5964  } else if (child1->test_function_type == 0) {
5965  pgai = std::make_shared<ga_instruction_dotdiv>
5966  (pnode->tensor(), child0->tensor(), child1->tensor());
5967  } else GMM_ASSERT1(false, "Internal error");
5968  rmi.instructions.push_back(std::move(pgai));
5969  break;
5970 
5971 
5972  case GA_TMULT:
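// Tensor (outer) product. Scalar operands again reduce to plain scalar
// multiplication, and the *_spec variants cope with test-function
// indices on the second operand.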
5973  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5974  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5975  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5976  } else if (child0->tensor().size() == 1) {
5977  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
5978  pgai = std::make_shared<ga_instruction_scalar_mult>
5979  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
5980  }
5981  else if (child1->tensor().size() == 1) {
5982  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5983  pgai = std::make_shared<ga_instruction_scalar_mult>
5984  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5985  }
5986  else if (child1->test_function_type == 0) {
5987  if (is_uniform) // Unrolled instruction
5988  pgai = ga_uniform_instruction_simple_tmult
5989  (pnode->tensor(), child0->tensor(), child1->tensor());
5990  else
5991  pgai = std::make_shared<ga_instruction_simple_tmult>
5992  (pnode->tensor(), child0->tensor(), child1->tensor());
5993  } else if (child1->tensor_proper_size() == 1)
5994  pgai = std::make_shared<ga_instruction_spec2_tmult>
5995  (pnode->tensor(), child0->tensor(), child1->tensor());
5996  else
5997  pgai = std::make_shared<ga_instruction_spec_tmult>
5998  (pnode->tensor(), child0->tensor(), child1->tensor(),
5999  child0->tensor_proper_size(),
6000  child1->tensor_proper_size());
6001 
6002  rmi.instructions.push_back(std::move(pgai));
6003  break;
6004 
6005  default: GMM_ASSERT1(false, "Unexpected operation. Internal error.");
6006  }
6007  break;
6008 
6009  case GA_NODE_C_MATRIX:
6010  {
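// Matrix/tensor written component-wise in the source, e.g. the GWFL
// expression [1,2;3,4]: each child evaluates one component, either a
// full tensor (when test functions are present) or a single scalar.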
6011  if (pnode->test_function_type) {
6012  std::vector<const base_tensor *> components(pnode->children.size());
6013  for (size_type i = 0; i < pnode->children.size(); ++i)
6014  components[i] = &(pnode->children[i]->tensor());
6015  pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
6016  (pnode->tensor(), components);
6017  } else {
6018  std::vector<scalar_type *> components(pnode->children.size());
6019  for (size_type i = 0; i < pnode->children.size(); ++i)
6020  components[i] = &(pnode->children[i]->tensor()[0]);
6021  pgai = std::make_shared<ga_instruction_simple_c_matrix>
6022  (pnode->tensor(), components);
6023  }
6024  rmi.instructions.push_back(std::move(pgai));
6025  }
6026  break;
6027 
6028  case GA_NODE_PARAMS:
6029  if (child0->node_type == GA_NODE_RESHAPE) {
6030  pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
6031  child1->tensor());
6032  rmi.instructions.push_back(std::move(pgai));
6033  } else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
6034  size_type ind;
6035  ind = size_type(round(pnode->children[2]->tensor()[0])-1);
6036  size_type ii2 = 1;
6037  for (size_type i = 0; i < child1->tensor_order(); ++i)
6038  if (i>ind) ii2 *= child1->tensor_proper_size(i);
6039  size_type nn = child1->tensor_proper_size(ind);
6040  pgai = std::make_shared<ga_instruction_index_move_last>
6041  (pnode->tensor(), child1->tensor(), nn, ii2);
6042  rmi.instructions.push_back(std::move(pgai));
6043  } else if (child0->node_type == GA_NODE_SWAP_IND) {
6044  size_type ind[4];
6045  for (size_type i = 2; i < 4; ++i)
6046  ind[i] = size_type(round(pnode->children[i]->tensor()[0])-1);
6047  if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
6048  size_type ii2 = 1, ii3 = 1;
6049  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6050  if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
6051  if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
6052  }
6053  size_type nn1 = child1->tensor_proper_size(ind[2]);
6054  size_type nn2 = child1->tensor_proper_size(ind[3]);
6055 
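// nn1/nn2 are the sizes of the two swapped indices; ii2 and ii3 are the
// products of the dimensions lying between and after them, i.e. the
// stride blocks needed to relocate every component by index arithmetic.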
6056  pgai = std::make_shared<ga_instruction_swap_indices>
6057  (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
6058  rmi.instructions.push_back(std::move(pgai));
6059  } else if (child0->node_type == GA_NODE_CONTRACT) {
6060  std::vector<size_type> ind(2), indsize(2);
6061  pga_tree_node child2(0);
6062  if (pnode->children.size() == 4)
6063  { ind[0] = 2; ind[1] = 3; }
6064  else if (pnode->children.size() == 5)
6065  { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
6066  else if (pnode->children.size() == 7) {
6067  ind.resize(4); indsize.resize(4);
6068  ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
6069  child2 = pnode->children[4];
6070  }
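// The three arities of Contract correspond to Contract(t,i,j)
// (contraction of one tensor over two of its own indices),
// Contract(t1,i,t2,j) (one index pair between two tensors) and
// Contract(t1,i,j,t2,k,l) (two index pairs). ind[] holds the positions
// of the index arguments among the children; the loop below converts
// the 1-based indices given in the source to 0-based ones and records
// the corresponding dimension sizes.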
6071  size_type kk = 0, ll = 1;
6072  for (size_type i = 1; i < pnode->children.size(); ++i) {
6073  if (i == ind[kk]) {
6074  ind[kk] = size_type(round(pnode->children[i]->tensor()[0])-1);
6075  indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
6076  ++kk;
6077  } else ll = i;
6078  }
6079 
6080  if (pnode->children.size() == 4) {
6081  size_type i1 = ind[0], i2 = ind[1];
6082  if (i1 > i2) std::swap(i1, i2);
6083  size_type ii2 = 1, ii3 = 1;
6084  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6085  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
6086  if (i > i2) ii3 *= child1->tensor_proper_size(i);
6087  }
6088  pgai = std::make_shared<ga_instruction_contract_1_1>
6089  (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
6090  }
6091  else if (pnode->children.size() == 5) {
6092  // Special cases (ii2 == ii3 == 1 in particular) could be detected and optimised.
6093  size_type i1 = ind[0], i2 = ind[1];
6094  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
6095  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6096  if (i < i1) ii1 *= child1->tensor_proper_size(i);
6097  if (i > i1) ii2 *= child1->tensor_proper_size(i);
6098  }
6099  for (size_type i = 0; i < child2->tensor_order(); ++i) {
6100  if (i < i2) ii3 *= child2->tensor_proper_size(i);
6101  if (i > i2) ii4 *= child2->tensor_proper_size(i);
6102  }
6103  if (child1->test_function_type==1 && child2->test_function_type==2)
6104  pgai = std::make_shared<ga_instruction_contract_2_1_rev>
6105  (pnode->tensor(), child1->tensor(), child2->tensor(),
6106  indsize[0], ii1, ii2, ii3, ii4);
6107  else
6108  pgai = std::make_shared<ga_instruction_contract_2_1>
6109  (pnode->tensor(), child1->tensor(), child2->tensor(),
6110  indsize[0], ii1, ii2, ii3, ii4);
6111  }
6112  else if (pnode->children.size() == 7) {
6113  // Special cases (ii2 == ii3 == 1 in particular) could be detected and optimised.
6114  size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
6115  size_type nn1 = indsize[0], nn2 = indsize[1];
6116  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
6117  if (i1 > i2)
6118  { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
6119  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6120  if (i < i1) ii1 *= child1->tensor_proper_size(i);
6121  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
6122  if (i > i2) ii3 *= child1->tensor_proper_size(i);
6123  }
6124  for (size_type i = 0; i < child2->tensor_order(); ++i) {
6125  if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
6126  if ((i > i3 && i < i4) || (i > i4 && i < i3))
6127  ii5 *= child2->tensor_proper_size(i);
6128  if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
6129  }
6130  if (child1->test_function_type==1 && child2->test_function_type==2)
6131  pgai = std::make_shared<ga_instruction_contract_2_2_rev>
6132  (pnode->tensor(), child1->tensor(), child2->tensor(),
6133  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
6134  else
6135  pgai = std::make_shared<ga_instruction_contract_2_2>
6136  (pnode->tensor(), child1->tensor(), child2->tensor(),
6137  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
6138  }
6139  rmi.instructions.push_back(std::move(pgai));
6140  } else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
6141 
6142  std::string name = child0->name;
6143  const ga_predef_function_tab &PREDEF_FUNCTIONS
6144  = dal::singleton<ga_predef_function_tab>::instance(0);
6145  ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
6146  const ga_predef_function &F = it->second;
6147  size_type nbargs = F.nbargs();
6148  pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
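// F.ftype() == 0 means the predefined function is backed by a C++
// function pointer (F.f1()/F.f2()); otherwise it is defined by a GWFL
// expression and the *_expr instructions re-evaluate that expression.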
6149 
6150  if (nbargs == 1) {
6151  if (child1->tensor().size() == 1) {
6152  if (F.ftype() == 0)
6153  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
6154  (pnode->tensor()[0], child1->tensor()[0], F.f1());
6155  else
6156  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
6157  (pnode->tensor()[0], child1->tensor()[0], F);
6158  } else {
6159  if (F.ftype() == 0)
6160  pgai = std::make_shared<ga_instruction_eval_func_1arg>
6161  (pnode->tensor(), child1->tensor(), F.f1());
6162  else
6163  pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
6164  (pnode->tensor(), child1->tensor(), F);
6165  }
6166  } else {
6167  if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
6168  if (F.ftype() == 0)
6169  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
6170  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
6171  F.f2());
6172  else
6173  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
6174  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
6175  F);
6176  } else if (child1->tensor().size() == 1) {
6177  if (F.ftype() == 0)
6178  pgai =
6179  std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
6180  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
6181  else
6182  pgai =
6183  std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
6184  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
6185  } else if (child2->tensor().size() == 1) {
6186  if (F.ftype() == 0)
6187  pgai =
6188  std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
6189  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
6190  else
6191  pgai =
6192  std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
6193  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
6194  } else {
6195  if (F.ftype() == 0)
6196  pgai = std::make_shared<ga_instruction_eval_func_2arg>
6197  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
6198  else
6199  pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
6200  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
6201  }
6202  }
6203  rmi.instructions.push_back(std::move(pgai));
6204 
6205  } else if (child0->node_type == GA_NODE_SPEC_FUNC) {
6206 
6207  GMM_ASSERT1(false, "Internal error");
6208 
6209  } else if (child0->node_type == GA_NODE_OPERATOR) {
6210 
6211  ga_predef_operator_tab &PREDEF_OPERATORS
6212  = dal::singleton<ga_predef_operator_tab>::instance(0);
6213  ga_predef_operator_tab::T::iterator it
6214  = PREDEF_OPERATORS.tab.find(child0->name);
6215  const ga_nonlinear_operator &OP = *(it->second);
6216  ga_nonlinear_operator::arg_list args;
6217  for (size_type i = 1; i < pnode->children.size(); ++i)
6218  args.push_back(&(pnode->children[i]->tensor()));
6219 
6220  if (child0->der1 && child0->der2 == 0) {
6221  pgai = std::make_shared<ga_instruction_eval_derivative_OP>
6222  (pnode->tensor(), OP, args, child0->der1);
6223  } else if (child0->der1 && child0->der2) {
6224  pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
6225  (pnode->tensor(), OP, args, child0->der1, child0->der2);
6226  } else {
6227  pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
6228  OP, args);
6229  }
6230  rmi.instructions.push_back(std::move(pgai));
6231 
6232  } else { // Access to a component of the tensor
6233  bgeot::multi_index mi1(size0.size()), indices;
6234  if (pnode->tensor().size() == 1) {
6235  for (size_type i = 0; i < child0->tensor_order(); ++i)
6236  mi1[i] = size_type(round(pnode->children[i+1]->tensor()[0])-1);
6237  pgai = std::make_shared<ga_instruction_copy_scalar>
6238  (pnode->tensor()[0], child0->tensor()(mi1));
6239  } else {
6240  size_type nb_test = pnode->nb_test_functions();
6241  for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
6242  for (size_type i = 0; i < child0->tensor_order(); ++i) {
6243  if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
6244  mi1[i+nb_test]
6245  = size_type(round(pnode->children[i+1]->tensor()[0])- 1);
6246  else
6247  indices.push_back(i+nb_test);
6248  }
6249  pgai = std::make_shared<ga_instruction_tensor_slice>
6250  (pnode->tensor(), child0->tensor(), mi1, indices);
6251  }
6252  rmi.instructions.push_back(std::move(pgai));
6253  }
6254 
6255  break;
6256 
6257  default: GMM_ASSERT1(false, "Unexpected node type " << pnode->node_type
6258  << " in compilation. Internal error.");
6259  }
6260  if (tensor_to_clear) {
6261  gmm::clear(pnode->tensor().as_vector());
6262  if (!is_uniform) {
6263  pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
6264  rmi.elt_instructions.push_back(std::move(pgai));
6265  }
6266  }
6267  rmi.node_list[pnode->hash_value].push_back(pnode);
6268  }
6269 
6270  void ga_compile_function(ga_workspace &workspace,
6271  ga_instruction_set &gis, bool scalar) {
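// Compilation of function-like (scalar) expressions: no integration
// loop is involved, so each tree root is simply accumulated into the
// workspace's assembled tensor with a unit coefficient.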
6272  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
6273  const ga_workspace::tree_description &td = workspace.tree_info(i);
6274 
6275  gis.trees.push_back(*(td.ptree));
6276  pga_tree_node root = gis.trees.back().root;
6277  if (root) {
6278  GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
6279  "The result of the given expression is not a scalar");
6280  ga_instruction_set::region_mim rm(td.mim, td.rg);
6281  gis.whole_instructions[rm].m = td.m;
6282  ga_if_hierarchy if_hierarchy;
6283  ga_compile_node(root, workspace, gis,
6284  gis.whole_instructions[rm], *(td.m), true, if_hierarchy);
6285 
6286  gis.coeff = scalar_type(1);
6287  pga_instruction pgai;
6288  workspace.assembled_tensor() = root->tensor();
6289  pgai = std::make_shared<ga_instruction_add_to_coeff>
6290  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
6291  gis.whole_instructions[rm].instructions.push_back(std::move(pgai));
6292  }
6293  }
6294  }
6295 
6296  static bool ga_node_used_interpolates
6297  (const pga_tree_node pnode, const ga_workspace &workspace,
6298  std::map<std::string, std::set<std::string> > &interpolates,
6299  std::set<std::string> &interpolates_der) {
6300  bool found = false;
6301  bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
6302  pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
6303  pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
6304  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
6305  bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
6306  pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
6307  pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
6308  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
6309 
6310  if (intrpl || intrpl_test ||
6311  pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
6312  pnode->node_type == GA_NODE_INTERPOLATE_X ||
6313  pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
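// operator[] default-constructs the entry if it is absent; the size()
// call has no other purpose than forcing the creation of the (possibly
// empty) set for this transformation name (the same idiom reappears
// below and in ga_compile_interpolate_trans).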
6314  interpolates[pnode->interpolate_name].size();
6315  if (intrpl || intrpl_test) {
6316  if (workspace.variable_group_exists(pnode->name))
6317  interpolates[pnode->interpolate_name].insert(pnode->name);
6318  }
6319  found = true;
6320  }
6321  if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
6322  interpolates_der.insert(pnode->interpolate_name_der);
6323  interpolates[pnode->interpolate_name_der].size();
6324  if (workspace.variable_group_exists(pnode->name))
6325  interpolates[pnode->interpolate_name_der].insert(pnode->name);
6326  }
6327  for (size_type i = 0; i < pnode->children.size(); ++i)
6328  found = ga_node_used_interpolates(pnode->children[i], workspace,
6329  interpolates, interpolates_der)
6330  || found;
6331  return found;
6332  }
6333 
6334 
6335  static void ga_compile_interpolate_trans
6336  (const pga_tree_node pnode, const ga_workspace &workspace,
6337  ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
6338  const mesh &m) {
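// Collects all interpolate transformations appearing in the tree and
// emits, once per transformation, the instruction performing the
// transformation call (a dedicated variant exists for "neighbour_elt"),
// plus one group-info update instruction per variable group used
// through that transformation.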
6339 
6340  std::set<std::string> interpolates_der;
6341  std::map<std::string, std::set<std::string> > transformations;
6342  ga_node_used_interpolates(pnode, workspace, transformations,
6343  interpolates_der);
6344 
6345  for (const auto &transformation : transformations) {
6346  const std::string &transname = transformation.first;
6347  bool compute_der = (interpolates_der.count(transname) != 0);
6348  if (rmi.transformations.count(transname) == 0 ||
6349  (compute_der && rmi.transformations_der.count(transname) == 0)) {
6350  rmi.transformations[transname].size();
6351  gis.transformations.insert(transname);
6352  if (compute_der) rmi.transformations_der.insert(transname);
6353  pga_instruction pgai;
6354  if (transname.compare("neighbour_elt") == 0) {
6355  pgai = std::make_shared<ga_instruction_neighbour_transformation_call>
6356  (workspace, rmi.interpolate_infos[transname],
6357  workspace.interpolate_transformation(transname), gis.ctx,
6358  gis.Normal, m, gis.ipt, gis.pai, gis.gp_pool,
6359  gis.neighbour_corresp);
6360  } else {
6361  pgai = std::make_shared<ga_instruction_transformation_call>
6362  (workspace, rmi.interpolate_infos[transname],
6363  workspace.interpolate_transformation(transname), gis.ctx,
6364  gis.Normal, m, compute_der);
6365  }
6366  if (pgai) rmi.instructions.push_back(std::move(pgai));
6367  }
6368 
6369  for (const std::string &nodename : transformation.second) {
6370  if (rmi.transformations[transname].count(nodename) == 0) {
6371  auto&& inin = rmi.interpolate_infos[transname];
6372  pga_instruction pgai =
6373  std::make_shared<ga_instruction_update_group_info>
6374  (workspace, gis, inin, nodename, inin.groups_info[nodename]);
6375  rmi.instructions.push_back(std::move(pgai));
6376  rmi.transformations[transname].insert(nodename);
6377  }
6378  }
6379  }
6380  }
6381 
6382  void ga_compile_interpolation(ga_workspace &workspace,
6383  ga_instruction_set &gis) {
6384  gis.transformations.clear();
6385  gis.whole_instructions.clear();
6386  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
6387  const ga_workspace::tree_description &td = workspace.tree_info(i);
6388  if (td.interpolation > 0) {
6389  gis.trees.push_back(*(td.ptree));
6390 
6391  // Semantic analysis, mainly to evaluate fixed-size variables and data
6392  const mesh *m = td.m;
6393  GMM_ASSERT1(m, "Internal error");
6394  ga_semantic_analysis(gis.trees.back(), workspace, *m,
6395  ref_elt_dim_of_mesh(*m), true, false);
6396  pga_tree_node root = gis.trees.back().root;
6397  if (root) {
6398  // Compile tree
6399  ga_instruction_set::region_mim rm(td.mim, td.rg);
6400  ga_instruction_set::region_mim_instructions &rmi
6401  = gis.whole_instructions[rm];
6402  rmi.m = td.m;
6403  rmi.im = td.mim;
6404  // rmi.interpolate_infos.clear();
6405  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
6406  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
6407  rmi.current_hierarchy);
6408 
6409  // After compiling the tree
6410  workspace.assembled_tensor() = root->tensor();
6411  pga_instruction pgai = std::make_shared<ga_instruction_add_to>
6412  (workspace.assembled_tensor(), root->tensor());
6413  rmi.instructions.push_back(std::move(pgai));
6414  }
6415  }
6416  }
6417  }
6418 
6419  void ga_compile(ga_workspace &workspace,
6420  ga_instruction_set &gis, size_type order) {
6421  gis.transformations.clear();
6422  gis.whole_instructions.clear();
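// Trees are compiled in three passes ordered {1, 0, 2}: a tree is
// handled in the pass matching its td.interpolation flag, so
// interpolation/assignment trees flagged 1 are compiled before the
// plain assembly trees (flag 0), and those flagged 2 after them.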
6423  for (size_type version : std::array<size_type, 3>{1, 0, 2}) {
6424  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
6425  ga_workspace::tree_description &td = workspace.tree_info(i);
6426 
6427  if ((version == td.interpolation) &&
6428  ((version == 0 && td.order == order) || // Assembly
6429  ((version > 0 && (td.order == size_type(-1) || // Assignment
6430  td.order == size_type(-2) - order))))) {
6431  ga_tree *added_tree = 0;
6432  if (td.interpolation) {
6433  gis.interpolation_trees.push_back(*(td.ptree));
6434  added_tree = &(gis.interpolation_trees.back());
6435  } else {
6436  gis.trees.push_back(*(td.ptree));
6437  added_tree = &(gis.trees.back());
6438  }
6439 
6440  // Semantic analysis, mainly to evaluate fixed-size variables and data
6441  ga_semantic_analysis(*added_tree, workspace,
6442  td.mim->linked_mesh(),
6443  ref_elt_dim_of_mesh(td.mim->linked_mesh()),
6444  true, false);
6445  pga_tree_node root = added_tree->root;
6446  if (root) {
6447  // Compile tree
6448  // cout << "Will compile "; ga_print_node(root, cout); cout << endl;
6449 
6450  ga_instruction_set::region_mim rm(td.mim, td.rg);
6451  ga_instruction_set::region_mim_instructions &rmi
6452  = gis.whole_instructions[rm];
6453  rmi.m = td.m;
6454  rmi.im = td.mim;
6455  // rmi.interpolate_infos.clear();
6456  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
6457  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
6458  rmi.current_hierarchy);
6459  // cout << "compilation finished "; ga_print_node(root, cout);
6460  // cout << endl;
6461 
6462  if (version > 0) { // Assignment OR interpolation
6463  if (td.varname_interpolation.size() != 0) { // assignment
6464  auto *imd
6465  = workspace.associated_im_data(td.varname_interpolation);
6466  auto &V = const_cast<model_real_plain_vector &>
6467  (workspace.value(td.varname_interpolation));
6468  GMM_ASSERT1(imd, "Internal error");
6469  auto pgai = std::make_shared<ga_instruction_assignment>
6470  (root->tensor(), V, gis.ctx, imd);
6471  rmi.instructions.push_back(std::move(pgai));
6472  }
6473  } else { // Addition of an assembly instruction
6474  pga_instruction pgai;
6475  switch(order) {
6476  case 0:
6477  workspace.assembled_tensor() = root->tensor();
6478  pgai = std::make_shared<ga_instruction_add_to_coeff>
6479  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
6480  break;
6481  case 1:
6482  {
6483  GMM_ASSERT1(root->tensor_proper_size() == 1,
6484  "Invalid vector or tensor quantity. An order 1 "
6485  "weak form has to be a scalar quantity");
6486  const mesh_fem *mf=workspace.associated_mf(root->name_test1);
6487  const mesh_fem **mfg = 0;
6488  add_interval_to_gis(workspace, root->name_test1, gis);
6489 
6490  if (mf) {
6491  const std::string &intn1 = root->interpolate_name_test1;
6492  const gmm::sub_interval *Ir = 0, *In = 0;
6493  if (intn1.size() &&
6494  workspace.variable_group_exists(root->name_test1)) {
6495  ga_instruction_set::variable_group_info &vgi =
6496  rmi.interpolate_infos[intn1]
6497  .groups_info[root->name_test1];
6498  Ir = &(vgi.Ir);
6499  In = &(vgi.In);
6500  mfg = &(vgi.mf);
6501  mf = 0;
6502  } else {
6503  Ir = &(gis.var_intervals[root->name_test1]);
6504  In = &(workspace.interval_of_variable(root->name_test1));
6505  }
6506  fem_interpolation_context &ctx
6507  = intn1.size() ? rmi.interpolate_infos[intn1].ctx
6508  : gis.ctx;
6509  bool interpolate
6510  = (!intn1.empty() && intn1.compare("neighbour_elt")!=0);
6511  pgai = std::make_shared<ga_instruction_fem_vector_assembly>
6512  (root->tensor(), workspace.unreduced_vector(),
6513  workspace.assembled_vector(), ctx, *Ir, *In, mf, mfg,
6514  gis.coeff, gis.nbpt, gis.ipt, interpolate);
6515  } else {
6516  pgai = std::make_shared<ga_instruction_vector_assembly>
6517  (root->tensor(), workspace.assembled_vector(),
6518  workspace.interval_of_variable(root->name_test1),
6519  gis.coeff);
6520  }
6521  }
6522  break;
6523  case 2:
6524  {
6525  GMM_ASSERT1(root->tensor_proper_size() == 1,
6526  "Invalid vector or tensor quantity. An order 2 "
6527  "weak form has to be a scalar quantity");
6528  const mesh_fem *mf1=workspace.associated_mf(root->name_test1);
6529  const mesh_fem *mf2=workspace.associated_mf(root->name_test2);
6530  const mesh_fem **mfg1 = 0, **mfg2 = 0;
6531  const std::string &intn1 = root->interpolate_name_test1;
6532  const std::string &intn2 = root->interpolate_name_test2;
6533  fem_interpolation_context &ctx1
6534  = intn1.empty() ? gis.ctx
6535  : rmi.interpolate_infos[intn1].ctx;
6536  fem_interpolation_context &ctx2
6537  = intn2.empty() ? gis.ctx
6538  : rmi.interpolate_infos[intn2].ctx;
6539  bool interpolate
6540  = (!intn1.empty() && intn1.compare("neighbour_elt")!=0)
6541  || (!intn2.empty() && intn2.compare("neighbour_elt")!=0);
6542 
6543  add_interval_to_gis(workspace, root->name_test1, gis);
6544  add_interval_to_gis(workspace, root->name_test2, gis);
6545 
6546  const gmm::sub_interval *Ir1 = 0, *In1 = 0, *Ir2 = 0, *In2=0;
6547  const scalar_type *alpha1 = 0, *alpha2 = 0;
6548 
6549  if (!intn1.empty() &&
6550  workspace.variable_group_exists(root->name_test1)) {
6551  ga_instruction_set::variable_group_info &vgi =
6552  rmi.interpolate_infos[intn1]
6553  .groups_info[root->name_test1];
6554  Ir1 = &(vgi.Ir);
6555  In1 = &(vgi.In);
6556  mfg1 = &(vgi.mf);
6557  mf1 = 0;
6558  alpha1 = &(vgi.alpha);
6559  } else {
6560  alpha1 = &(workspace.factor_of_variable(root->name_test1));
6561  Ir1 = &(gis.var_intervals[root->name_test1]);
6562  In1 = &(workspace.interval_of_variable(root->name_test1));
6563  }
6564 
6565  if (!intn2.empty() &&
6566  workspace.variable_group_exists(root->name_test2)) {
6567  ga_instruction_set::variable_group_info &vgi =
6568  rmi.interpolate_infos[intn2]
6569  .groups_info[root->name_test2];
6570  Ir2 = &(vgi.Ir);
6571  In2 = &(vgi.In);
6572  mfg2 = &(vgi.mf);
6573  mf2 = 0;
6574  alpha2 = &(vgi.alpha);
6575  } else {
6576  alpha2 = &(workspace.factor_of_variable(root->name_test2));
6577  Ir2 = &(gis.var_intervals[root->name_test2]);
6578  In2 = &(workspace.interval_of_variable(root->name_test2));
6579  }
6580 
6581  if (!interpolate && mfg1 == 0 && mfg2 == 0 && mf1 && mf2
6582  && mf1->get_qdim() == 1 && mf2->get_qdim() == 1
6583  && !(mf1->is_reduced()) && !(mf2->is_reduced())) {
6584  pgai = std::make_shared
6585  <ga_instruction_matrix_assembly_standard_scalar<>>
6586  (root->tensor(), workspace.assembled_matrix(), ctx1, ctx2,
6587  *In1, *In2, mf1, mf2,
6588  gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6589  } else if (!interpolate && mfg1 == 0 && mfg2==0 && mf1 && mf2
6590  && !(mf1->is_reduced()) && !(mf2->is_reduced())) {
6591  if (root->sparsity() == 10 && root->t.qdim()==2)
6592  pgai = std::make_shared
6593  <ga_instruction_matrix_assembly_standard_vector_opt10_2>
6594  (root->tensor(), workspace.assembled_matrix(),ctx1,ctx2,
6595  *In1, *In2, mf1, mf2,
6596  gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6597  else if (root->sparsity() == 10 && root->t.qdim()==3)
6598  pgai = std::make_shared
6599  <ga_instruction_matrix_assembly_standard_vector_opt10_3>
6600  (root->tensor(), workspace.assembled_matrix(),ctx1,ctx2,
6601  *In1, *In2, mf1, mf2,
6602  gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6603  else
6604  pgai = std::make_shared
6605  <ga_instruction_matrix_assembly_standard_vector<>>
6606  (root->tensor(), workspace.assembled_matrix(),ctx1,ctx2,
6607  *In1, *In2, mf1, mf2,
6608  gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6609 
6610  } else {
6611  pgai = std::make_shared<ga_instruction_matrix_assembly<>>
6612  (root->tensor(), workspace.unreduced_matrix(),
6613  workspace.assembled_matrix(), ctx1, ctx2,
6614  *Ir1, *In1, *Ir2, *In2, mf1, mfg1, mf2, mfg2,
6615  gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt,
6616  interpolate);
6617  }
6618  break;
6619  }
6620  }
6621  if (pgai)
6622  gis.whole_instructions[rm].instructions.push_back
6623  (std::move(pgai));
6624  }
6625  }
6626  }
6627  }
6628  }
6629  }
6630 
6631 
6632  //=========================================================================
6633  // Execution of a compiled set of assembly terms
6634  //=========================================================================
6635 
6636 
6637  void ga_function_exec(ga_instruction_set &gis) {
6638 
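// Note on the execution loops: exec() returns the number of subsequent
// instructions to skip (0 in the common case), hence the increment
// "j += gil[j]->exec()" used throughout this file.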
6639  for (auto &&instr : gis.whole_instructions) {
6640  ga_instruction_list &gil = instr.second.instructions;
6641  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
6642  }
6643  }
6644 
6645  void ga_interpolation_exec(ga_instruction_set &gis,
6646  ga_workspace &workspace,
6647  ga_interpolation_context &gic) {
6648  base_matrix G;
6649  base_small_vector un, up;
6650 
6651  for (const std::string &t : gis.transformations)
6652  workspace.interpolate_transformation(t)->init(workspace);
6653 
6654  for (auto &&instr : gis.whole_instructions) {
6655 
6656  const getfem::mesh_im &mim = *(instr.first.mim());
6657  const mesh_region &region = *(instr.first.region());
6658  const getfem::mesh &m = *(instr.second.m);
6659  GMM_ASSERT1(&m == &(gic.linked_mesh()),
6660  "Incompatibility of meshes in interpolation");
6661  ga_instruction_list &gilb = instr.second.begin_instructions;
6662  ga_instruction_list &gile = instr.second.elt_instructions;
6663  ga_instruction_list &gil = instr.second.instructions;
6664 
6665  // iteration on elements (or faces of elements)
6666  std::vector<size_type> ind;
6667  auto pai_old = papprox_integration{};
6668  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
6669  if (gic.use_mim()) {
6670  if (!mim.convex_index().is_in(v.cv())) continue;
6671  gis.pai = mim.int_method_of_element(v.cv())->approx_method();
6672  } else
6673  gis.pai = 0;
6674 
6675  ind.resize(0);
6676  bgeot::pstored_point_tab pspt
6677  = gic.ppoints_for_element(v.cv(), v.f(), ind);
6678 
6679  if (pspt.get() && ind.size() && pspt->size()) {
6680  m.points_of_convex(v.cv(), G);
6681  bgeot::pgeometric_trans pgt = m.trans_of_convex(v.cv());
6682  up.resize(G.nrows());
6683  un.resize(pgt->dim());
6684 
6685  if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
6686  gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
6687  } else {
6688  if (!(gic.use_pgp(v.cv()))) {
6689  gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
6690  } else {
6691  gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
6692  }
6693  }
6694  pai_old = gis.pai;
6695 
6696  if (gis.need_elt_size)
6697  gis.elt_size = m.convex_radius_estimate(v.cv()) * scalar_type(2);
6698 
6699  // iterations on interpolation points
6700  gis.nbpt = pspt->size();
6701  for (size_type ii = 0; ii < ind.size(); ++ii) {
6702  gis.ipt = ii;
6703  if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
6704  else gis.ctx.set_xref((*pspt)[gis.ipt]);
6705 
6706  if (ii == 0 || !(pgt->is_linear())) {
6707  // Computation of unit normal vector in case of a boundary
6708  if (v.f() != short_type(-1)) {
6709  const base_matrix& B = gis.ctx.B();
6710  gmm::copy(pgt->normals()[v.f()], un);
6711  gmm::mult(B, un, up);
6712  scalar_type nup = gmm::vect_norm2(up);
6713  gmm::scale(up, 1.0/nup);
6714  gmm::clean(up, 1e-13);
6715  gis.Normal = up;
6716  } else gis.Normal.resize(0);
6717  }
6718  gmm::clear(workspace.assembled_tensor().as_vector());
6719  if (ii == 0) {
6720  for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
6721  for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
6722  }
6723  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
6724  gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
6725  }
6726  }
6727  }
6728  }
6729  for (const std::string &t : gis.transformations)
6730  workspace.interpolate_transformation(t)->finalize();
6731 
6732  gic.finalize();
6733  }
6734 
6735  void ga_interpolation_single_point_exec
6736  (ga_instruction_set &gis, ga_workspace &workspace,
6737  const fem_interpolation_context &ctx_x, const base_small_vector &Normal,
6738  const mesh &interp_mesh) {
6739  gis.ctx = ctx_x;
6740  gis.Normal = Normal;
6741  gmm::clear(workspace.assembled_tensor().as_vector());
6742  gis.nbpt = 1;
6743  gis.ipt = 0;
6744  gis.pai = 0;
6745 
6746  for (auto &&instr : gis.whole_instructions) {
6747  const getfem::mesh &m = *(instr.second.m);
6748  GMM_ASSERT1(&m == &interp_mesh,
6749  "Incompatibility of meshes in interpolation");
6750  ga_instruction_list &gilb = instr.second.begin_instructions;
6751  for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
6752  ga_instruction_list &gile = instr.second.elt_instructions;
6753  for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
6754  ga_instruction_list &gil = instr.second.instructions;
6755  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
6756  }
6757  }
6758 
6759  void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
6760  base_matrix G;
6761  base_small_vector un;
6762  scalar_type J(0);
6763 
6764  for (const std::string &t : gis.transformations)
6765  workspace.interpolate_transformation(t)->init(workspace);
6766 
6767  for (const auto &instr : gis.whole_instructions) {
6768  const getfem::mesh_im &mim = *(instr.first.mim());
6769  const getfem::mesh &m = *(instr.second.m);
6770  GMM_ASSERT1(&m == &(mim.linked_mesh()), "Incompatibility of meshes");
6771  const ga_instruction_list &gilb = instr.second.begin_instructions;
6772  const ga_instruction_list &gile = instr.second.elt_instructions;
6773  const ga_instruction_list &gil = instr.second.instructions;
6774 
6775  // if (gilb.size()) cout << "Begin instructions\n";
6776  // for (size_type j = 0; j < gilb.size(); ++j)
6777  // cout << typeid(*(gilb[j])).name() << endl;
6778  // if (gile.size()) cout << "\nElement instructions\n";
6779  // for (size_type j = 0; j < gile.size(); ++j)
6780  // cout << typeid(*(gile[j])).name() << endl;
6781  // cout << "\nGauss pt instructions\n";
6782  // for (size_type j = 0; j < gil.size(); ++j)
6783  // cout << typeid(*(gil[j])).name() << endl;
6784 
6785  const mesh_region &region = *(instr.first.region());
6786 
6787  // iteration on elements (or faces of elements)
6788  size_type old_cv = size_type(-1);
6789  bgeot::pgeometric_trans pgt = 0, pgt_old = 0;
6790  pintegration_method pim = 0;
6791  papprox_integration pai = 0;
6792  bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
6793  bgeot::pgeotrans_precomp pgp = 0;
6794  bool first_gp = true;
6795  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
6796  if (mim.convex_index().is_in(v.cv())) {
6797  // cout << "proceed with elt " << v.cv() << " face " << v.f() << endl;
6798  if (v.cv() != old_cv) {
6799  pgt = m.trans_of_convex(v.cv());
6800  pim = mim.int_method_of_element(v.cv());
6801  m.points_of_convex(v.cv(), G);
6802 
6803  if (pim->type() == IM_NONE) continue;
6804  GMM_ASSERT1(pim->type() == IM_APPROX, "Sorry, exact methods cannot "
6805  "be used in high level generic assembly");
6806  pai = pim->approx_method();
6807  pspt = pai->pintegration_points();
6808  if (pspt->size()) {
6809  if (pgp && gis.pai == pai && pgt_old == pgt) {
6810  gis.ctx.change(pgp, 0, 0, G, v.cv(), v.f());
6811  } else {
6812  if (pai->is_built_on_the_fly()) {
6813  gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
6814  pgp = 0;
6815  } else {
6816  pgp = gis.gp_pool(pgt, pspt);
6817  gis.ctx.change(pgp, 0, 0, G, v.cv(), v.f());
6818  }
6819  pgt_old = pgt; gis.pai = pai;
6820  }
6821  if (gis.need_elt_size)
6822  gis.elt_size = convex_radius_estimate(pgt, G)*scalar_type(2);
6823  }
6824  old_cv = v.cv();
6825  } else {
6826  if (pim->type() == IM_NONE) continue;
6827  gis.ctx.set_face_num(v.f());
6828  }
6829  if (pspt != old_pspt) { first_gp = true; old_pspt = pspt; }
6830  if (pspt->size()) {
6831  // iterations on Gauss points
6832  gis.nbpt = pai->nb_points_on_convex();
6833  size_type first_ind = 0;
6834  if (v.f() != short_type(-1)) {
6835  gis.nbpt = pai->nb_points_on_face(v.f());
6836  first_ind = pai->ind_first_point_on_face(v.f());
6837  }
6838  for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
6839  // cout << "Gauss pt " << gis.ipt << endl;
6840  if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
6841  else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
6842  if (gis.ipt == 0 || !(pgt->is_linear())) {
6843  J = gis.ctx.J();
6844  // Computation of unit normal vector in case of a boundary
6845  if (v.f() != short_type(-1)) {
6846  gis.Normal.resize(G.nrows());
6847  un.resize(pgt->dim());
6848  gmm::copy(pgt->normals()[v.f()], un);
6849  gmm::mult(gis.ctx.B(), un, gis.Normal);
6850  scalar_type nup = gmm::vect_norm2(gis.Normal);
6851  J *= nup;
6852  gmm::scale(gis.Normal, 1.0/nup);
6853  gmm::clean(gis.Normal, 1e-13);
6854  } else gis.Normal.resize(0);
6855  }
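// gis.ctx.B() maps reference normals to (unnormalised) real-space
// normals; rescaling J by |B*un| turns the reference-face quadrature
// weights into real-face ones, so the coefficient computed just below
// is weight * Jacobian, face-corrected when v.f() is a valid face.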
6856  auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
6857  gis.coeff = J * ipt_coeff;
6858  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
6859  workspace.include_empty_int_points());
6860  if (!enable_ipt) gis.coeff = scalar_type(0);
6861  if (first_gp) {
6862  for (size_type j = 0; j < gilb.size(); ++j) j+=gilb[j]->exec();
6863  first_gp = false;
6864  }
6865  if (gis.ipt == 0) {
6866  for (size_type j = 0; j < gile.size(); ++j) j+=gile[j]->exec();
6867  }
6868  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
6869  for (size_type j = 0; j < gil.size(); ++j) j+=gil[j]->exec();
6870  }
6871  GA_DEBUG_INFO("");
6872  }
6873  }
6874  }
6875  }
6876  GA_DEBUG_INFO("-----------------------------");
6877  }
6878  for (const std::string &t : gis.transformations)
6879  workspace.interpolate_transformation(t)->finalize();
6880  }
6881 
6882 
6883 } /* end of namespace */