#include "getfem/getfem_generic_assembly_compile_and_exec.h"
#include "getfem/getfem_generic_assembly_functions_and_operators.h"

#define GA_DEBUG_INFO(a)

  bool operator <(const gauss_pt_corresp &gpc1,
                  const gauss_pt_corresp &gpc2) {
    if (gpc1.pai != gpc2.pai)
      return (gpc1.pai < gpc2.pai);
    if (gpc1.nodes.size() != gpc2.nodes.size())
      return (gpc1.nodes.size() < gpc2.nodes.size());
    for (size_type i = 0; i < gpc1.nodes.size(); ++i)
      if (gpc1.nodes[i] != gpc2.nodes[i])
        return (gpc1.nodes[i] < gpc2.nodes[i]);
    if (gpc1.pgt1 != gpc2.pgt1)
      return (gpc1.pgt1 < gpc2.pgt1);
    if (gpc1.pgt2 != gpc2.pgt2)
      return (gpc1.pgt2 < gpc2.pgt2);
    return false;
  }
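  /* Illustrative aside (not from the GetFEM sources): the cascade above is a
     strict weak ordering, which is exactly what is needed for
     gauss_pt_corresp to serve as a std::map key. A minimal self-contained
     analogue of the same comparison pattern, using only the standard
     library (all names below are hypothetical):

       #include <vector>

       struct corresp_key {
         const void *pai;           // stands in for the papprox_integration
         std::vector<int> nodes;    // stands in for the node list
       };

       inline bool operator<(const corresp_key &a, const corresp_key &b) {
         if (a.pai != b.pai) return a.pai < b.pai;
         if (a.nodes.size() != b.nodes.size())
           return a.nodes.size() < b.nodes.size();
         return a.nodes < b.nodes;  // lexicographic, sizes being equal
       }
  */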
  struct ga_instruction_extract_local_im_data : public ga_instruction {
    base_tensor &t;
    const im_data &imd;
    papprox_integration &pai;
    const base_vector &U;
    const fem_interpolation_context &ctx;
    const size_type qdim;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: extract local im data");
      size_type cv = ctx.convex_num();
      GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
                  ->approx_method() == pai, "Im data have to be used only "
                  "on their original integration method.");
      size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
      GMM_ASSERT1(ipt != size_type(-1),
                  "Im data with no data on the current integration point.");
      auto it = U.begin()+ipt*qdim;
      std::copy(it, it+qdim, t.begin());
      return 0;
    }
    ga_instruction_extract_local_im_data
    (base_tensor &t_, const im_data &imd_, const base_vector &U_,
     papprox_integration &pai_, const fem_interpolation_context &ctx_,
     size_type qdim_)
      : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_)
    {}
  };
  struct ga_instruction_slice_local_dofs : public ga_instruction {
    const mesh_fem &mf;
    const base_vector &U;
    const fem_interpolation_context &ctx;
    base_vector &coeff;
    size_type qmult1, qmult2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Slice local dofs");
      GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
      slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
                                           coeff, qmult1, qmult2);
      return 0;
    }
    ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
                                    const fem_interpolation_context &ctx_,
                                    base_vector &coeff_,
                                    size_type qmult1_, size_type qmult2_)
      : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
        qmult1(qmult1_), qmult2(qmult2_) {}
  };
  struct ga_instruction_update_pfp : public ga_instruction {
    const mesh_fem &mf;
    const fem_interpolation_context &ctx;
    fem_precomp_pool &fp_pool;
    pfem_precomp &pfp;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Pfp update");
      if (ctx.have_pgp()) {
        size_type cv = ctx.is_convex_num_valid()
                     ? ctx.convex_num() : mf.convex_index().first_true();
        pfem pf = mf.fem_of_element(cv);
        if (!pfp || pf != pfp->get_pfem() ||
            ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
          pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
        }
      }
      return 0;
    }
    ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
                              const fem_interpolation_context &ctx_,
                              fem_precomp_pool &fp_pool_)
      : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
  };
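  /* Illustrative aside (not from the GetFEM sources): the test above is a
     cache-validity check; the pool is only queried when the finite element
     or the point table changed. The same memoization pattern in miniature,
     with hypothetical key and payload types:

       #include <map>
       #include <utility>

       struct precomp { int payload; };                 // hypothetical value
       using key = std::pair<const void*, const void*>; // (fem, point table)

       const precomp *lookup(std::map<key, precomp> &pool, key k) {
         auto it = pool.find(k);
         if (it == pool.end())               // compute only on a cache miss
           it = pool.emplace(k, precomp{}).first;
         return &it->second;
       }
  */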
  struct ga_instruction_first_ind_tensor : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;
    size_type qdim;
    const mesh_fem *mfn, **mfg;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: adapt first index of tensor");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
      size_type cv_1 = ctx.is_convex_num_valid()
                     ? ctx.convex_num() : mf.convex_index().first_true();
      pfem pf = mf.fem_of_element(cv_1);
      GMM_ASSERT1(pf, "An element without finite element method defined");
      size_type Qmult = qdim / pf->target_dim();
      size_type s = pf->nb_dof(cv_1) * Qmult;
      if (t.sizes()[0] != s)
        { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
      return 0;
    }
    ga_instruction_first_ind_tensor(base_tensor &t_,
                                    const fem_interpolation_context &ctx_,
                                    size_type qdim_, const mesh_fem *mfn_,
                                    const mesh_fem **mfg_)
      : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
  };
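  /* Illustrative aside (not from the GetFEM sources): adjust_sizes() above
     only touches the first entry of the multi-index, i.e. the number of
     local test-function dofs; the trailing dimensions are preserved. A
     plain-vector analogue of that resize, under the same column-major
     convention (all names hypothetical):

       #include <cstddef>
       #include <vector>

       // sizes[0] is the leading (fastest-varying) dimension
       void adjust_first_dim(std::vector<std::size_t> &sizes,
                             std::vector<double> &data, std::size_t s) {
         if (sizes[0] == s) return;           // nothing to do, as above
         std::size_t rest = data.size() / sizes[0];
         sizes[0] = s;
         data.assign(s * rest, 0.0);          // storage is re-laid-out
       }
  */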
  struct ga_instruction_second_ind_tensor
    : public ga_instruction_first_ind_tensor {
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: adapt second index of tensor");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      size_type cv_1 = ctx.is_convex_num_valid()
                     ? ctx.convex_num() : mf.convex_index().first_true();
      pfem pf = mf.fem_of_element(cv_1);
      GMM_ASSERT1(pf, "An element without finite element method defined");
      size_type Qmult = qdim / pf->target_dim();
      size_type s = pf->nb_dof(cv_1) * Qmult;
      if (t.sizes()[1] != s)
        { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
      return 0;
    }
    ga_instruction_second_ind_tensor(base_tensor &t_,
                                     fem_interpolation_context &ctx_,
                                     size_type qdim_, const mesh_fem *mfn_,
                                     const mesh_fem **mfg_)
      : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
  };
  struct ga_instruction_two_first_ind_tensor : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx1, &ctx2;
    size_type qdim1;
    const mesh_fem *mfn1, **mfg1;
    size_type qdim2;
    const mesh_fem *mfn2, **mfg2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
      const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
      const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
      size_type cv_1 = ctx1.is_convex_num_valid()
                     ? ctx1.convex_num() : mf1.convex_index().first_true();
      size_type cv_2 = ctx2.is_convex_num_valid()
                     ? ctx2.convex_num() : mf2.convex_index().first_true();
      pfem pf1 = mf1.fem_of_element(cv_1);
      GMM_ASSERT1(pf1, "An element without finite element method defined");
      pfem pf2 = mf2.fem_of_element(cv_2);
      GMM_ASSERT1(pf2, "An element without finite element method defined");
      size_type Qmult1 = qdim1 / pf1->target_dim();
      size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
      size_type Qmult2 = qdim2 / pf2->target_dim();
      size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
      if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
        bgeot::multi_index mi = t.sizes();
        mi[0] = s1; mi[1] = s2;
        t.adjust_sizes(mi);
      }
      return 0;
    }
    ga_instruction_two_first_ind_tensor
    (base_tensor &t_, const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
     size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
      : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
        mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
  };
  struct ga_instruction_X_component : public ga_instruction {
    scalar_type &t;
    const fem_interpolation_context &ctx;
    size_type n;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: X component");
      t = ctx.xreal()[n];
      return 0;
    }
    ga_instruction_X_component
    (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
      : t(t_), ctx(ctx_), n(n_) {}
  };
  struct ga_instruction_X : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: X");
      GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
      gmm::copy(ctx.xreal(), t.as_vector());
      return 0;
    }
    ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
      : t(t_), ctx(ctx_) {}
  };
  struct ga_instruction_copy_small_vect : public ga_instruction {
    base_tensor &t;
    const base_small_vector &vec;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: copy small vector");
      GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
      gmm::copy(vec, t.as_vector());
      return 0;
    }
    ga_instruction_copy_small_vect(base_tensor &t_,
                                   const base_small_vector &vec_)
      : t(t_), vec(vec_) {}
  };
  struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unit normal vector");
      GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                  "vector. Possible reasons: not on boundary or "
                  "transformation failed.");
      gmm::copy(vec, t.as_vector());
      return 0;
    }
    ga_instruction_copy_Normal(base_tensor &t_,
                               const base_small_vector &Normal_)
      : ga_instruction_copy_small_vect(t_, Normal_) {}
  };
  struct ga_instruction_level_set_normal_vector : public ga_instruction {
    base_tensor &t;
    const mesh_im_level_set *mimls;
    const fem_interpolation_context &ctx;
    base_small_vector vec;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
      mimls->compute_normal_vector(ctx, vec);
      GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                  "vector. Possible reasons: not on boundary or "
                  "transformation failed.");
      gmm::copy(vec, t.as_vector());
      return 0;
    }
    ga_instruction_level_set_normal_vector
    (base_tensor &t_, const mesh_im_level_set *mimls_,
     const fem_interpolation_context &ctx_)
      : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
  };
  struct ga_instruction_element_size : public ga_instruction {
    base_tensor &t;
    scalar_type &es;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: element_size");
      GMM_ASSERT1(t.size() == 1, "Invalid element size.");
      t[0] = es;
      return 0;
    }
    ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
      : t(t_), es(es_) {}
  };
  struct ga_instruction_element_K : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: element_K");
      GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
      gmm::copy(ctx.K().as_vector(), t.as_vector());
      return 0;
    }
    ga_instruction_element_K(base_tensor &t_,
                             const fem_interpolation_context &ct)
      : t(t_), ctx(ct) {}
  };
  struct ga_instruction_element_B : public ga_instruction {
    base_tensor &t;
    const fem_interpolation_context &ctx;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: element_B");
      GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
      gmm::copy(ctx.B().as_vector(), t.as_vector());
      return 0;
    }
    ga_instruction_element_B(base_tensor &t_,
                             const fem_interpolation_context &ct)
      : t(t_), ctx(ct) {}
  };
  struct ga_instruction_val_base : public ga_instruction {
    base_tensor &t;
    fem_interpolation_context &ctx;
    const mesh_fem &mf;
    const pfem_precomp &pfp;
    virtual int exec() { // --> t(ndof,target_dim)
      GA_DEBUG_INFO("Instruction: compute value of base functions");
      if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
      else {
        ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
        GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
        ctx.base_value(t);
      }
      return 0;
    }
    ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
                            const mesh_fem &mf_, const pfem_precomp &pfp_)
      : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
  };
  struct ga_instruction_xfem_plus_val_base : public ga_instruction {
    base_tensor &t;
    fem_interpolation_context &ctx;
    const mesh_fem &mf;
    pfem_precomp &pfp;
    virtual int exec() { // --> t(ndof,target_dim)
      GA_DEBUG_INFO("Instruction: compute value of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(1);
      ctx.base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }
    ga_instruction_xfem_plus_val_base(base_tensor &tt,
                                      fem_interpolation_context &ct,
                                      const mesh_fem &mf_, pfem_precomp &pfp_)
      : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
  };
  struct ga_instruction_xfem_minus_val_base : public ga_instruction {
    base_tensor &t;
    fem_interpolation_context &ctx;
    const mesh_fem &mf;
    pfem_precomp &pfp;
    virtual int exec() { // --> t(ndof,target_dim)
      GA_DEBUG_INFO("Instruction: compute value of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(-1);
      ctx.base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }
    ga_instruction_xfem_minus_val_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
  };
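  /* Illustrative aside (not from the GetFEM sources): the xfem instructions
     above save the current side, force it to +1 or -1, and restore it by
     hand. The same save/restore discipline can be written once as an RAII
     guard; a sketch with a hypothetical plain-int side flag:

       struct xfem_side_guard {
         int &side, saved;
         xfem_side_guard(int &s, int wanted) : side(s), saved(s)
         { side = wanted; }
         ~xfem_side_guard() { side = saved; } // restored on any exit path
       };
  */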
  struct ga_instruction_grad_base : public ga_instruction_val_base {
    virtual int exec() { // --> t(ndof,target_dim,N)
      GA_DEBUG_INFO("Instruction: compute gradient of base functions");
      if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
      else {
        ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
        GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
        ctx.grad_base_value(t);
      }
      return 0;
    }
    ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
                             const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };
  struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {
    virtual int exec() { // --> t(ndof,target_dim,N)
      GA_DEBUG_INFO("Instruction: compute gradient of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(1);
      ctx.grad_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }
    ga_instruction_xfem_plus_grad_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };
  struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {
    virtual int exec() { // --> t(ndof,target_dim,N)
      GA_DEBUG_INFO("Instruction: compute gradient of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(-1);
      ctx.grad_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }
    ga_instruction_xfem_minus_grad_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };
  struct ga_instruction_hess_base : public ga_instruction_val_base {
    virtual int exec() { // --> t(ndof,target_dim,N*N)
      GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      ctx.hess_base_value(t);
      return 0;
    }
    ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
                             const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };
  struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {
    virtual int exec() { // --> t(ndof,target_dim,N*N)
      GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(1);
      ctx.hess_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }
    ga_instruction_xfem_plus_hess_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };
  struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {
    virtual int exec() { // --> t(ndof,target_dim,N*N)
      GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
      if (ctx.have_pgp()) ctx.set_pfp(pfp);
      else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      int old_xfem_side = ctx.xfem_side();
      ctx.set_xfem_side(-1);
      ctx.hess_base_value(t);
      ctx.set_xfem_side(old_xfem_side);
      return 0;
    }
    ga_instruction_xfem_minus_hess_base
    (base_tensor &tt, fem_interpolation_context &ct,
     const mesh_fem &mf_, pfem_precomp &pfp_)
      : ga_instruction_val_base(tt, ct, mf_, pfp_)
    {}
  };
  struct ga_instruction_val : public ga_instruction {
    scalar_type &a;
    base_tensor &t;
    const base_tensor &Z;
    const base_vector &coeff;
    size_type qdim;
    // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: variable value");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");

      if (qdim == 1) {
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                        "Wrong size for coeff vector");
        auto itc = coeff.begin(); auto itZ = Z.begin();
        a = (*itc++) * (*itZ++);
        while (itc != coeff.end()) a += (*itc++) * (*itZ++);
      } else {
        size_type target_dim = Z.sizes()[1];
        if (target_dim == 1) {
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                          "Wrong size for coeff vector");
          auto itc = coeff.begin(); auto itZ = Z.begin();
          for (auto it = t.begin(); it != t.end(); ++it)
            *it = (*itc++) * (*itZ);
          ++itZ;
          for (size_type j = 1; j < ndof; ++j, ++itZ) {
            for (auto it = t.begin(); it != t.end(); ++it)
              *it += (*itc++) * (*itZ);
          }
        } else {
          size_type Qmult = qdim / target_dim;
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                          "Wrong size for coeff vector");
          gmm::clear(t.as_vector());
          auto itc = coeff.begin();
          for (size_type j = 0; j < ndof; ++j) {
            auto it = t.begin();
            for (size_type q = 0; q < Qmult; ++q, ++itc) {
              for (size_type r = 0; r < target_dim; ++r)
                *it++ += (*itc) * Z[j + r*ndof];
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
                       const base_vector &co, size_type q)
      : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
  };
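  /* Illustrative aside (not from the GetFEM sources): the scalar branch
     above is nothing but u(x) = sum_i coeff[i] * phi_i(x), a dot product of
     the local coefficient vector with the base-function values at the
     current Gauss point. Stripped of the tensor machinery:

       #include <cstddef>

       double fe_value(const double *coeff, const double *Z,
                       std::size_t ndof) {
         double a = 0.0;
         for (std::size_t i = 0; i < ndof; ++i)
           a += coeff[i] * Z[i];    // same accumulation as exec() above
         return a;
       }
  */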
  struct ga_instruction_grad : public ga_instruction_val {
    // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      size_type N = Z.sizes()[2];
      if (qdim == 1) {
        GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
        GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
        auto itZ = Z.begin();
        for (auto it = t.begin(); it != t.end(); ++it) {
          auto itc = coeff.begin();
          *it = (*itc++) * (*itZ++);
          while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
        }
      } else {
        size_type target_dim = Z.sizes()[1];
        if (target_dim == 1) {
          GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
                          "Wrong size for coeff vector");
          for (size_type q = 0; q < qdim; ++q) {
            auto itZ = Z.begin(); auto it = t.begin() + q;
            for (size_type k = 0; k < N; ++k) {
              if (k) it += qdim;
              auto itc = coeff.begin() + q;
              *it = (*itc) * (*itZ++);
              for (size_type j = 1; j < ndof; ++j)
                { itc += qdim; *it += (*itc) * (*itZ++); }
            }
          }
        } else {
          size_type Qmult = qdim / target_dim;
          GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
                          "Wrong size for coeff vector");
          gmm::clear(t.as_vector());
          for (size_type q = 0; q < Qmult; ++q) {
            auto itZ = Z.begin();
            for (size_type k = 0; k < N; ++k)
              for (size_type r = 0; r < target_dim; ++r)
                for (size_type j = 0; j < ndof; ++j)
                  t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
          }
        }
      }
      return 0;
    }
    ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
                        const base_vector &co, size_type q)
      : ga_instruction_val(tt, Z_, co, q)
    {}
  };
  struct ga_instruction_hess : public ga_instruction_val {
    // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      size_type NN = gmm::sqr(t.sizes().back());
      GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
      if (qdim == 1) {
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                        "Wrong size for coeff vector");
        auto it = Z.begin(); auto itt = t.begin();
        for (size_type kl = 0; kl < NN; ++kl, ++itt) {
          *itt = scalar_type(0);
          for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
            *itt += (*itc) * (*it);
        }
        GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
      } else {
        size_type target_dim = Z.sizes()[1];
        if (target_dim == 1) {
          GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                          "Wrong size for coeff vector");
          gmm::clear(t.as_vector());
          for (size_type q = 0; q < qdim; ++q) {
            base_tensor::const_iterator it = Z.begin();
            for (size_type kl = 0; kl < NN; ++kl)
              for (size_type j = 0; j < ndof; ++j, ++it)
                t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
          }
        } else {
          size_type Qmult = qdim / target_dim;
          GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
          GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                          "Wrong size for coeff vector");
          gmm::clear(t.as_vector());
          for (size_type q = 0; q < Qmult; ++q) {
            base_tensor::const_iterator it = Z.begin();
            for (size_type kl = 0; kl < NN; ++kl)
              for (size_type r = 0; r < target_dim; ++r)
                for (size_type j = 0; j < ndof; ++j, ++it)
                  t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
          }
        }
      }
      return 0;
    }
    ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
                        const base_vector &co, size_type q)
      : ga_instruction_val(tt, Z_, co, q)
    {}
  };
  struct ga_instruction_diverg : public ga_instruction_val {
    // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: divergence");
      size_type ndof = Z.sizes()[0];
      if (!ndof) { gmm::clear(t.as_vector()); return 0; }
      size_type target_dim = Z.sizes()[1];
      size_type N = Z.sizes()[2];
      size_type Qmult = qdim / target_dim;
      GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                      "Dimensions mismatch for divergence operator");
      GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                      "Wrong size for coeff vector");

      t[0] = scalar_type(0);
      base_tensor::const_iterator it = Z.begin();
      if (Qmult == 1) // target_dim == N
        for (size_type k = 0; k < N; ++k) {
          if (k) it += (N*ndof + 1);
          for (size_type j = 0; j < ndof; ++j) {
            if (j) ++it;
            t[0] += coeff[j] * (*it);
          }
        }
      else // Qmult == N, target_dim == 1
        for (size_type k = 0; k < N; ++k) {
          if (k) ++it;
          for (size_type j = 0; j < ndof; ++j) {
            if (j) ++it;
            t[0] += coeff[j*N+k] * (*it);
          }
        }
      return 0;
    }
    ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
                          const base_vector &co, size_type q)
      : ga_instruction_val(tt, Z_, co, q)
    {}
  };
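  /* Illustrative aside (not from the GetFEM sources): for a vector field the
     divergence is the trace of the gradient, div u = sum_k du_k/dx_k. With
     the gradient stored flat as G(j,k,l) = d(phi_j)_k/dx_l in column-major
     order, the diagonal k == l is what the iterator jumps above pick out:

       #include <cstddef>

       double divergence(const double *coeff, const double *G,
                         std::size_t ndof, std::size_t N) {
         double d = 0.0;
         for (std::size_t k = 0; k < N; ++k)       // diagonal entries only
           for (std::size_t j = 0; j < ndof; ++j)
             d += coeff[j] * G[j + k*ndof + k*ndof*N];
         return d;
       }
  */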
  struct ga_instruction_copy_val_base : public ga_instruction {
    base_tensor &t;
    const base_tensor &Z;
    size_type qdim;
    // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: value of test functions");
      if (qdim == 1) {
        GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
        std::copy(Z.begin(), Z.end(), t.begin());
      } else {
        size_type target_dim = Z.sizes()[1];
        size_type Qmult = qdim / target_dim;
        if (Qmult == 1) {
          std::copy(Z.begin(), Z.end(), t.begin());
        } else {
          if (target_dim == 1) {
            size_type ndof = Z.sizes()[0];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for base vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            auto itZ = Z.begin();
            size_type s = t.sizes()[0], sss = s+1;

            // Performs t(i*Qmult+j, j) = Z(i)
            auto it = t.begin();
            for (size_type i = 0; i < ndof; ++i, ++itZ) {
              if (i) it += Qmult;
              auto it2 = it;
              *it2 = *itZ;
              for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
            }
          } else {
            size_type ndof = Z.sizes()[0];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for base vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            auto itZ = Z.begin();
            size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

            // Performs t(i*Qmult+j, k*Qmult+j) = Z(i,k)
            for (size_type k = 0; k < target_dim; ++k) {
              auto it = t.begin() + (ss * k);
              for (size_type i = 0; i < ndof; ++i, ++itZ) {
                if (i) it += Qmult;
                auto it2 = it;
                *it2 = *itZ;
                for (size_type j = 1; j < Qmult; ++j)
                  { it2 += sss; *it2 = *itZ; }
              }
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
                                 size_type q) : t(tt), Z(Z_), qdim(q) {}
  };
  struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
    // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient of test functions");
      if (qdim == 1) {
        std::copy(Z.begin(), Z.end(), t.begin());
      } else {
        size_type target_dim = Z.sizes()[1];
        size_type Qmult = qdim / target_dim;
        if (Qmult == 1) {
          std::copy(Z.begin(), Z.end(), t.begin());
        } else {
          if (target_dim == 1) {
            size_type ndof = Z.sizes()[0];
            size_type N = Z.sizes()[2];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for gradient vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            base_tensor::const_iterator itZ = Z.begin();
            size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;

            // Performs t(i*Qmult+j, j, l) = Z(i,l)
            for (size_type l = 0; l < N; ++l) {
              base_tensor::iterator it = t.begin() + (ssss*l);
              for (size_type i = 0; i < ndof; ++i, ++itZ) {
                if (i) it += Qmult;
                base_tensor::iterator it2 = it;
                *it2 = *itZ;
                for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
              }
            }
          } else {
            size_type ndof = Z.sizes()[0];
            size_type N = Z.sizes()[2];
            GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                            "Wrong size for gradient vector");
            std::fill(t.begin(), t.end(), scalar_type(0));
            base_tensor::const_iterator itZ = Z.begin();
            size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
            size_type ssss = ss*target_dim;

            // Performs t(i*Qmult+j, k*Qmult+j, l) = Z(i,k,l)
            for (size_type l = 0; l < N; ++l)
              for (size_type k = 0; k < target_dim; ++k) {
                base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
                for (size_type i = 0; i < ndof; ++i, ++itZ) {
                  if (i) it += Qmult;
                  base_tensor::iterator it2 = it;
                  *it2 = *itZ;
                  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
                }
              }
          }
        }
      }
      return 0;
    }
    ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
                                  size_type q)
      : ga_instruction_copy_val_base(tt,Z_,q) {}
  };
  struct ga_instruction_copy_vect_val_base : public ga_instruction {
    base_tensor &t;
    const base_tensor &Z;
    size_type qdim;
    // Z(ndof) --> t(qdim*ndof,qdim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vectorized value of test functions");
      size_type ndof = Z.sizes()[0];
      GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                      "Wrong size for base vector");
      std::fill(t.begin(), t.end(), scalar_type(0));
      auto itZ = Z.begin();
      size_type s = t.sizes()[0], sss = s+1;

      // Performs t(i*qdim+j, j) = Z(i)
      auto it = t.begin();
      for (size_type i = 0; i < ndof; ++i, ++itZ) {
        if (i) it += qdim;
        auto it2 = it;
        *it2 = *itZ;
        for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
      }
      return 0;
    }
    ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
                                      size_type q) : t(tt), Z(Z_), qdim(q) {}
  };
  struct ga_instruction_copy_vect_grad_base
    : public ga_instruction_copy_vect_val_base {
    // Z(ndof,N) --> t(qdim*ndof,qdim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
      size_type ndof = Z.sizes()[0];
      size_type N = Z.sizes()[2];
      GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                      "Wrong size for gradient vector");
      std::fill(t.begin(), t.end(), scalar_type(0));
      base_tensor::const_iterator itZ = Z.begin();
      size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;

      // Performs t(i*qdim+j, j, l) = Z(i,l)
      for (size_type l = 0; l < N; ++l) {
        base_tensor::iterator it = t.begin() + (ssss*l);
        for (size_type i = 0; i < ndof; ++i, ++itZ) {
          if (i) it += qdim;
          base_tensor::iterator it2 = it;
          *it2 = *itZ;
          for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
        }
      }
      return 0;
    }
    ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
                                       size_type q)
      : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
  };
  struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
    // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian of test functions");
      size_type target_dim = Z.sizes()[1];
      size_type Qmult = qdim / target_dim;
      if (Qmult == 1) {
        gmm::copy(Z.as_vector(), t.as_vector());
      } else {
        size_type ndof = Z.sizes()[0];
        GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                        "Wrong size for Hessian vector");
        gmm::clear(t.as_vector());
        base_tensor::const_iterator itZ = Z.begin();
        size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

        // Performs t(i*Qmult+j, k*Qmult+j, l, m) = Z(i,k,l*N+m)
        size_type NNdim = Z.sizes()[2]*target_dim;
        for (size_type klm = 0; klm < NNdim; ++klm) {
          base_tensor::iterator it = t.begin() + (ss * klm);
          for (size_type i = 0; i < ndof; ++i, ++itZ) {
            if (i) it += Qmult;
            base_tensor::iterator it2 = it;
            *it2 = *itZ;
            for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
          }
        }
      }
      return 0;
    }
    ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
                                  size_type q)
      : ga_instruction_copy_val_base(tt, Z_, q) {}
  };
  struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
    // Z(ndof,target_dim,N) --> t(Qmult*ndof)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: divergence of test functions");
      size_type ndof = Z.sizes()[0];
      size_type target_dim = Z.sizes()[1];
      size_type N = Z.sizes()[2];
      size_type Qmult = qdim / target_dim;
      GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                      "Dimensions mismatch for divergence operator");
      GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
                      "Wrong size for divergence vector");
      gmm::clear(t.as_vector());
      base_tensor::const_iterator itZ = Z.begin();
      if (Qmult == 1) { // target_dim == N; performs t(i) = Trace(Z(i,:,:))
        for (size_type l = 0; l < N; ++l) {
          base_tensor::iterator it = t.begin();
          if (l) itZ += target_dim*ndof+1;
          for (size_type i = 0; i < ndof; ++i) {
            if (i) { ++it; ++itZ; }
            *it += *itZ;
          }
        }
      } else { // Qmult == N; performs t(i*Qmult+j) = Z(i,1,j)
        for (size_type j = 0; j < N; ++j) {
          base_tensor::iterator it = t.begin() + j;
          if (j) ++itZ;
          for (size_type i = 0; i < ndof; ++i) {
            if (i) { it += Qmult; ++itZ; }
            *it += *itZ;
          }
        }
      }
      return 0;
    }
    ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
                                    size_type q)
      : ga_instruction_copy_val_base(tt, Z_, q) {}
  };
  struct ga_instruction_elementary_transformation {
    const base_vector &coeff_in;
    base_vector coeff_out;
    pelementary_transformation elemtrans;
    const mesh_fem &mf;
    const fem_interpolation_context &ctx;
    base_matrix &M;
    const mesh_fem **mf_M;
    size_type &icv;

    void do_transformation() {
      size_type nn = gmm::vect_size(coeff_in);
      if (M.size() == 0 || icv != ctx.convex_num() || &mf != *mf_M) {
        M.base_resize(nn, nn);
        *mf_M = &mf; icv = ctx.convex_num();
        elemtrans->give_transformation(mf, icv, M);
      }
      coeff_out.resize(nn);
      gmm::mult(M, coeff_in, coeff_out);
    }

    ga_instruction_elementary_transformation
    (const base_vector &co, pelementary_transformation e,
     const mesh_fem &mf_, const fem_interpolation_context &ctx_,
     base_matrix &M_, const mesh_fem **mf_M_, size_type &icv_)
      : coeff_in(co), elemtrans(e), mf(mf_), ctx(ctx_),
        M(M_), mf_M(mf_M_), icv(icv_) {}
    ~ga_instruction_elementary_transformation() {}
  };
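  /* Illustrative aside (not from the GetFEM sources): do_transformation()
     rebuilds the local matrix M only when the element or the mesh_fem
     changed, then applies coeff_out = M * coeff_in. The application step,
     written out for a dense column-major matrix:

       #include <cstddef>
       #include <vector>

       void apply_local_transformation(const std::vector<double> &M, // n x n
                                       const std::vector<double> &cin,
                                       std::vector<double> &cout) {
         std::size_t n = cin.size();
         cout.assign(n, 0.0);
         for (std::size_t j = 0; j < n; ++j)       // column-major product
           for (std::size_t i = 0; i < n; ++i)
             cout[i] += M[i + j*n] * cin[j];
       }
  */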
  struct ga_instruction_elementary_transformation_val
    : public ga_instruction_val, ga_instruction_elementary_transformation {
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: variable value with elementary "
                    "transformation");
      do_transformation();
      return ga_instruction_val::exec();
    }

    ga_instruction_elementary_transformation_val
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
     size_type q, pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_val(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };
  struct ga_instruction_elementary_transformation_grad
    : public ga_instruction_grad, ga_instruction_elementary_transformation {
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
      do_transformation();
      return ga_instruction_grad::exec();
    }

    ga_instruction_elementary_transformation_grad
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
     size_type q, pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_grad(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };
  struct ga_instruction_elementary_transformation_hess
    : public ga_instruction_hess, ga_instruction_elementary_transformation {
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
      do_transformation();
      return ga_instruction_hess::exec();
    }

    ga_instruction_elementary_transformation_hess
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
     size_type q, pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_hess(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };
  struct ga_instruction_elementary_transformation_diverg
    : public ga_instruction_diverg, ga_instruction_elementary_transformation {
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
      do_transformation();
      return ga_instruction_diverg::exec();
    }

    ga_instruction_elementary_transformation_diverg
    (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
     size_type q, pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_diverg(tt, Z_, coeff_out, q),
        ga_instruction_elementary_transformation(co, e, mf_, ctx_, M_,
                                                 mf_M_, icv_) {}
  };
  struct ga_instruction_update_group_info : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set &gis;
    ga_instruction_set::interpolate_info &inin;
    const std::string gname;
    ga_instruction_set::variable_group_info &vgi;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Update group info for "+gname);
      if (vgi.varname &&
          &(workspace.associated_mf(*(vgi.varname))->linked_mesh())==inin.m)
        return 0;
      const std::string &varname
        = inin.m ? workspace.variable_in_group(gname, *(inin.m))
                 : workspace.first_variable_of_group(gname);
      vgi.mf = workspace.associated_mf(varname);
      vgi.Ir = gis.var_intervals[varname];
      vgi.In = workspace.interval_of_variable(varname);
      vgi.alpha = workspace.factor_of_variable(varname);
      vgi.U = gis.extended_vars[varname];
      vgi.varname = &varname;
      return 0;
    }

    ga_instruction_update_group_info
    (const ga_workspace &workspace_, ga_instruction_set &gis_,
     ga_instruction_set::interpolate_info &inin_, const std::string &gname_,
     ga_instruction_set::variable_group_info &vgi_)
      : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_),
        vgi(vgi_) {}
  };
  struct ga_instruction_interpolate_filter : public ga_instruction {
    base_tensor &t;
    const ga_instruction_set::interpolate_info &inin;
    size_type pt_type;
    int nb;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated filter");
      if ((pt_type == size_type(-1) && inin.pt_type) ||
          (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
        GA_DEBUG_INFO("Instruction: interpolated filter: pass");
        return 0;
      }
      else {
        GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
        gmm::clear(t.as_vector());
        return nb;
      }
    }

    ga_instruction_interpolate_filter
    (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
     size_type ind_, int nb_)
      : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
  };
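  /* Illustrative aside (not from the GetFEM sources): an exec() returning a
     positive value asks the interpreter to skip that many of the following
     instructions, which is what the "filtered" branch above uses nb for. A
     minimal dispatch loop honouring that convention:

       #include <cstddef>
       #include <memory>
       #include <vector>

       struct instruction { virtual int exec() = 0;
                            virtual ~instruction() {} };

       void run(std::vector<std::unique_ptr<instruction>> &prog) {
         for (std::size_t i = 0; i < prog.size(); ++i)
           i += prog[i]->exec();   // 0: fall through; nb > 0: skip nb
       }
  */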
  struct ga_instruction_interpolate : public ga_instruction {
    base_tensor &t;
    const mesh **m;
    const mesh_fem *mfn, **mfg;
    const base_vector *Un, **Ug;
    fem_interpolation_context &ctx;
    base_vector coeff;
    size_type qdim;
    const size_type &ipt;
    fem_precomp_pool &fp_pool;
    ga_instruction_set::interpolate_info &inin;

    virtual int exec() {
      GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
                  "transformation. Probably transformation failed");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      const base_vector &U = *(Ug ? *Ug : Un);
      GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                  "on another mesh than the one it is defined on");
      slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
      pfem pf = mf.fem_of_element(ctx.convex_num());
      GMM_ASSERT1(pf, "Undefined finite element method");
      if (ctx.have_pgp()) {
        if (ipt == 0)
          inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
        ctx.set_pfp(inin.pfps[&mf]);
      } else {
        ctx.set_pf(pf);
      }
      return 0;
    }

    ga_instruction_interpolate
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
        ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
  };
  struct ga_instruction_interpolate_val : public ga_instruction_interpolate {
    // --> t(target_dim*Qmult)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable value");
      ga_instruction_interpolate::exec();
      ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
      return 0;
    }

    ga_instruction_interpolate_val
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_) {}
  };
  struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {
    // --> t(target_dim*Qmult,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable grad");
      ga_instruction_interpolate::exec();
      base_matrix v(qdim, ctx.N());
      ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
      gmm::copy(v.as_vector(), t.as_vector());
      return 0;
    }

    ga_instruction_interpolate_grad
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_) {}
  };
  struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {
    // --> t(target_dim*Qmult,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable hessian");
      ga_instruction_interpolate::exec();
      base_matrix v(qdim, ctx.N()*ctx.N());
      ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
      gmm::copy(v.as_vector(), t.as_vector());
      return 0;
    }

    ga_instruction_interpolate_hess
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_) {}
  };
  struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {
    // --> t(1)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated variable divergence");
      ga_instruction_interpolate::exec();
      ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
      return 0;
    }

    ga_instruction_interpolate_diverg
    (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
     fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
     fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
      : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                   fp_pool_, inin_) {}
  };
  struct ga_instruction_interpolate_base {
    base_tensor ZZ;
    const mesh **m;
    const mesh_fem *mfn, **mfg;
    const size_type &ipt;
    ga_instruction_set::interpolate_info &inin;
    fem_precomp_pool &fp_pool;

    virtual int exec() {
      GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
                  "the transformation. Probably transformation failed");
      const mesh_fem &mf = *(mfg ? *mfg : mfn);
      GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                  "on another mesh than the one it is defined on");
      pfem pf = mf.fem_of_element(inin.ctx.convex_num());
      GMM_ASSERT1(pf, "Undefined finite element method");
      if (inin.ctx.have_pgp()) {
        if (ipt == 0)
          inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
        inin.ctx.set_pfp(inin.pfps[&mf]);
      } else {
        inin.ctx.set_pf(pf);
      }
      return 0;
    }

    ga_instruction_interpolate_base
    (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
     const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
     fem_precomp_pool &fp_pool_)
      : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
        fp_pool(fp_pool_) {}
  };
  struct ga_instruction_interpolate_val_base
    : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
    // ctx --> ZZ(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base value");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // remember that Z == ZZ
      return ga_instruction_copy_val_base::exec();
    }

    ga_instruction_interpolate_val_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, size_type q, const size_type &ipt_,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_val_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };
  struct ga_instruction_interpolate_grad_base
    : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
    // ctx --> ZZ(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base grad");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ);
      return ga_instruction_copy_grad_base::exec();
    }

    ga_instruction_interpolate_grad_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, size_type q, const size_type &ipt_,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_grad_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };
  struct ga_instruction_interpolate_hess_base
    : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
    // ctx --> ZZ(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base hessian");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ);
      return ga_instruction_copy_hess_base::exec();
    }

    ga_instruction_interpolate_hess_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, size_type q, const size_type &ipt_,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_hess_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };
  struct ga_instruction_interpolate_diverg_base
    : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
    // ctx --> ZZ(ndof,target_dim,N) --> t(Qmult*ndof)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: interpolated base divergence");
      ga_instruction_interpolate_base::exec();
      inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ);
      return ga_instruction_copy_diverg_base::exec();
    }

    ga_instruction_interpolate_diverg_base
    (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
     const mesh_fem **mfg_, size_type q, const size_type &ipt_,
     ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
      : ga_instruction_copy_diverg_base(t_, ZZ, q),
        ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                        inin_, fp_pool_) {}
  };
  struct ga_instruction_elementary_transformation_base {
    base_tensor t_in;
    base_tensor &t_out;
    pelementary_transformation elemtrans;
    const mesh_fem &mf;
    const fem_interpolation_context &ctx;
    base_matrix &M;
    const mesh_fem **mf_M;
    size_type &icv;

    void do_transformation(size_type n) {
      if (M.size() == 0 || icv != ctx.convex_num() || &mf != *mf_M) {
        M.base_resize(n, n);
        *mf_M = &mf; icv = ctx.convex_num();
        elemtrans->give_transformation(mf, icv, M);
      }
      t_out.mat_reduction(t_in, M, 0);
    }

    ga_instruction_elementary_transformation_base
    (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf_,
     const fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : t_out(t_), elemtrans(e), mf(mf_), ctx(ctx_),
        M(M_), mf_M(mf_M_), icv(icv_) {}
  };
  struct ga_instruction_elementary_transformation_val_base
    : public ga_instruction_copy_val_base,
      ga_instruction_elementary_transformation_base {
    // Z(ndof,target_dim) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: value of test functions with elementary "
                    "transformation");
      size_type ndof = Z.sizes()[0];
      size_type Qmult = qdim / Z.sizes()[1];
      t_in.adjust_sizes(t_out.sizes());
      ga_instruction_copy_val_base::exec();
      do_transformation(ndof*Qmult);
      return 0;
    }

    ga_instruction_elementary_transformation_val_base
    (base_tensor &t_, const base_tensor &Z_, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_copy_val_base(t_in, Z_, q),
        ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
                                                      mf_M_, icv_) {}
  };
  struct ga_instruction_elementary_transformation_grad_base
    : public ga_instruction_copy_grad_base,
      ga_instruction_elementary_transformation_base {
    // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
                    "transformation");
      size_type ndof = Z.sizes()[0];
      size_type Qmult = qdim / Z.sizes()[1];
      t_in.adjust_sizes(t_out.sizes());
      ga_instruction_copy_grad_base::exec();
      do_transformation(ndof*Qmult);
      return 0;
    }

    ga_instruction_elementary_transformation_grad_base
    (base_tensor &t_, const base_tensor &Z_, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_copy_grad_base(t_in, Z_, q),
        ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
                                                      mf_M_, icv_) {}
  };
  struct ga_instruction_elementary_transformation_hess_base
    : public ga_instruction_copy_hess_base,
      ga_instruction_elementary_transformation_base {
    // Z(ndof,target_dim,N*N) --> t_in --> t_out(Qmult*ndof,...,N,N)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
                    "transformation");
      size_type ndof = Z.sizes()[0];
      size_type Qmult = qdim / Z.sizes()[1];
      t_in.adjust_sizes(t_out.sizes());
      ga_instruction_copy_hess_base::exec();
      do_transformation(ndof*Qmult);
      return 0;
    }

    ga_instruction_elementary_transformation_hess_base
    (base_tensor &t_, const base_tensor &Z_, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_copy_hess_base(t_in, Z_, q),
        ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
                                                      mf_M_, icv_) {}
  };
  struct ga_instruction_elementary_transformation_diverg_base
    : public ga_instruction_copy_diverg_base,
      ga_instruction_elementary_transformation_base {
    // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
                    "transformation");
      size_type ndof = Z.sizes()[0];
      size_type Qmult = qdim / Z.sizes()[1];
      t_in.adjust_sizes(t_out.sizes());
      ga_instruction_copy_diverg_base::exec();
      do_transformation(ndof*Qmult);
      return 0;
    }

    ga_instruction_elementary_transformation_diverg_base
    (base_tensor &t_, const base_tensor &Z_, size_type q,
     pelementary_transformation e, const mesh_fem &mf_,
     fem_interpolation_context &ctx_, base_matrix &M_,
     const mesh_fem **mf_M_, size_type &icv_)
      : ga_instruction_copy_diverg_base(t_in, Z_, q),
        ga_instruction_elementary_transformation_base(t_, e, mf_, ctx_, M_,
                                                      mf_M_, icv_) {}
  };
  struct ga_instruction_add : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: addition");
      GA_DEBUG_ASSERT(t.size() == tc1.size(),
                      "internal error " << t.size() << " != " << tc1.size());
      GA_DEBUG_ASSERT(t.size() == tc2.size(),
                      "internal error " << t.size() << " != " << tc2.size());
      gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
      return 0;
    }
    ga_instruction_add(base_tensor &t_,
                       const base_tensor &tc1_, const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
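  /* Illustrative aside (not from the GetFEM sources): instructions like the
     one above do no size bookkeeping at run time beyond the debug asserts;
     the tensors were sized when the assembly program was compiled, so exec()
     reduces to one tight loop. The gmm::add call above is equivalent to:

       #include <cstddef>

       void add(const double *a, const double *b, double *out,
                std::size_t n) {
         for (std::size_t i = 0; i < n; ++i)
           out[i] = a[i] + b[i];   // assumes out does not alias a or b
       }
  */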
  struct ga_instruction_add_to : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: addition");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
                      << " incompatible with " << tc1.size());
      gmm::add(tc1.as_vector(), t.as_vector());
      return 0;
    }
    ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
      : t(t_), tc1(tc1_) {}
  };
  struct ga_instruction_add_to_coeff : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    scalar_type &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: addition with scale");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
                      << " incompatible with " << tc1.size());
      gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
      return 0;
    }
    ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
                                scalar_type &coeff_)
      : t(t_), tc1(tc1_), coeff(coeff_) {}
  };
  struct ga_instruction_sub : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: subtraction");
      GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                      "internal error");
      gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
               t.as_vector());
      return 0;
    }
    ga_instruction_sub(base_tensor &t_,
                       const base_tensor &tc1_, const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  struct ga_instruction_opposite : public ga_instruction {
    base_tensor &t;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: multiplication with -1");
      gmm::scale(t.as_vector(), scalar_type(-1));
      return 0;
    }
    ga_instruction_opposite(base_tensor &t_) : t(t_) {}
  };
  struct ga_instruction_print_tensor : public ga_instruction {
    base_tensor &t;
    pga_tree_node pnode;
    const fem_interpolation_context &ctx;
    size_type &nbpt, &ipt;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: tensor print");
      cout << "Print term "; ga_print_node(pnode, cout);
      cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
           << ctx.convex_num() << ": " << t << endl;
      return 0;
    }
    ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
                                const fem_interpolation_context &ctx_,
                                size_type &nbpt_, size_type &ipt_)
      : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
  };
  struct ga_instruction_copy_tensor : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: tensor copy");
      std::copy(tc1.begin(), tc1.end(), t.begin());
      return 0;
    }
    ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
      : t(t_), tc1(tc1_) {}
  };
  struct ga_instruction_clear_tensor : public ga_instruction {
    base_tensor &t;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: clear tensor");
      std::fill(t.begin(), t.end(), scalar_type(0));
      return 0;
    }
    ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
  };
  struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: tensor copy possibly void");
      if (tc1.size())
        gmm::copy(tc1.as_vector(), t.as_vector());
      else
        gmm::clear(t.as_vector());
      return 0;
    }
    ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
                                             const base_tensor &tc1_)
      : t(t_), tc1(tc1_) {}
  };
  struct ga_instruction_copy_scalar : public ga_instruction {
    scalar_type &t;
    const scalar_type &t1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar copy");
      t = t1;
      return 0;
    }
    ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
      : t(t_), t1(t1_) {}
  };
  struct ga_instruction_copy_vect : public ga_instruction {
    base_vector &t;
    const base_vector &t1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: fixed size tensor copy");
      gmm::copy(t1, t);
      return 0;
    }
    ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
      : t(t_), t1(t1_) {}
  };
  struct ga_instruction_trace : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    size_type n;
    // tc1(:,:,...,n,n) --> t(:,:,...)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Trace");
      GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
      size_type s = t.size() * (n+1);
      auto it = t.begin();
      auto it1 = tc1.begin();
      for (; it != t.end(); ++it, ++it1) {
        auto it2 = it1; // it1 iterates on the diagonal of tc1
        *it = *it2;
        for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
      }
      return 0;
    }
    ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_,
                         size_type n_)
      : t(t_), tc1(tc1_), n(n_) {}
  };
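  /* Illustrative aside (not from the GetFEM sources): with the two trailing
     n x n indices stored last, consecutive diagonal entries of one output
     component are separated by a fixed stride of t.size()*(n+1) scalars,
     which is the `s` jump above. For a single column-major n x n matrix the
     same idea reads:

       #include <cstddef>

       double trace(const double *A, std::size_t n) {
         double tr = 0.0;
         for (std::size_t i = 0; i < n; ++i)
           tr += A[i*(n+1)];       // stride n+1 walks the diagonal
         return tr;
       }
  */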
  struct ga_instruction_deviator : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    size_type n;
    // tc1(:,:,...,n,n) --> t(:,:,...,n,n)
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Deviator");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      gmm::copy(tc1.as_vector(), t.as_vector());

      size_type nb = t.size()/(n*n), s = nb*(n+1), j = 0;
      base_tensor::iterator it = t.begin();
      base_tensor::const_iterator it1 = tc1.begin();
      for (; j < nb; ++it, ++it1, ++j) {
        scalar_type tr(0);
        base_tensor::const_iterator it2 = it1;
        tr += *it2;
        for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
        tr /= scalar_type(n);

        base_tensor::iterator it3 = it;
        *it3 -= tr;
        for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
      }
      return 0;
    }
    ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
                            size_type n_)
      : t(t_), tc1(tc1_), n(n_) {}
  };
  struct ga_instruction_transpose : public ga_instruction { // To be optimized
    base_tensor &t;
    const base_tensor &tc1;
    size_type n1, n2, nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: transpose");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

      size_type n0 = t.size() / (n1*n2*nn);
      auto it = t.begin();
      for (size_type i = 0; i < nn; ++i) {
        size_type s1 = i*n1*n2*n0;
        for (size_type j = 0; j < n1; ++j) {
          size_type s2 = s1 + j*n0;
          for (size_type k = 0; k < n2; ++k) {
            size_type s3 = s2 + k*n1*n0;
            for (size_type l = 0; l < n0; ++l, ++it)
              *it = tc1[s3+l];
          }
        }
      }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
                             size_type n1_, size_type n2_, size_type nn_)
      : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
  };
  struct ga_instruction_swap_indices : public ga_instruction { // To be optimized
    base_tensor &t;
    const base_tensor &tc1;
    size_type nn1, nn2, ii2, ii3;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: swap indices");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);

      auto it = t.begin();
      for (size_type i = 0; i < ii3; ++i)
        for (size_type j = 0; j < nn1; ++j)
          for (size_type k = 0; k < ii2; ++k)
            for (size_type l = 0; l < nn2; ++l) {
              size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
              for (size_type m = 0; m < ii1; ++m, ++it)
                *it = tc1[m+ind];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
                                size_type n1_, size_type n2_,
                                size_type i2_, size_type i3_)
      : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
  };
  struct ga_instruction_index_move_last : public ga_instruction { // To be optimized
    base_tensor &t;
    const base_tensor &tc1;
    size_type nn, ii2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: swap indices");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      size_type ii1 = t.size() / (nn*ii2);

      auto it = t.begin();
      for (size_type i = 0; i < nn; ++i)
        for (size_type j = 0; j < ii2; ++j) {
          size_type ind = i*ii1+j*ii1*nn;
          for (size_type k = 0; k < ii1; ++k, ++it)
            *it = tc1[k+ind];
        }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
                                   size_type n_, size_type i2_)
      : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
  };
  struct ga_instruction_transpose_no_test : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    size_type n1, n2, nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: transpose");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

      auto it = t.begin();
      for (size_type i = 0; i < nn; ++i) {
        size_type s1 = i*n1*n2;
        for (size_type j = 0; j < n1; ++j) {
          size_type s2 = s1 + j;
          for (size_type k = 0; k < n2; ++k, ++it)
            *it = tc1[s2 + k*n1];
        }
      }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
                                     size_type n1_, size_type n2_,
                                     size_type nn_)
      : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
  };
  struct ga_instruction_transpose_test : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");

      size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
      size_type n = t.size() / s3;
      base_tensor::iterator it = t.begin();
      for (size_type k = 0; k < n; ++k)
        for (size_type j = 0; j < s2; ++j)
          for (size_type i = 0; i < s1; ++i, ++it)
            *it = tc1[j+s2*i+k*s3];
      return 0;
    }
    ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
      : t(t_), tc1(tc1_) {}
  };
  struct ga_instruction_sym : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: symmetric part");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      size_type order = t.sizes().size();
      size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
      size_type s = t.size() / (s1*s2);
      for (size_type i = 0; i < s1; ++i)
        for (size_type j = 0; j < s2; ++j) {
          base_tensor::iterator it = t.begin() + s*(i + s1*j);
          base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
                                      it1T = tc1.begin() + s*(j + s2*i);
          for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
        }
      return 0;
    }
    ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
      : t(t_), tc1(tc1_) {}
  };
  struct ga_instruction_skew : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: skew-symmetric part");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      size_type order = t.sizes().size();
      size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
      size_type s = t.size() / (s1*s2);
      for (size_type i = 0; i < s1; ++i)
        for (size_type j = 0; j < s2; ++j) {
          base_tensor::iterator it = t.begin() + s*(i + s1*j);
          base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
                                      it1T = tc1.begin() + s*(j + s2*i);
          for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
        }
      return 0;
    }
    ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
      : t(t_), tc1(tc1_) {}
  };
  struct ga_instruction_scalar_add : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar addition");
      t = c + d;
      return 0;
    }
    ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
                              const scalar_type &d_)
      : t(t_), c(c_), d(d_) {}
  };
  struct ga_instruction_scalar_sub : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar subtraction");
      t = c - d;
      return 0;
    }
    ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
                              const scalar_type &d_)
      : t(t_), c(c_), d(d_) {}
  };
  struct ga_instruction_scalar_scalar_mult : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar multiplication");
      t = c * d;
      return 0;
    }
    ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
                                      const scalar_type &d_)
      : t(t_), c(c_), d(d_) {}
  };
  struct ga_instruction_scalar_scalar_div : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar division");
      t = c / d;
      return 0;
    }
    ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
                                     const scalar_type &d_)
      : t(t_), c(c_), d(d_) {}
  };
  struct ga_instruction_scalar_mult : public ga_instruction {
    base_tensor &t, &tc1;
    const scalar_type &c;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
      gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
      return 0;
    }
    ga_instruction_scalar_mult(base_tensor &t_, base_tensor &tc1_,
                               const scalar_type &c_)
      : t(t_), tc1(tc1_), c(c_) {}
  };
  struct ga_instruction_scalar_div : public ga_instruction {
    base_tensor &t, &tc1;
    const scalar_type &c;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

      base_tensor::iterator it = t.begin(), it1 = tc1.begin();
      for (; it != t.end(); ++it, ++it1) *it = *it1/c;
      return 0;
    }
    ga_instruction_scalar_div(base_tensor &t_, base_tensor &tc1_,
                              const scalar_type &c_)
      : t(t_), tc1(tc1_), c(c_) {}
  };
  struct ga_instruction_dotmult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: componentwise multiplication");
      size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
      GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2; ++i)
        for (size_type m = 0; m < s1_1; ++m, ++it)
          *it = tc1[m+s1_1*i] * tc2[i];
      return 0;
    }
    ga_instruction_dotmult(base_tensor &t_, base_tensor &tc1_,
                           base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  struct ga_instruction_dotdiv : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: componentwise division");
      size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
      GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2; ++i)
        for (size_type m = 0; m < s1_1; ++m, ++it)
          *it = tc1[m+s1_1*i] / tc2[i];
      return 0;
    }
    ga_instruction_dotdiv(base_tensor &t_, base_tensor &tc1_,
                          base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  struct ga_instruction_dotmult_spec : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
      size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
      size_type s1_1 = tc1.size() / s2_2;

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2_2; ++i)
        for (size_type n = 0; n < s2_1; ++n)
          for (size_type m = 0; m < s1_1; ++m, ++it)
            *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_dotmult_spec(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  struct ga_instruction_contract_1_1 : public ga_instruction {
    base_tensor &t, &tc1;
    size_type nn, ii2, ii3;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on a single tensor");

      size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii3; ++i)
        for (size_type j = 0; j < ii2; ++j)
          for (size_type k = 0; k < ii1; ++k, ++it) {
            *it = scalar_type(0);
            size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
            for (size_type n = 0; n < nn; ++n)
              *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
          }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_1_1(base_tensor &t_, base_tensor &tc1_,
                                size_type n_, size_type i2_, size_type i3_)
      : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
  };
  struct ga_instruction_contract_2_1 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn, ii1, ii2, ii3, ii4;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on two tensors");

      size_type ift1 = tc1.size() / (nn*ii1*ii2);
      size_type ift2 = tc2.size() / (nn*ii3*ii4);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii4; ++i)
        for (size_type j = 0; j < ii3; ++j)
          for (size_type p = 0; p < ift2; ++p)
            for (size_type k = 0; k < ii2; ++k)
              for (size_type l = 0; l < ii1; ++l)
                for (size_type q = 0; q < ift1; ++q, ++it) {
                  *it = scalar_type(0);
                  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
                  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
                  for (size_type n = 0; n < nn; ++n)
                    *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
                }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_1(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_,
                                size_type n_, size_type i1_, size_type i2_,
                                size_type i3_, size_type i4_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
  };
  struct ga_instruction_contract_2_1_rev : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn, ii1, ii2, ii3, ii4;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on two tensors");

      size_type ift1 = tc1.size() / (nn*ii1*ii2);
      size_type ift2 = tc2.size() / (nn*ii3*ii4);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii4; ++i)
        for (size_type j = 0; j < ii3; ++j)
          for (size_type k = 0; k < ii2; ++k)
            for (size_type l = 0; l < ii1; ++l)
              for (size_type q = 0; q < ift1; ++q)
                for (size_type p = 0; p < ift2; ++p, ++it) {
                  *it = scalar_type(0);
                  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
                  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
                  for (size_type n = 0; n < nn; ++n)
                    *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
                }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_1_rev(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_,
                                    size_type n_, size_type i1_, size_type i2_,
                                    size_type i3_, size_type i4_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
  };
  struct ga_instruction_contract_2_2 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
    bool inv_tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on two tensors");

      size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
      size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);

      size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
      if (inv_tc2) std::swap(sn1, sn2);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii6; ++i)
        for (size_type j = 0; j < ii5; ++j)
          for (size_type k = 0; k < ii4; ++k)
            for (size_type r = 0; r < ift2; ++r)
              for (size_type l = 0; l < ii3; ++l)
                for (size_type p = 0; p < ii2; ++p)
                  for (size_type q = 0; q < ii1; ++q)
                    for (size_type s = 0; s < ift1; ++s, ++it) {
                      *it = scalar_type(0);
                      size_type ind1
                        = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
                      size_type ind2
                        = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
                      for (size_type n1 = 0; n1 < nn1; ++n1)
                        for (size_type n2 = 0; n2 < nn2; ++n2)
                          *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
                               * tc2[ind2+n1*sn1+n2*sn2];
                    }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_2(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_,
                                size_type n1_, size_type n2_,
                                size_type i1_, size_type i2_, size_type i3_,
                                size_type i4_, size_type i5_, size_type i6_,
                                bool intc2)
      : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
        inv_tc2(intc2) {}
  };
  // Same double contraction, with the component index of the second tensor
  // running fastest in the result.
  struct ga_instruction_contract_2_2_rev : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
    bool inv_tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: double contraction on two tensors");

      size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
      size_type ift2 = tc2.size() / (nn1*nn2*ii4*ii5*ii6);

      size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
      if (inv_tc2) std::swap(sn1, sn2);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii6; ++i)
        for (size_type j = 0; j < ii5; ++j)
          for (size_type k = 0; k < ii4; ++k)
            for (size_type l = 0; l < ii3; ++l)
              for (size_type p = 0; p < ii2; ++p)
                for (size_type q = 0; q < ii1; ++q)
                  for (size_type s = 0; s < ift1; ++s)
                    for (size_type r = 0; r < ift2; ++r, ++it) {
                      *it = scalar_type(0);
                      size_type ind1
                        = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
                      size_type ind2
                        = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
                      for (size_type n1 = 0; n1 < nn1; ++n1)
                        for (size_type n2 = 0; n2 < nn2; ++n2)
                          *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
                                 * tc2[ind2+n1*sn1+n2*sn2];
                    }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_2_rev(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_,
                                    size_type n1_, size_type n2_,
                                    size_type i1_, size_type i2_,
                                    size_type i3_, size_type i4_,
                                    size_type i5_, size_type i6_,
                                    bool inv_tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
        inv_tc2(inv_tc2_) {}
  };
  // Order one contraction: t(i,k) = sum_j tc1(i,j) * tc2(j,k).
  struct ga_instruction_matrix_mult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: order one contraction "
                    "(dot product or matrix multiplication)");
      size_type s1 = tc1.size() / n;
      size_type s2 = tc2.size() / n;

      base_tensor::iterator it = t.begin();
      for (size_type k = 0; k < s2; ++k)
        for (size_type i = 0; i < s1; ++i, ++it) {
          *it = scalar_type(0);
          for (size_type j = 0; j < n; ++j)
            *it += tc1[i+j*s1] * tc2[j+k*n];
        }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_matrix_mult(base_tensor &t_, base_tensor &tc1_,
                               base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_) {}
  };
  // Specific order one contraction: tc1 of size q x m x n, tc2 of size
  // l x n x p, result of size q x m x l x p.
  struct ga_instruction_matrix_mult_spec : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, m, p;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific order one contraction "
                    "(dot product or matrix multiplication)");
      size_type q = tc1.size() / (m*n);
      size_type l = tc2.size() / (p*n);

      base_tensor::iterator it = t.begin();
      for (size_type r = 0; r < p; ++r)
        for (size_type j = 0; j < l; ++j)
          for (size_type k = 0; k < m; ++k)
            for (size_type i = 0; i < q; ++i, ++it) {
              *it = scalar_type(0);
              for (size_type s = 0; s < n; ++s)
                *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_matrix_mult_spec(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_, size_type n_,
                                    size_type m_, size_type p_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
  };
  // Same contraction as above, with the first index of the second tensor
  // running fastest in the result.
  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, m, p;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific order one contraction "
                    "(dot product or matrix multiplication)");
      size_type q = tc1.size() / (m*n);
      size_type l = tc2.size() / (p*n);

      base_tensor::iterator it = t.begin();
      for (size_type r = 0; r < p; ++r)
        for (size_type k = 0; k < m; ++k)
          for (size_type i = 0; i < q; ++i)
            for (size_type j = 0; j < l; ++j, ++it) {
              *it = scalar_type(0);
              for (size_type s = 0; s < n; ++s)
                *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_matrix_mult_spec2(base_tensor &t_, base_tensor &tc1_,
                                     base_tensor &tc2_, size_type n_,
                                     size_type m_, size_type p_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
  };
  // Generic contraction on the last index of size nn of both tensors:
  // t(i,j) = sum_k tc1(i,k) * tc2(j,k).
  struct ga_instruction_contraction : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn);
#if defined(GA_USES_BLAS)
      long m = int(tc1.size()/nn), k = int(nn), n = int(tc2.size()/nn);
      long lda = m, ldb = n, ldc = m;
      char T = 'T', N = 'N';
      scalar_type alpha(1), beta(0);
      gmm::dgemm_(&N, &T, &m, &n, &k, &alpha, &(tc1[0]), &lda, &(tc2[0]), &ldb,
                  &beta, &(t[0]), &ldc);
#else
      size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
      for (auto it = t.begin(); it != t.end(); ++it) {
        auto it11 = it1, it22 = it2;
        scalar_type a = (*it11) * (*it22);
        for (size_type i = 1; i < nn; ++i)
          { it11 += s1; it22 += s2; a += (*it11) * (*it22); }
        *it = a;
        ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
      }
#endif
      return 0;
    }
    ga_instruction_contraction(base_tensor &t_, base_tensor &tc1_,
                               base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
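  // In matrix terms: with tc1 viewed as an s1 x nn matrix A and tc2 as an
  // s2 x nn matrix B (first index fastest), the instruction above computes
  // t = A * B^T, which is exactly why the BLAS path calls dgemm with the
  // op codes 'N' and 'T' and leading dimensions m = s1, n = s2, k = nn.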
  // Contraction optimized for a second tensor which is a type 2 vectorized
  // base tensor: only the representative entry of each q x q diagonal block
  // is read.
  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
                    " optimized for vectorized second tensor of type 2");
      size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
      size_type s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_q; ++j) {
          if (j) it2 += q;
          auto itt1 = it1;
          for (size_type l = 0; l < q; ++l, ++it) {
            if (l) itt1 += s1;
            auto ittt1 = itt1, ittt2 = it2;
            *it = *ittt1 * (*ittt2);
            for (size_type m = 1; m < n; ++m) {
              ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_,
                                      size_type n_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
  };
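  // Rationale for the "type 2" optimizations (a reading of the strides above,
  // not an upstream comment): a vectorized base tensor repeats each scalar
  // value along the diagonal of every q x q block, so it suffices to read one
  // representative entry per block, stepping by s1*q (resp. s2*q) along the
  // contracted index and never touching the structural zeros.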
  template <int N>
  struct ga_instruction_contraction_opt0_2_unrolled : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*q
                    << " optimized for vectorized second tensor of type 2");
      size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
      size_type s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_q; ++j) {
          if (j) it2 += q;
          auto itt1 = it1;
          for (size_type l = 0; l < q; ++l, ++it) {
            if (l) itt1 += s1;
            auto ittt1 = itt1, ittt2 = it2;
            *it = *ittt1 * (*ittt2);
            for (size_type m = 1; m < N; ++m) {
              ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_,
                                               base_tensor &tc1_,
                                               base_tensor &tc2_,
                                               size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
  };
  template <int N, int Q>
  struct ga_instruction_contraction_opt0_2_dunrolled : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*Q
                    << " optimized for vectorized second tensor of type 2");
      size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
      size_type s1_qq = s1*Q, s2_qq = s2*Q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_q; ++j) {
          if (j) it2 += Q;
          auto itt1 = it1;
          for (size_type l = 0; l < Q; ++l, ++it) {
            if (l) itt1 += s1;
            auto ittt1 = itt1, ittt2 = it2;
            *it = *ittt1 * (*ittt2);
            for (size_type m = 1; m < N; ++m) {
              ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2_dunrolled
    (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  // Contraction optimized for a first tensor which is a type 2 vectorized
  // base tensor.
  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type n, q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
                    " optimized for vectorized second tensor of type 2");
      size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      for (size_type i = 0; i < s1_q; ++i) {
        auto it1 = tc1.begin() + i*q;
        for (size_type l = 0; l < q; ++l) {
          auto it2 = tc2.begin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1, itt2 = it2;
            *it = *itt1 * (*itt2);
            for (size_type m = 1; m < n; ++m) {
              itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_,
                                      size_type n_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
  };
  template <int N>
  struct ga_instruction_contraction_opt2_0_unrolled : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*q
                    << " optimized for vectorized second tensor of type 2");
      size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1_q; ++i, it1 += q) {
        for (size_type l = 0; l < q; ++l) {
          auto it2 = tc2.begin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1, itt2 = it2;
            *it = *itt1 * (*itt2);
            for (size_type m = 1; m < N; ++m) {
              itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_,
                                               base_tensor &tc1_,
                                               base_tensor &tc2_,
                                               size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
  };
  template <int N, int Q>
  struct ga_instruction_contraction_opt2_0_dunrolled : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N*Q
                    << " optimized for vectorized second tensor of type 2");
      size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
      size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
        for (size_type l = 0; l < Q; ++l) {
          auto it2 = tc2.begin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1, itt2 = it2;
            *it = *itt1 * (*itt2);
            for (size_type m = 1; m < N; ++m) {
              itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0_dunrolled
    (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  // Contraction optimized for a second tensor which is a type 1 vectorized
  // base tensor (one scalar value per nn output components).
  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
                    " optimized for vectorized second tensor of type 1");
      size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;

      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin();
        for (size_type j = 0; j < s2_n; ++j) {
          if (j) it2 += nn;
          auto itt1 = it1;
          *it++ = (*itt1) * (*it2);
          for (size_type k = 1; k < nn; ++k)
            { itt1 += s1; *it++ = (*itt1) * (*it2); }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_1(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
  template<int N> inline void reduc_elem_unrolled_opt1_
  (const base_vector::iterator &it, const base_vector::iterator &it1,
   scalar_type a, size_type s1) {
    it[N-1] = it1[(N-1)*s1] * a;
    reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
  }
  template<> inline void reduc_elem_unrolled_opt1_<1>
  (const base_vector::iterator &it, const base_vector::iterator &it1,
   scalar_type a, size_type)
  { *it = (*it1) * a; }
  template <int N>
  struct ga_instruction_contraction_opt0_1_unrolled : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N
                    << " optimized for vectorized second tensor of type 1");
      size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
      auto it = t.begin(), it1 = tc1.begin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.begin(), it2e = it2 + s2;
        for (; it2 != it2e; it2 += N, it += N)
          reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
      }
      return 0;
    }
    ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_,
                                               base_tensor &tc1_,
                                               base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  // Contraction optimized when both tensors are type 1 vectorized base
  // tensors: only the diagonal entries of each nn x nn block are written.
  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
                    " optimized for both vectorized tensor of type 1");
      size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
      GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
      size_type ss1 = s1/nn, ss2 = s2/nn;

      // (the off-diagonal entries of t are expected to be zero already)
      auto it2 = tc2.begin();
      for (size_type j = 0; j < ss2; ++j) {
        if (j) it2 += nn;
        auto it1 = tc1.begin(), it = t.begin() + j*nn;
        for (size_type i = 0; i < ss1; ++i) {
          if (i) { it1 += nn, it += s2*nn; }
          scalar_type a = (*it1) * (*it2);
          auto itt = it;
          *itt = a; itt += s2_1; *itt = a;
          for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt1_1(base_tensor &t_, base_tensor &tc1_,
                                      base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
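  // When both tensors are vectorized of type 1, the result is block-diagonal:
  // only the entries t(i*nn+k, j*nn+k) are nonzero, and the stride s2+1 walks
  // exactly that diagonal. This is why the dispatcher below sets to_clear
  // when it selects this instruction: the untouched entries must be zeroed
  // before execution.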
  template<int N> inline scalar_type reduc_elem_unrolled__
  (base_tensor::iterator &it1, base_tensor::iterator &it2,
   size_type s1, size_type s2) {
    return (it1[(N-1)*s1])*(it2[(N-1)*s2])
           + reduc_elem_unrolled__<N-1>(it1, it2, s1, s2);
  }
  template<> inline scalar_type reduc_elem_unrolled__<1>
  (base_tensor::iterator &it1, base_tensor::iterator &it2,
   size_type, size_type)
  { return (*it1)*(*it2); }
  template<int N> struct ga_instruction_contraction_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size "
                    << N);
      size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
                      << " != " << s1 << "*" << s2);
      base_tensor::iterator it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
      for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
        *it = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
        ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
      }
      return 0;
    }
    ga_instruction_contraction_unrolled(base_tensor &t_, base_tensor &tc1_,
                                        base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
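  // reduc_elem_unrolled__<N> unrolls the dot product at compile time through
  // template recursion: each instantiation emits one multiply-add and
  // recurses on N-1, the <1> specialization terminating the chain, so the
  // inner contraction loop disappears entirely for the small fixed sizes
  // dispatched above.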
  // Computes t(i,j) = sum_k tc1(i,k)*tc2(j,k) with both the dot product
  // (size N) and the loop on the S2 columns of tc2 unrolled at compile time.
  template<int N, int S2> inline void reduc_elem_d_unrolled__
  (base_tensor::iterator &it, base_tensor::iterator &it1,
   base_tensor::iterator &it2, size_type s1, size_type s2) {
    *it++ = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
    reduc_elem_d_unrolled__<N, S2-1>(it, it1, ++it2, s1, s2);
  }
  // Terminal specializations (S2 == 0) ending the recursion for each
  // supported unroll size N:
  template<> inline void reduc_elem_d_unrolled__<1, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<2, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<3, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<4, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<5, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<6, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<7, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<8, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<9, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<10, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<11, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<12, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<13, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<14, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<15, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<16, 0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &, size_type, size_type) { }
  template<int N, int S2> struct ga_ins_red_d_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of "
                    "size " << S2 << "x" << N);
      size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
      GA_DEBUG_ASSERT(s2 == S2, "Internal error");
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
                      << " != " << s1 << "*" << s2);
      base_tensor::iterator it = t.begin(), it1 = tc1.begin();
      for (size_type ii = 0; ii < s1; ++ii, ++it1) {
        base_tensor::iterator it2 = tc2.begin();
        reduc_elem_d_unrolled__<N, S2>(it, it1, it2, s1, s2);
      }
      GA_DEBUG_ASSERT(it == t.end(), "Internal error");
      return 0;
    }
    ga_ins_red_d_unrolled(base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
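  // The doubly unrolled instruction fixes both the contraction size N and the
  // number of columns S2 of tc2 at compile time, so the two innermost loops
  // vanish. It is only safe to select when s2 is constant over the whole
  // assembly, which is what the "uniform" dispatcher below checks for.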
  pga_instruction ga_instruction_contraction_switch
  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
   size_type n, bool &to_clear) {
    base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();

    if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
        tc1_.qdim() == n && tc2_.qdim() == n) {
      to_clear = true;
      t_.set_sparsity(10, tc1_.qdim());
      return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
    }

    if (tc2_.sparsity() == 1) {
      switch(n) {
      case 2:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
               (t, tc1, tc2);
      case 3:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
               (t, tc1, tc2);
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
               (t, tc1, tc2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
               (t, tc1, tc2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
      }
    }
    if (tc2_.sparsity() == 2) {
      size_type q2 = tc2.sizes()[1];
      size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
      if (n2*q2 == n) {
        switch(n2) {
        case 1:
          switch(q2) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
            (t, tc1, tc2, q2);
          }
        case 2:
          switch(q2) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
            (t, tc1, tc2, q2);
          }
        case 3:
          switch(q2) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
            (t, tc1, tc2, q2);
          }
        case 4:
          return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
                 (t, tc1, tc2, q2);
        case 5:
          return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
                 (t, tc1, tc2, q2);
        default:
          return std::make_shared<ga_instruction_contraction_opt0_2>
                 (t, tc1, tc2, n2, q2);
        }
      }
    }
    if (tc1_.sparsity() == 2) {
      size_type q1 = tc1.sizes()[1];
      size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
      if (n1*q1 == n) {
        switch(n1) {
        case 1:
          switch(q1) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
            (t, tc1, tc2, q1);
          }
        case 2:
          switch(q1) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
            (t, tc1, tc2, q1);
          }
        case 3:
          switch(q1) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
            (t, tc1, tc2, q1);
          }
        case 4:
          return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
                 (t, tc1, tc2, q1);
        case 5:
          return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
                 (t, tc1, tc2, q1);
        default:
          return std::make_shared<ga_instruction_contraction_opt2_0>
                 (t, tc1, tc2, n1, q1);
        }
      }
    }

    switch(n) {
    case  2 : return std::make_shared<ga_instruction_contraction_unrolled< 2>>
                     (t, tc1, tc2);
    case  3 : return std::make_shared<ga_instruction_contraction_unrolled< 3>>
                     (t, tc1, tc2);
    case  4 : return std::make_shared<ga_instruction_contraction_unrolled< 4>>
                     (t, tc1, tc2);
    case  5 : return std::make_shared<ga_instruction_contraction_unrolled< 5>>
                     (t, tc1, tc2);
    case  6 : return std::make_shared<ga_instruction_contraction_unrolled< 6>>
                     (t, tc1, tc2);
    case  7 : return std::make_shared<ga_instruction_contraction_unrolled< 7>>
                     (t, tc1, tc2);
    case  8 : return std::make_shared<ga_instruction_contraction_unrolled< 8>>
                     (t, tc1, tc2);
    case  9 : return std::make_shared<ga_instruction_contraction_unrolled< 9>>
                     (t, tc1, tc2);
    case 10 : return std::make_shared<ga_instruction_contraction_unrolled<10>>
                     (t, tc1, tc2);
    case 11 : return std::make_shared<ga_instruction_contraction_unrolled<11>>
                     (t, tc1, tc2);
    case 12 : return std::make_shared<ga_instruction_contraction_unrolled<12>>
                     (t, tc1, tc2);
    case 13 : return std::make_shared<ga_instruction_contraction_unrolled<13>>
                     (t, tc1, tc2);
    case 14 : return std::make_shared<ga_instruction_contraction_unrolled<14>>
                     (t, tc1, tc2);
    case 15 : return std::make_shared<ga_instruction_contraction_unrolled<15>>
                     (t, tc1, tc2);
    case 16 : return std::make_shared<ga_instruction_contraction_unrolled<16>>
                     (t, tc1, tc2);
    default : return std::make_shared<ga_instruction_contraction>
                     (t, tc1, tc2, n);
    }
  }
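  // Dispatch summary (a reading of the code above, not an upstream contract):
  // sparsity() == 1 marks a type 1 vectorized base tensor, 2 a type 2 one,
  // and 10 the block-diagonal result produced by opt1_1 (which also requires
  // the target tensor to be cleared first, hence to_clear). Small contraction
  // sizes get fully unrolled instructions; everything else falls back to the
  // generic, possibly BLAS-backed, contraction. A minimal usage sketch with
  // hypothetical names:
  //   bool to_clear = false;
  //   pga_instruction pgai
  //     = ga_instruction_contraction_switch(t, tc1, tc2, qdim, to_clear);
  //   // if to_clear is set, schedule a clear of t.tensor() before pgai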
  pga_instruction ga_uniform_instruction_contraction_switch
  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
   size_type n, bool &to_clear) {
    base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();

    if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
        tc1_.qdim() == n && tc2_.qdim() == n) {
      to_clear = true;
      t_.set_sparsity(10, tc1_.qdim());
      return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
    }
    if (tc2_.sparsity() == 1) {
      switch(n) {
      case 2:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
               (t, tc1, tc2);
      case 3:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
               (t, tc1, tc2);
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
               (t, tc1, tc2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
               (t, tc1, tc2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
      }
    }
    if (tc2_.sparsity() == 2) {
      size_type q2 = tc2.sizes()[1];
      size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
      if (n2*q2 == n) {
        switch(n2) {
        case 1:
          switch(q2) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
            (t, tc1, tc2, q2);
          }
        case 2:
          switch(q2) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
            (t, tc1, tc2, q2);
          }
        case 3:
          switch(q2) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
            (t, tc1, tc2, q2);
          }
        case 4:
          return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
                 (t, tc1, tc2, q2);
        case 5:
          return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
                 (t, tc1, tc2, q2);
        default:
          return std::make_shared<ga_instruction_contraction_opt0_2>
                 (t, tc1, tc2, n2, q2);
        }
      }
    }
    if (tc1_.sparsity() == 2) {
      size_type q1 = tc1.sizes()[1];
      size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
      if (n1*q1 == n) {
        switch(n1) {
        case 1:
          switch(q1) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
            (t, tc1, tc2, q1);
          }
        case 2:
          switch(q1) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
            (t, tc1, tc2, q1);
          }
        case 3:
          switch(q1) {
          case 2: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
            (t, tc1, tc2);
          case 3: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
            (t, tc1, tc2);
          case 4: return
            std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
            (t, tc1, tc2);
          default: return
            std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
            (t, tc1, tc2, q1);
          }
        case 4:
          return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
                 (t, tc1, tc2, q1);
        case 5:
          return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
                 (t, tc1, tc2, q1);
        default:
          return std::make_shared<ga_instruction_contraction_opt2_0>
                 (t, tc1, tc2, n1, q1);
        }
      }
    }

    // Doubly unrolled instructions for certain small values of n and s2
    size_type s2 = tc2.size()/n;
    switch(s2) {
    case 1:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 2:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 3:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 4:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 5:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 6:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 7:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 8:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 9:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 10:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
    }
  }
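  // The "uniform" variant may additionally pick a doubly unrolled instruction
  // because, on a uniform mesh_fem, the second dimension s2 is the same on
  // every element and can therefore be frozen at compile time; any situation
  // it does not specialize is delegated to ga_instruction_contraction_switch.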
  // Performs Amij Bnj -> Cmni. (Specific contraction)
  struct ga_instruction_spec_contraction : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific contraction operation of "
                    "size " << nn);
      size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
      size_type s2 = tc2.sizes()[0];
      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s11; ++i)
        for (size_type n = 0; n < s2; ++n)
          for (size_type m = 0; m < s1; ++m, ++it) {
            *it = scalar_type(0);
            for (size_type j = 0; j < nn; ++j)
              *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
          }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec_contraction(base_tensor &t_, base_tensor &tc1_,
                                    base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
  // Performs Amik Bnjk -> Cnmij. (Second specific contraction)
  struct ga_instruction_spec2_contraction : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: second specific contraction operation of "
                    "size " << nn);
      size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
      size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < s22; ++j)
        for (size_type i = 0; i < s11; ++i)
          for (size_type m = 0; m < s1; ++m)
            for (size_type n = 0; n < s2; ++n, ++it) {
              *it = scalar_type(0);
              for (size_type k = 0; k < nn; ++k)
                *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec2_contraction(base_tensor &t_, base_tensor &tc1_,
                                     base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
  // Tensor product: t(i,j) = tc1[i] * tc2[j], first factor index fastest.
  struct ga_instruction_simple_tmult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: simple tensor product");
      size_type s1 = tc1.size();
      GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
      base_tensor::iterator it2=tc2.begin(), it1=tc1.begin(), it1end=it1 + s1;
      for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
        *it = *(it2) * (*it1);
        ++it1; if (it1 == it1end) { it1 = tc1.begin(), ++it2; }
      }
      return 0;
    }
    ga_instruction_simple_tmult(base_tensor &t_, base_tensor &tc1_,
                                base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  template<int S1> inline void tmult_elem_unrolled__
  (base_tensor::iterator &it, base_tensor::iterator &it1,
   base_tensor::iterator &it2) {
    *it++ = (*it1++)*(*it2);
    tmult_elem_unrolled__<S1-1>(it, it1, it2);
  }
  template<> inline void tmult_elem_unrolled__<0>
  (base_tensor::iterator &, base_tensor::iterator &,
   base_tensor::iterator &) { }
  // Tensor product with the first factor of compile-time size S1.
  template<int S1> struct ga_instruction_simple_tmult_unrolled
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      size_type s2 = tc2.size();
      GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
                    << tc1.size() << " operations");
      GA_DEBUG_ASSERT(t.size() == tc1.size() * s2, "Wrong sizes");
      GA_DEBUG_ASSERT(tc1.size() == S1, "Wrong sizes");

      base_tensor::iterator it = t.begin(), it2 = tc2.begin();
      for (size_type ii = 0; ii < s2; ++ii, ++it2) {
        base_tensor::iterator it1 = tc1.begin();
        tmult_elem_unrolled__<S1>(it, it1, it2);
      }
      GA_DEBUG_ASSERT(it == t.end(), "Internal error");
      return 0;
    }
    ga_instruction_simple_tmult_unrolled(base_tensor &t_, base_tensor &tc1_,
                                         base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  pga_instruction ga_uniform_instruction_simple_tmult
  (base_tensor &t, base_tensor &tc1, base_tensor &tc2) {
    switch(tc1.size()) {
    case  2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
                     (t, tc1, tc2);
    case  3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
                     (t, tc1, tc2);
    case  4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
                     (t, tc1, tc2);
    case  5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
                     (t, tc1, tc2);
    case  6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
                     (t, tc1, tc2);
    case  7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
                     (t, tc1, tc2);
    case  8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
                     (t, tc1, tc2);
    case  9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
                     (t, tc1, tc2);
    case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
                     (t, tc1, tc2);
    case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
                     (t, tc1, tc2);
    case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
                     (t, tc1, tc2);
    case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
                     (t, tc1, tc2);
    case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
                     (t, tc1, tc2);
    case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
                     (t, tc1, tc2);
    case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
                     (t, tc1, tc2);
    default : return std::make_shared<ga_instruction_simple_tmult>
                     (t, tc1, tc2);
    }
  }
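  // Unrolling on tc1.size() (2..16) removes the inner loop of the tensor
  // product for the small first-factor sizes that dominate in practice; any
  // other size falls back to the generic two-iterator loop above.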
  // Performs Ami Bnj -> Cmnij. (Specific tensor product)
  struct ga_instruction_spec_tmult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    size_type s1_2, s2_2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific tensor product");
      GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
      size_type s1_1 = tc1.size() / s1_2;
      size_type s2_1 = tc2.size() / s2_2;

      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < s2_2; ++j)
        for (size_type i = 0; i < s1_2; ++i)
          for (size_type n = 0; n < s2_1; ++n)
            for (size_type m = 0; m < s1_1; ++m, ++it)
              *it = tc1[m+i*s1_1] * tc2[n+j*s2_1];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec_tmult(base_tensor &t_, base_tensor &tc1_,
                              base_tensor &tc2_, size_type s1_2_,
                              size_type s2_2_)
      : t(t_), tc1(tc1_), tc2(tc2_), s1_2(s1_2_), s2_2(s2_2_) {}
  };
  // Performs Ai Bmj -> Cmij. (Second specific tensor product)
  struct ga_instruction_spec2_tmult : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: second specific tensor product");
      GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
      size_type s1 = tc1.size();
      size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;

      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < s2_2; ++j)
        for (size_type i = 0; i < s1; ++i)
          for (size_type m = 0; m < s2_1; ++m, ++it)
            *it = tc1[i] * tc2[m+j*s2_1];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec2_tmult(base_tensor &t_, base_tensor &tc1_,
                               base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
  struct ga_instruction_simple_c_matrix : public ga_instruction {
    base_tensor &t;
    std::vector<scalar_type *> components;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gathering components for explicit "
                    "matrix");
      GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
      for (size_type i = 0; i < components.size(); ++i)
        t[i] = *(components[i]);
      return 0;
    }
    ga_instruction_simple_c_matrix(base_tensor &t_,
                                   std::vector<scalar_type *> &components_)
      : t(t_), components(components_) {}
  };
  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
    base_tensor &t;
    const std::vector<const base_tensor *> components;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gathering components for explicit "
                    "matrix with tests functions");
      size_type s = t.size() / components.size();
      GA_DEBUG_ASSERT(s, "Wrong sizes");
      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < components.size(); ++i) {
        const base_tensor &t1 = *(components[i]);
        if (t1.size() > 1) {
          GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
                          << " != " << s);
          for (size_type j = 0; j < s; ++j) *it++ = t1[j];
        } else {
          for (size_type j = 0; j < s; ++j) *it++ = t1[0];
        }
      }
      return 0;
    }
    ga_instruction_c_matrix_with_tests
    (base_tensor &t_, const std::vector<const base_tensor *> &components_)
      : t(t_), components(components_) {}
  };
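  // Explicit matrices written in the assembly language (e.g. "[a,b;c,d]")
  // are gathered component by component: the first instruction copies plain
  // scalar components, the second broadcasts components carrying test
  // functions, each occupying a block of size s in the result.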
  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
    scalar_type &t;
    const scalar_type &c;
    pscalar_func_onearg f1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a scalar");
      t = (*f1)(c);
      return 0;
    }
    ga_instruction_eval_func_1arg_1res(scalar_type &t_, const scalar_type &c_,
                                       pscalar_func_onearg f1_)
      : t(t_), c(c_), f1(f1_) {}
  };

  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
    scalar_type &t;
    const scalar_type &c;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a scalar");
      t = F(c);
      return 0;
    }
    ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
                                            const scalar_type &c_,
                                            const ga_predef_function &F_)
      : t(t_), c(c_), F(F_) {}
  };
  struct ga_instruction_eval_func_1arg : public ga_instruction {
    base_tensor &t, &tc1;
    pscalar_func_onearg f1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on tensor");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = (*f1)(tc1[i]);
      return 0;
    }
    ga_instruction_eval_func_1arg(base_tensor &t_, base_tensor &c_,
                                  pscalar_func_onearg f1_)
      : t(t_), tc1(c_), f1(f1_) {}
  };

  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
    base_tensor &t, &tc1;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on tensor");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i]);
      return 0;
    }
    ga_instruction_eval_func_1arg_expr(base_tensor &t_, base_tensor &c_,
                                       const ga_predef_function &F_)
      : t(t_), tc1(c_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two scalar");
      t = (*f2)(c, d);
      return 0;
    }
    ga_instruction_eval_func_2arg_1res(scalar_type &t_, const scalar_type &c_,
                                       const scalar_type &d_,
                                       pscalar_func_twoargs f2_)
      : t(t_), c(c_), d(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two scalar");
      t = F(c, d);
      return 0;
    }
    ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
                                            const scalar_type &c_,
                                            const scalar_type &d_,
                                            const ga_predef_function &F_)
      : t(t_), c(c_), d(d_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg_first_scalar : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one scalar and one tensor");
      GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[0], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_first_scalar
    (base_tensor &t_, base_tensor &c_, base_tensor &d_,
     pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_first_scalar_expr
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one scalar and one tensor");
      GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[0], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_first_scalar_expr
    (base_tensor &t_, base_tensor &c_, base_tensor &d_,
     const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg_second_scalar : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one tensor and one scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[0]);
      return 0;
    }
    ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
                                                base_tensor &c_,
                                                base_tensor &d_,
                                                pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_second_scalar_expr
    : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one tensor and one scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[0]);
      return 0;
    }
    ga_instruction_eval_func_2arg_second_scalar_expr
    (base_tensor &t_, base_tensor &c_, base_tensor &d_,
     const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };
  struct ga_instruction_eval_func_2arg : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two tensors");
      GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                      "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg(base_tensor &t_, base_tensor &c_,
                                  base_tensor &d_, pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
    base_tensor &t, &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two tensors");
      GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                      "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_expr(base_tensor &t_, base_tensor &c_,
                                       base_tensor &d_,
                                       const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };
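  // In this family, the "_expr" variants evaluate a ga_predef_function object
  // (a predefined function defined by an expression), while the plain
  // variants call a raw pscalar_func_onearg/pscalar_func_twoargs pointer; the
  // scalar/tensor argument combinations are split into separate instructions
  // so no per-entry broadcasting test is paid at execution time.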
  struct ga_instruction_eval_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator evaluation");
      OP.value(args, t);
      return 0;
    }
    ga_instruction_eval_OP(base_tensor &t_, const ga_nonlinear_operator &OP_,
                           ga_nonlinear_operator::arg_list &args_)
      : t(t_), OP(OP_), args(args_) {}
  };

  struct ga_instruction_eval_derivative_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    size_type der1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator derivative evaluation");
      OP.derivative(args, der1, t);
      return 0;
    }
    ga_instruction_eval_derivative_OP(base_tensor &t_,
                                      const ga_nonlinear_operator &OP_,
                                      ga_nonlinear_operator::arg_list &args_,
                                      size_type der1_)
      : t(t_), OP(OP_), args(args_), der1(der1_) {}
  };

  struct ga_instruction_eval_second_derivative_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    size_type der1, der2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
      OP.second_derivative(args, der1, der2, t);
      return 0;
    }
    ga_instruction_eval_second_derivative_OP
    (base_tensor &t_, const ga_nonlinear_operator &OP_,
     ga_nonlinear_operator::arg_list &args_, size_type der1_, size_type der2_)
      : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
  };
  struct ga_instruction_tensor_slice : public ga_instruction {
    base_tensor &t, &tc1;
    bgeot::multi_index mi, indices;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: tensor slice");
      size_type order = t.sizes().size();
      for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
           mi3.incrementation(t.sizes())) {
        for (size_type j = 0; j < order; ++j)
          mi[indices[j]] = mi3[j];
        t(mi3) = tc1(mi);
      }
      return 0;
    }
    ga_instruction_tensor_slice(base_tensor &t_, base_tensor &tc1_,
                                bgeot::multi_index &mi_,
                                bgeot::multi_index &indices_)
      : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
  };
  struct ga_instruction_transformation_call : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set::interpolate_info &inin;
    pinterpolate_transformation trans;
    fem_interpolation_context &ctx;
    const base_small_vector &Normal;
    const mesh &m;
    bool compute_der;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: call interpolate transformation");
      base_node P_ref;
      size_type cv;
      short_type face_num;
      gmm::clear(inin.Normal);
      inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
                                      face_num, P_ref, inin.Normal,
                                      inin.derivatives, compute_der);
      if (inin.pt_type) {
        if (cv != size_type(-1)) {
          inin.m->points_of_convex(cv, inin.G);
          inin.ctx.change((inin.m)->trans_of_convex(cv),
                          0, P_ref, inin.G, cv, face_num);
          inin.has_ctx = true;
          if (face_num != short_type(-1)) {
            inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
            gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
          } else
            inin.Normal.resize(0);
          inin.pt_y = inin.ctx.xreal();
        } else {
          inin.ctx.invalid_convex_num();
          inin.pt_y = P_ref;
          inin.has_ctx = false;
        }
      } else {
        inin.ctx.invalid_convex_num();
        inin.Normal.resize(0);
        inin.pt_y.resize(0);
        inin.has_ctx = false;
      }
      GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
      return 0;
    }
    ga_instruction_transformation_call
    (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
     pinterpolate_transformation t, fem_interpolation_context &ctxx,
     const base_small_vector &No, const mesh &mm, bool compute_der_)
      : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
        compute_der(compute_der_) {}
  };
  struct ga_instruction_neighbour_transformation_call : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set::interpolate_info &inin;
    pinterpolate_transformation trans;
    fem_interpolation_context &ctx;
    base_small_vector &Normal;
    const mesh &m;
    size_type &ipt;
    papprox_integration &pai;
    bgeot::geotrans_precomp_pool &gp_pool;
    std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbour_corresp;

    virtual int exec() {
      bool cancel_optimization = false;
      GA_DEBUG_INFO("Instruction: call interpolate neighbour transformation");
      if (ipt == 0) {
        if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
            || cancel_optimization) {
          inin.ctx.invalid_convex_num();
        } else {
          // Test whether this geometric configuration was already met
          size_type cv = ctx.convex_num();
          short_type f = ctx.face_num();
          auto adj_face = m.adjacent_face(cv, f);
          if (adj_face.cv == size_type(-1)) {
            inin.ctx.invalid_convex_num();
          } else {
            gauss_pt_corresp gpc;
            gpc.pgt1 = m.trans_of_convex(cv);
            gpc.pgt2 = m.trans_of_convex(adj_face.cv);
            gpc.pai = pai;
            auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
            auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
                                                           adj_face.f);
            auto str1 = gpc.pgt1->structure();
            auto str2 = gpc.pgt2->structure();
            size_type nbptf1 = str1->nb_points_of_face(f);
            size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
            gpc.nodes.resize(nbptf1*2);
            for (size_type i = 0; i < nbptf1; ++i) {
              gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
              bool found = false;
              for (size_type j = 0; j < nbptf2; ++j) {
                if (inds_pt2[j] == inds_pt1[i]) {
                  gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
                  found = true;
                  break;
                }
              }
              GMM_ASSERT1(found, "Internal error");
            }
            bgeot::pstored_point_tab pspt = 0;
            auto itm = neighbour_corresp.find(gpc);
            if (itm != neighbour_corresp.end()) {
              pspt = itm->second;
            } else {
              size_type nbpt = pai->nb_points_on_face(f);
              bgeot::geotrans_inv_convex gic;
              gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
              size_type first_ind = pai->ind_first_point_on_face(f);
              const bgeot::stored_point_tab
                &spt = *(pai->pintegration_points());
              base_matrix G;
              m.points_of_convex(cv, G);
              fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
              std::vector<base_node> P_ref(nbpt);

              for (size_type i = 0; i < nbpt; ++i) {
                ctx_x.set_xref(spt[first_ind+i]);
                bool converged = true;
                gic.invert(ctx_x.xreal(), P_ref[i], converged);
                bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
                GMM_ASSERT1(is_in && converged, "Geometric transformation "
                            "inversion has failed in neighbour transformation");
              }
              pspt = store_point_tab(P_ref);
              neighbour_corresp[gpc] = pspt;
            }
            m.points_of_convex(adj_face.cv, inin.G);
            bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
            inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
          }
        }
      }

      if (inin.ctx.have_pgp()) {
        inin.ctx.set_ii(ipt);
        inin.pt_type = 1;
        inin.has_ctx = true;
        inin.pt_y = inin.ctx.xreal();
        inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
        gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
        inin.m = &m;
      } else {
        base_node P_ref;
        size_type cv;
        short_type face_num;
        gmm::clear(inin.Normal);
        inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m),
                                        cv, face_num, P_ref, inin.Normal,
                                        inin.derivatives, false);
        if (inin.pt_type) {
          if (cv != size_type(-1)) {
            inin.m->points_of_convex(cv, inin.G);
            inin.ctx.change((inin.m)->trans_of_convex(cv),
                            0, P_ref, inin.G, cv, face_num);
            inin.has_ctx = true;
            if (face_num != short_type(-1)) {
              inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
              gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
            } else
              inin.Normal.resize(0);
            inin.pt_y = inin.ctx.xreal();
          } else {
            inin.ctx.invalid_convex_num();
            inin.pt_y = P_ref;
            inin.has_ctx = false;
          }
        } else {
          inin.ctx.invalid_convex_num();
          inin.Normal.resize(0);
          inin.pt_y.resize(0);
          inin.has_ctx = false;
        }
      }
      GA_DEBUG_INFO("Instruction: end of call neighbour interpolate "
                    "transformation");
      return 0;
    }
    ga_instruction_neighbour_transformation_call
    (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
     pinterpolate_transformation t, fem_interpolation_context &ctxx,
     base_small_vector &No, const mesh &mm, size_type &ipt_,
     papprox_integration &pai_, bgeot::geotrans_precomp_pool &gp_pool_,
     std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbour_corresp_)
      : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
        ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
        neighbour_corresp(neighbour_corresp_) {}
  };
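  // Caching strategy: gauss_pt_corresp identifies a configuration by the
  // pair of geometric transformations, the face node matching and the
  // integration method (the ordering defined by operator< at the top of this
  // file), and neighbour_corresp memoizes the inverted reference points for
  // it, so the geometric transformation inversion runs once per
  // configuration rather than once per element pair.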
  struct ga_instruction_scalar_assembly : public ga_instruction {
    base_tensor &t;
    scalar_type &E, &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar term assembly");
      E += t[0] * coeff;
      return 0;
    }
    ga_instruction_scalar_assembly(base_tensor &t_, scalar_type &E_,
                                   scalar_type &coeff_)
      : t(t_), E(E_), coeff(coeff_) {}
  };
  struct ga_instruction_fem_vector_assembly : public ga_instruction {
    base_tensor &t;
    base_vector &Vr, &Vn;
    const fem_interpolation_context &ctx;
    const gmm::sub_interval &Ir, &In;
    const mesh_fem *mfn, **mfg;
    scalar_type &coeff;
    const size_type &nbpt, &ipt;
    base_vector elem;
    bool interpolate;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
      bool empty_weight = (coeff == scalar_type(0));
      if (ipt == 0 || interpolate) { // initialize elem (unrolled by four)
        if (empty_weight) elem.resize(0);
        elem.resize(t.size());
        if (!empty_weight) {
          auto itt = t.begin();
          auto it = elem.begin(), ite = elem.end();
          size_type nd = ((t.size()) >> 2);
          for (size_type i = 0; i < nd; ++i) {
            *it++ = (*itt++) * coeff; *it++ = (*itt++) * coeff;
            *it++ = (*itt++) * coeff; *it++ = (*itt++) * coeff;
          }
          for (; it != ite;) *it++ = (*itt++) * coeff;
        }
      } else if (!empty_weight) { // accumulate into elem
        auto itt = t.begin();
        auto it = elem.begin(), ite = elem.end();
        size_type nd = ((t.size()) >> 2);
        for (size_type i = 0; i < nd; ++i) {
          *it++ += (*itt++) * coeff; *it++ += (*itt++) * coeff;
          *it++ += (*itt++) * coeff; *it++ += (*itt++) * coeff;
        }
        for (; it != ite;) *it++ += (*itt++) * coeff;
      }
      if (ipt == nbpt-1 || interpolate) { // add elem to the global vector
        const mesh_fem &mf = *(mfg ? *mfg : mfn);
        GMM_ASSERT1(mfg ? *mfg : mfn, "Internal error");
        const gmm::sub_interval &I = mf.is_reduced() ? Ir : In;
        base_vector &V = mf.is_reduced() ? Vr : Vn;
        if (!(ctx.is_convex_num_valid())) return 0;
        size_type cv_1 = ctx.convex_num();
        GA_DEBUG_ASSERT(V.size() >= I.first() + mf.nb_basic_dof(),
                        "Bad assembly vector size");
        auto &ct = mf.ind_scalar_basic_dof_of_element(cv_1);
        size_type qmult = mf.get_qdim();
        if (qmult > 1) qmult /= mf.fem_of_element(cv_1)->target_dim();
        size_type ifirst = I.first();
        auto ite = elem.begin();
        for (auto itc = ct.begin(); itc != ct.end(); ++itc)
          for (size_type q = 0; q < qmult; ++q)
            V[ifirst+(*itc)+q] += *ite++;
        GMM_ASSERT1(ite == elem.end(), "Internal error");
      }
      return 0;
    }
    ga_instruction_fem_vector_assembly
    (base_tensor &t_, base_vector &Vr_, base_vector &Vn_,
     const fem_interpolation_context &ctx_,
     const gmm::sub_interval &Ir_, const gmm::sub_interval &In_,
     const mesh_fem *mfn_, const mesh_fem **mfg_,
     scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
      : t(t_), Vr(Vr_), Vn(Vn_), ctx(ctx_), Ir(Ir_), In(In_), mfn(mfn_),
        mfg(mfg_), coeff(coeff_), nbpt(nbpt_), ipt(ipt_),
        interpolate(interpolate_) {}
  };
  struct ga_instruction_vector_assembly : public ga_instruction {
    base_tensor &t;
    base_vector &V;
    const gmm::sub_interval &I;
    scalar_type &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for "
                    "fixed size variable");
      gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
      return 0;
    }
    ga_instruction_vector_assembly(base_tensor &t_, base_vector &V_,
                                   const gmm::sub_interval &I_,
                                   scalar_type &coeff_)
      : t(t_), V(V_), I(I_), coeff(coeff_) {}
  };
  struct ga_instruction_assignment : public ga_instruction {
    base_tensor &t;
    base_vector &V;
    const fem_interpolation_context &ctx;
    const im_data *imd;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Assignment to im_data");
      imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
      return 0;
    }
    ga_instruction_assignment(base_tensor &t_, base_vector &V_,
                              const fem_interpolation_context &ctx_,
                              const im_data *imd_)
      : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
  };
  template <class MAT>
  inline void add_elem_matrix_
  (MAT &K, const std::vector<size_type> &dofs1,
   const std::vector<size_type> &dofs2, std::vector<size_type> &,
   base_vector &elem, scalar_type threshold, size_type) {
    base_vector::const_iterator it = elem.cbegin();
    for (const size_type &dof2 : dofs2)
      for (const size_type &dof1 : dofs1) {
        if (gmm::abs(*it) > threshold)
          K(dof1, dof2) += *it;
        ++it;
      }
  }
  inline void add_elem_matrix_
  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
   const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
   std::vector<size_type> &dofs1_sort,
   base_vector &elem, scalar_type threshold, size_type N) {
    size_type maxest = (N+1) * std::max(dofs1.size(), dofs2.size());
    size_type s1 = dofs1.size(), s2 = dofs2.size();
    gmm::elt_rsvector_<scalar_type> ev;

    dofs1_sort.resize(s1);
    for (size_type i = 0; i < s1; ++i) { // insertion sort of the row dofs
      size_type j = i, k = j-1;
      while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
        { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
      dofs1_sort[j] = i;
    }

    base_vector::const_iterator it = elem.cbegin();
    for (size_type j = 0; j < s2; ++j) { // iteration on the columns
      if (j) it += s1;
      std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dofs2[j]];
      size_type nb = col.size();

      if (nb == 0) { // empty column: sorted bulk insertion
        col.reserve(maxest);
        for (size_type i = 0; i < s1; ++i) {
          size_type k = dofs1_sort[i]; ev.e = *(it+k);
          if (gmm::abs(ev.e) > threshold) { ev.c=dofs1[k]; col.push_back(ev); }
        }
      } else { // merge into a nonempty column
        size_type ind = 0;
        for (size_type i = 0; i < s1; ++i) {
          size_type k = dofs1_sort[i]; ev.e = *(it+k);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = dofs1[k];
            // binary search for the insertion position
            size_type count = nb - ind, step, l;
            while (count > 0) {
              step = count / 2; l = ind + step;
              if (col[l].c < ev.c) { ind = ++l; count -= step + 1; }
              else count = step;
            }

            auto itc = col.begin() + ind;
            if (ind != nb && itc->c == ev.c) itc->e += ev.e;
            else {
              if (nb - ind > 1300)
                GMM_WARNING2("Inefficient addition of element in rsvector with "
                             << col.size() - ind << " non-zero entries");
              col.push_back(ev);
              if (ind != nb) { // shift the tail to make room
                itc = col.begin() + ind;
                auto ite = col.end(); --ite; auto itee = ite;
                for (; ite != itc; --ite) { --itee; *ite = *itee; }
                *itc = ev;
              }
              ++nb;
            }
            ++ind;
          }
        }
      }
    }
  }
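  // The rsvector specialization sorts the local dofs once (insertion sort
  // into dofs1_sort), then merges each elementary column into the sparse
  // column by binary search, shifting the stored tail only when a genuinely
  // new entry must be inserted; the generic overload above simply accumulates
  // entry by entry through operator().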
  template <class MAT = model_real_sparse_matrix>
  struct ga_instruction_matrix_assembly : public ga_instruction {
    const base_tensor &t;
    MAT &Kr, &Kn;
    const fem_interpolation_context &ctx1, &ctx2;
    const gmm::sub_interval &Ir1, &Ir2;
    const gmm::sub_interval &In1, &In2;
    const mesh_fem *mfn1, *mfn2;
    const mesh_fem **mfg1, **mfg2;
    const scalar_type &coeff, &alpha1, &alpha2;
    const size_type &nbpt, &ipt;
    base_vector elem;
    bool interpolate;
    std::vector<size_type> dofs1, dofs2, dofs1_sort;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly");
      bool empty_weight = (coeff == scalar_type(0));
      if (ipt == 0 || interpolate) { // initialize elem (unrolled by four)
        if (empty_weight) elem.resize(0);
        elem.resize(t.size());
        if (!empty_weight) {
          auto itt = t.begin();
          auto it = elem.begin(), ite = elem.end();
          scalar_type e = coeff*alpha1*alpha2;
          size_type nd = ((t.size()) >> 2);
          for (size_type i = 0; i < nd; ++i) {
            *it++ = (*itt++) * e; *it++ = (*itt++) * e;
            *it++ = (*itt++) * e; *it++ = (*itt++) * e;
          }
          for (; it != ite;) *it++ = (*itt++) * e;
        }
      } else if (!empty_weight) { // accumulate into elem
        auto itt = t.begin();
        auto it = elem.begin(), ite = elem.end();
        scalar_type e = coeff*alpha1*alpha2;
        size_type nd = ((t.size()) >> 2);
        for (size_type i = 0; i < nd; ++i) {
          *it++ += (*itt++) * e; *it++ += (*itt++) * e;
          *it++ += (*itt++) * e; *it++ += (*itt++) * e;
        }
        for (; it != ite;) *it++ += (*itt++) * e;
      }
      if (ipt == nbpt-1 || interpolate) { // add elem to the global matrix
        const mesh_fem *pmf1 = mfg1 ? *mfg1 : mfn1;
        const mesh_fem *pmf2 = mfg2 ? *mfg2 : mfn2;
        bool reduced = (pmf1 && pmf1->is_reduced())
                    || (pmf2 && pmf2->is_reduced());
        MAT &K = reduced ? Kr : Kn;
        const gmm::sub_interval &I1 = reduced ? Ir1 : In1;
        const gmm::sub_interval &I2 = reduced ? Ir2 : In2;
        GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem);
        if (ninf == scalar_type(0)) return 0;

        size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
        size_type cv1 = pmf1 ? ctx1.convex_num() : s1;
        size_type cv2 = pmf2 ? ctx2.convex_num() : s2;
        size_type N = 1;

        dofs1.assign(s1, I1.first());
        if (pmf1) {
          if (!(ctx1.is_convex_num_valid())) return 0;
          N = ctx1.N();
          auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
          size_type qmult1 = pmf1->get_qdim();
          if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
          auto itd = dofs1.begin();
          if (qmult1 == 1) {
            for (auto itt = ct1.begin(); itt != ct1.end(); ++itt)
              *itd++ += *itt;
          } else {
            for (auto itt = ct1.begin(); itt != ct1.end(); ++itt)
              for (size_type q = 0; q < qmult1; ++q)
                *itd++ += *itt + q;
          }
        } else
          for (size_type i=0; i < s1; ++i) dofs1[i] += i;

        if (pmf1 == pmf2 && cv1 == cv2) {
          if (I1.first() == I2.first()) {
            add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
          } else {
            dofs2.resize(dofs1.size());
            for (size_type i = 0; i < dofs1.size(); ++i)
              dofs2[i] = dofs1[i] + I2.first() - I1.first();
            add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
          }
        } else {
          dofs2.assign(s2, I2.first());
          if (pmf2) {
            if (!(ctx2.is_convex_num_valid())) return 0;
            N = std::max(N, ctx2.N());
            auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
            size_type qmult2 = pmf2->get_qdim();
            if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
            auto itd = dofs2.begin();
            if (qmult2 == 1) {
              for (auto itt = ct2.begin(); itt != ct2.end(); ++itt)
                *itd++ += *itt;
            } else {
              for (auto itt = ct2.begin(); itt != ct2.end(); ++itt)
                for (size_type q = 0; q < qmult2; ++q)
                  *itd++ += *itt + q;
            }
          } else
            for (size_type i=0; i < s2; ++i) dofs2[i] += i;

          add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
        }
      }
      return 0;
    }
    ga_instruction_matrix_assembly
    (const base_tensor &t_, MAT &Kr_, MAT &Kn_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &Ir1_, const gmm::sub_interval &In1_,
     const gmm::sub_interval &Ir2_, const gmm::sub_interval &In2_,
     const mesh_fem *mfn1_, const mesh_fem **mfg1_,
     const mesh_fem *mfn2_, const mesh_fem **mfg2_,
     const scalar_type &coeff_,
     const scalar_type &alpha2_, const scalar_type &alpha1_,
     const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
      : t(t_), Kr(Kr_), Kn(Kn_), ctx1(ctx1_), ctx2(ctx2_),
        Ir1(Ir1_), Ir2(Ir2_), In1(In1_), In2(In2_),
4298 mfn1(mfn1_), mfn2(mfn2_), mfg1(mfg1_), mfg2(mfg2_),
4299 coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4300 nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
4301 dofs1(0), dofs2(0) {}
4304 template <
class MAT = model_real_sparse_matrix>
4305 struct ga_instruction_matrix_assembly_standard_scalar:
public ga_instruction {
4306 const base_tensor &t;
4308 const fem_interpolation_context &ctx1, &ctx2;
4309 const gmm::sub_interval &I1, &I2;
4310 const mesh_fem *pmf1, *pmf2;
4311 const scalar_type &coeff, &alpha1, &alpha2;
4312 const size_type &nbpt, &ipt;
4314 std::vector<size_type> dofs1, dofs2, dofs1_sort;
4315 virtual int exec() {
4316 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard " 4319 elem.resize(t.size());
4320 auto itt = t.begin();
auto it = elem.begin(), ite = elem.end();
4321 scalar_type e = coeff*alpha1*alpha2;
4322 size_type nd = ((t.size()) >> 2);
4323 for (size_type i = 0; i < nd; ++i) {
4324 *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4325 *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4327 for (; it != ite;) *it++ = (*itt++) * e;
4331 auto itt = t.begin();
auto it = elem.begin(), ite = elem.end();
4332 scalar_type e = coeff*alpha1*alpha2;
4333 size_type nd = ((t.size()) >> 2);
4334 for (size_type i = 0; i < nd; ++i) {
4335 *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4336 *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4338 for (; it != ite;) *it++ += (*itt++) * e;
4341 if (ipt == nbpt-1) {
4342 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
4345 if (ninf == scalar_type(0))
return 0;
4347 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
4349 auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4350 GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0],
"Internal error");
4351 dofs1.resize(ct1.size());
4352 for (size_type i = 0; i < ct1.size(); ++i)
4353 dofs1[i] = ct1[i] + I1.first();
4355 if (pmf2 == pmf1 && cv1 == cv2) {
4356 if (I1.first() == I2.first()) {
4357 add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4359 dofs2.resize(dofs1.size());
4360 for (size_type i = 0; i < dofs1.size(); ++i)
4361 dofs2[i] = dofs1[i] + I2.first() - I1.first();
4362 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4366 auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4367 GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1],
"Internal error");
4368 dofs2.resize(ct2.size());
4369 for (size_type i = 0; i < ct2.size(); ++i)
4370 dofs2[i] = ct2[i] + I2.first();
4371 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4376 ga_instruction_matrix_assembly_standard_scalar
4377 (
const base_tensor &t_, MAT &Kn_,
4378 const fem_interpolation_context &ctx1_,
4379 const fem_interpolation_context &ctx2_,
4380 const gmm::sub_interval &In1_,
const gmm::sub_interval &In2_,
4381 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
4382 const scalar_type &coeff_,
const scalar_type &alpha2_,
4383 const scalar_type &alpha1_,
4384 const size_type &nbpt_,
const size_type &ipt_)
4385 : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4386 I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4387 coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4388 nbpt(nbpt_), ipt(ipt_) {}
4391 template <
class MAT = model_real_sparse_matrix>
4392 struct ga_instruction_matrix_assembly_standard_vector:
public ga_instruction {
4393 const base_tensor &t;
4395 const fem_interpolation_context &ctx1, &ctx2;
4396 const gmm::sub_interval &I1, &I2;
4397 const mesh_fem *pmf1, *pmf2;
4398 const scalar_type &coeff, &alpha1, &alpha2;
4399 const size_type &nbpt, &ipt;
4400 mutable base_vector elem;
4401 mutable std::vector<size_type> dofs1, dofs2, dofs1_sort;
4402 virtual int exec() {
4403 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard " 4406 elem.resize(t.size());
4407 auto itt = t.begin();
auto it = elem.begin(), ite = elem.end();
4408 scalar_type e = coeff*alpha1*alpha2;
4409 size_type nd = ((t.size()) >> 3);
4410 for (size_type i = 0; i < nd; ++i) {
4411 *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4412 *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4413 *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4414 *it++ = (*itt++) * e; *it++ = (*itt++) * e;
4416 for (; it != ite;) *it++ = (*itt++) * e;
4420 auto itt = t.begin();
auto it = elem.begin(), ite = elem.end();
4421 scalar_type e = coeff*alpha1*alpha2;
4422 size_type nd = ((t.size()) >> 3);
4423 for (size_type i = 0; i < nd; ++i) {
4424 *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4425 *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4426 *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4427 *it++ += (*itt++) * e; *it++ += (*itt++) * e;
4429 for (; it != ite;) *it++ += (*itt++) * e;
4432 if (ipt == nbpt-1) {
4433 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
4436 if (ninf == scalar_type(0))
return 0;
4437 size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
4439 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4441 auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4442 size_type qmult1 = pmf1->get_qdim();
4443 if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
4444 dofs1.assign(s1, I1.first());
4445 auto itd = dofs1.begin();
4446 for (
auto itt = ct1.begin(); itt != ct1.end(); ++itt)
4447 for (size_type q = 0; q < qmult1; ++q)
4450 if (pmf2 == pmf1 && cv1 == cv2) {
4451 if (I1.first() == I2.first()) {
4452 add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4454 dofs2.resize(dofs1.size());
4455 for (size_type i = 0; i < dofs1.size(); ++i)
4456 dofs2[i] = dofs1[i] + I2.first() - I1.first();
4457 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4461 auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4462 size_type qmult2 = pmf2->get_qdim();
4463 if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
4464 dofs2.assign(s2, I2.first());
4465 itd = dofs2.begin();
4466 for (
auto itt = ct2.begin(); itt != ct2.end(); ++itt)
4467 for (size_type q = 0; q < qmult2; ++q)
4470 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4475 ga_instruction_matrix_assembly_standard_vector
4476 (
const base_tensor &t_, MAT &Kn_,
4477 const fem_interpolation_context &ctx1_,
4478 const fem_interpolation_context &ctx2_,
4479 const gmm::sub_interval &In1_,
const gmm::sub_interval &In2_,
4480 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
4481 const scalar_type &coeff_,
const scalar_type &alpha2_,
4482 const scalar_type &alpha1_,
const size_type &nbpt_,
4483 const size_type &ipt_)
4484 : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4485 I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4486 coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4487 nbpt(nbpt_), ipt(ipt_), dofs1(0), dofs2(0) {}
4490 struct ga_instruction_matrix_assembly_standard_vector_opt10_2
4491 :
public ga_instruction {
4492 const base_tensor &t;
4493 model_real_sparse_matrix &K;
4494 const fem_interpolation_context &ctx1, &ctx2;
4495 const gmm::sub_interval &I1, &I2;
4496 const mesh_fem *pmf1, *pmf2;
4497 const scalar_type &coeff, &alpha1, &alpha2;
4498 const size_type &nbpt, &ipt;
4499 mutable base_vector elem;
4500 mutable std::vector<size_type> dofs1, dofs2, dofs1_sort;
4501 virtual int exec() {
4502 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard " 4503 "vector fems optimized for format 10 qdim 2");
4504 size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s1_q = 2*s1;
4505 size_type ss1 = s1/2, ss2 = s2/2;
4506 scalar_type e = coeff*alpha1*alpha2;
4508 elem.resize(ss1*ss2);
4509 auto itel = elem.begin();
4510 for (size_type j = 0; j < ss2; ++j) {
4511 auto it = t.begin() + j*s1_q;
4512 for (size_type i = 0; i < ss1; ++i, it += 2)
4513 *itel++ = (*it) * e;
4516 auto itel = elem.begin();
4517 for (size_type j = 0; j < ss2; ++j) {
4518 auto it = t.begin() + j*s1_q;
4519 for (size_type i = 0; i < ss1; ++i, it += 2)
4520 *itel++ += (*it) * e;
4523 if (ipt == nbpt-1) {
4524 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
4527 if (ninf == scalar_type(0))
return 0;
4528 size_type N = ctx1.N();
4529 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4530 size_type i1 = I1.first(), i2 = I2.first();
4532 auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4534 for (size_type i = 0; i < ss1; ++i) dofs1[i] = i1 + ct1[i];
4536 if (pmf2 == pmf1 && cv1 == cv2) {
4538 add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4539 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4540 add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4543 for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct1[i];
4544 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4545 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4546 for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4547 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4551 auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4553 for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct2[i];
4554 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4555 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4556 for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4557 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4562 ga_instruction_matrix_assembly_standard_vector_opt10_2
4563 (
const base_tensor &t_, model_real_sparse_matrix &Kn_,
4564 const fem_interpolation_context &ctx1_,
4565 const fem_interpolation_context &ctx2_,
4566 const gmm::sub_interval &In1_,
const gmm::sub_interval &In2_,
4567 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
4568 const scalar_type &coeff_,
const scalar_type &alpha2_,
4569 const scalar_type &alpha1_,
const size_type &nbpt_,
4570 const size_type &ipt_)
4571 : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4572 I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4573 coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4574 nbpt(nbpt_), ipt(ipt_), dofs1(0), dofs2(0) {}
4577 struct ga_instruction_matrix_assembly_standard_vector_opt10_3
4578 :
public ga_instruction {
4579 const base_tensor &t;
4580 model_real_sparse_matrix &K;
4581 const fem_interpolation_context &ctx1, &ctx2;
4582 const gmm::sub_interval &I1, &I2;
4583 const mesh_fem *pmf1, *pmf2;
4584 const scalar_type &coeff, &alpha1, &alpha2;
4585 const size_type &nbpt, &ipt;
4586 mutable base_vector elem;
4587 mutable std::vector<size_type> dofs1, dofs2, dofs1_sort;
4588 virtual int exec() {
4589 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard " 4590 "vector fems optimized for format 10 qdim 3");
4591 size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s1_q = 3*s1;
4592 size_type ss1 = s1/3, ss2 = s2/3;
4593 scalar_type e = coeff*alpha1*alpha2;
4595 elem.resize(ss1*ss2);
4596 auto itel = elem.begin();
4597 for (size_type j = 0; j < ss2; ++j) {
4598 auto it = t.begin() + j*s1_q;
4599 for (size_type i = 0; i < ss1; ++i, it += 3)
4600 *itel++ = (*it) * e;
4603 auto itel = elem.begin();
4604 for (size_type j = 0; j < ss2; ++j) {
4605 auto it = t.begin() + j*s1_q;
4606 for (size_type i = 0; i < ss1; ++i, it += 3)
4607 *itel++ += (*it) * e;
4610 if (ipt == nbpt-1) {
4611 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
4614 if (ninf == scalar_type(0))
return 0;
4615 size_type N = ctx1.N();
4616 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4617 size_type i1 = I1.first(), i2 = I2.first();
4619 auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4621 for (size_type i = 0; i < ss1; ++i) dofs1[i] = i1 + ct1[i];
4623 if (pmf2 == pmf1 && cv1 == cv2) {
4625 add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4626 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4627 add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4628 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4629 add_elem_matrix_(K, dofs1, dofs1, dofs1_sort, elem, ninf, N);
4632 for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct1[i];
4633 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4634 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4635 for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4636 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4637 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4638 for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4639 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4643 auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4645 for (size_type i = 0; i < ss2; ++i) dofs2[i] = i2 + ct2[i];
4646 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4647 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4648 for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4649 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4650 for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
4651 for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
4652 add_elem_matrix_(K, dofs1, dofs2, dofs1_sort, elem, ninf, N);
4657 ga_instruction_matrix_assembly_standard_vector_opt10_3
4658 (
const base_tensor &t_, model_real_sparse_matrix &Kn_,
4659 const fem_interpolation_context &ctx1_,
4660 const fem_interpolation_context &ctx2_,
4661 const gmm::sub_interval &In1_,
const gmm::sub_interval &In2_,
4662 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
4663 const scalar_type &coeff_,
const scalar_type &alpha2_,
4664 const scalar_type &alpha1_,
const size_type &nbpt_,
4665 const size_type &ipt_)
4666 : t(t_), K(Kn_), ctx1(ctx1_), ctx2(ctx2_),
4667 I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_),
4668 coeff(coeff_), alpha1(alpha1_), alpha2(alpha2_),
4669 nbpt(nbpt_), ipt(ipt_), dofs1(0), dofs2(0) {}
4681 static void add_interval_to_gis(
const ga_workspace &workspace,
4682 const std::string &varname,
4683 ga_instruction_set &gis) {
4684 if (workspace.variable_group_exists(varname)) {
4685 for (
const std::string &v : workspace.variable_group(varname))
4686 add_interval_to_gis(workspace, v, gis);
4688 if (gis.var_intervals.find(varname) == gis.var_intervals.end()) {
4689 const mesh_fem *mf = workspace.associated_mf(varname);
4690 size_type nd = mf ? mf->nb_basic_dof() :
4691 gmm::vect_size(workspace.value(varname));
4692 gis.var_intervals[varname]=gmm::sub_interval(gis.nb_dof, nd);
4695 gis.max_dof = std::max(gis.max_dof,
4696 workspace.interval_of_variable(varname).last());
4700 static void extend_variable_in_gis(
const ga_workspace &workspace,
4701 const std::string &varname,
4702 ga_instruction_set &gis) {
4703 if (workspace.variable_group_exists(varname)) {
4704 for (
const std::string &v : workspace.variable_group(varname))
4705 extend_variable_in_gis(workspace, v, gis);
4706 }
else if (gis.extended_vars.find(varname)==gis.extended_vars.end()) {
4707 const mesh_fem *mf = workspace.associated_mf(varname);
4708 if (mf->is_reduced()) {
4709 auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
4710 base_vector U(mf->nb_basic_dof() * n);
4711 mf->extend_vector(workspace.value(varname), U);
4712 gis.really_extended_vars[varname] = U;
4713 gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
4715 gis.extended_vars[varname] = &(workspace.value(varname));
4720 static void ga_clear_node_list
4721 (pga_tree_node pnode, std::map<scalar_type,
4722 std::list<pga_tree_node> > &node_list) {
4723 std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
4724 for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
4725 it != loc_node_list.end(); ) {
4726 if (*it == pnode) it = loc_node_list.erase(it);
else ++it;
4728 for (size_type i = 0; i < pnode->children.size(); ++i)
4729 ga_clear_node_list(pnode->children[i], node_list);
4732 static void ga_compile_node(
const pga_tree_node pnode,
4733 const ga_workspace &workspace,
4734 ga_instruction_set &gis,
4735 ga_instruction_set::region_mim_instructions &rmi,
4736 const mesh &m,
bool function_case,
4737 ga_if_hierarchy &if_hierarchy) {
4739 if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
4740 pnode->node_type == GA_NODE_OPERATOR ||
4741 pnode->node_type == GA_NODE_SPEC_FUNC ||
4742 pnode->node_type == GA_NODE_CONSTANT ||
4743 pnode->node_type == GA_NODE_ALLINDICES ||
4744 pnode->node_type == GA_NODE_RESHAPE ||
4745 pnode->node_type == GA_NODE_SWAP_IND ||
4746 pnode->node_type == GA_NODE_IND_MOVE_LAST ||
4747 pnode->node_type == GA_NODE_CONTRACT)
return;
4751 pga_instruction pgai;
4752 ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
4753 ga_if_hierarchy new_if_hierarchy;
4755 const mesh_fem *mf1 = 0, *mf2 = 0;
4756 const mesh_fem **mfg1 = 0, **mfg2 = 0;
4757 fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
4758 bool tensor_to_clear =
false;
4759 bool tensor_to_adapt =
false;
4761 if (pnode->test_function_type) {
4762 if (pnode->name_test1.size())
4763 mf1 = workspace.associated_mf(pnode->name_test1);
4766 const std::string &intn1 = pnode->interpolate_name_test1;
4768 tensor_to_adapt =
true;
4769 pctx1 = &(rmi.interpolate_infos[intn1].ctx);
4770 if (workspace.variable_group_exists(pnode->name_test1)) {
4771 ga_instruction_set::variable_group_info &vgi =
4772 rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
4778 if (pnode->name_test2.size())
4779 mf2 = workspace.associated_mf(pnode->name_test2);
4782 const std::string &intn2 = pnode->interpolate_name_test2;
4784 tensor_to_adapt =
true;
4785 pctx2 = &(rmi.interpolate_infos[intn2].ctx);
4786 if (workspace.variable_group_exists(pnode->name_test2)) {
4787 ga_instruction_set::variable_group_info &vgi =
4788 rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
4798 pnode->t.set_to_original(); pnode->t.set_sparsity(0, 0);
4799 bool is_uniform =
false;
4800 if (pnode->test_function_type == 1) {
4802 pgai = std::make_shared<ga_instruction_first_ind_tensor>
4803 (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
4804 if (mf1 && mf1->is_uniform())
4805 { is_uniform =
true; pctx1->invalid_convex_num(); }
4806 }
else if (pnode->test_function_type == 2) {
4808 pgai = std::make_shared<ga_instruction_first_ind_tensor>
4809 (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
4810 if (mf2 && mf2->is_uniform())
4811 { is_uniform =
true; pctx2->invalid_convex_num(); }
4812 }
else if (pnode->test_function_type == 3) {
4813 if ((mf1 || mfg1) && (mf2 || mfg2)) {
4814 pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
4815 (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
4816 pnode->qdim2, mf2, mfg2);
4817 if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
4819 pctx1->invalid_convex_num();
4820 pctx2->invalid_convex_num();
4822 }
else if (mf1 || mfg1) {
4823 pgai = std::make_shared<ga_instruction_first_ind_tensor>
4824 (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
4825 if (mf1 && mf1->is_uniform())
4826 { is_uniform =
true; pctx1->invalid_convex_num(); }
4827 }
else if (mf2 || mfg2) {
4828 pgai = std::make_shared<ga_instruction_second_ind_tensor>
4829 (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
4830 if (mf2 && mf2->is_uniform())
4831 { is_uniform =
true; pctx2->invalid_convex_num(); }
4836 pnode->t.set_to_original();
4837 if (rmi.node_list.find(pnode->hash_value) != rmi.node_list.end()) {
4838 std::list<pga_tree_node> &node_list = rmi.node_list[pnode->hash_value];
4839 for (std::list<pga_tree_node>::iterator it = node_list.begin();
4840 it != node_list.end(); ++it) {
4844 if (sub_tree_are_equal(pnode, *it, workspace, 1)) {
4845 pnode->t.set_to_copy((*it)->t);
4848 if (sub_tree_are_equal(pnode, *it, workspace, 2)) {
4850 if (pnode->nb_test_functions() == 2) {
4854 else { rmi.instructions.push_back(std::move(pgai)); }
4856 pgai = std::make_shared<ga_instruction_transpose_test>
4857 (pnode->tensor(), (*it)->tensor());
4858 rmi.instructions.push_back(std::move(pgai));
4860 "No use of X is allowed in scalar functions");
4862 pnode->t.set_to_copy((*it)->t);
4866 std::stringstream ss;
4867 ss <<
"Detected wrong equivalent nodes: ";
4868 ga_print_node(pnode, ss);
4869 ss <<
" and "; ga_print_node(*it, ss);
4870 ss <<
" (no problem, but hash code would be adapted) " << endl;
4871 GMM_TRACE2(ss.str());
4876 if (is_uniform) { pgai->exec(); }
4878 if (tensor_to_adapt)
4879 rmi.instructions.push_back(std::move(pgai));
4881 rmi.elt_instructions.push_back(std::move(pgai));
4885 size_type interpolate_filter_inst = rmi.instructions.size();
4886 if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
4887 pgai = pga_instruction();
4888 rmi.instructions.push_back(std::move(pgai));
4889 if_hierarchy.increment();
4890 new_if_hierarchy.child_of(if_hierarchy);
4891 pif_hierarchy = &new_if_hierarchy;
4894 for (size_type i = 0; i < pnode->children.size(); ++i)
4895 ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
4896 function_case, *pif_hierarchy);
4898 if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
4899 const std::string &intn = pnode->interpolate_name;
4900 ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
4901 pgai = std::make_shared<ga_instruction_interpolate_filter>
4902 (pnode->tensor(), inin, pnode->nbc1,
4903 int(rmi.instructions.size() - interpolate_filter_inst));
4904 rmi.instructions[interpolate_filter_inst].swap(pgai);
4905 pgai = std::make_shared<ga_instruction_copy_tensor>
4906 (pnode->tensor(), pnode->children[0]->tensor());
4907 rmi.instructions.push_back(std::move(pgai));
4908 ga_clear_node_list(pnode->children[0], rmi.node_list);
4911 static scalar_type minus = -scalar_type(1);
4912 size_type nbch = pnode->children.size();
4913 pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
4914 pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
4915 bgeot::multi_index mi;
4916 const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
4918 size_type dim0 = child0 ? child0->tensor_order() : 0;
4919 size_type dim1 = child1 ? child1->tensor_order() : 0;
4921 switch (pnode->node_type) {
4923 case GA_NODE_PREDEF_FUNC:
case GA_NODE_OPERATOR:
case GA_NODE_SPEC_FUNC:
4924 case GA_NODE_CONSTANT:
case GA_NODE_ALLINDICES:
case GA_NODE_ZERO:
4925 case GA_NODE_RESHAPE:
case GA_NODE_SWAP_IND:
case GA_NODE_IND_MOVE_LAST:
4926 case GA_NODE_CONTRACT:
case GA_NODE_INTERPOLATE_FILTER:
4930 GMM_ASSERT1(!function_case,
4931 "No use of X is allowed in scalar functions");
4933 GA_DEBUG_ASSERT(pnode->tensor().size() == 1,
"dimensions mismatch");
4934 GMM_ASSERT1(pnode->nbc1 <= m.dim(),
4935 "Bad index for X in expression");
4936 pgai = std::make_shared<ga_instruction_X_component>
4937 (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
4939 if (pnode->tensor().size() != m.dim())
4940 pnode->init_vector_tensor(m.dim());
4941 pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
4943 rmi.instructions.push_back(std::move(pgai));
4946 case GA_NODE_ELT_SIZE:
4947 GMM_ASSERT1(!function_case,
4948 "No use of element_size is allowed in functions");
4949 if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
4950 pgai = std::make_shared<ga_instruction_element_size>
4951 (pnode->tensor(), gis.elt_size);
4952 gis.need_elt_size =
true;
4953 rmi.instructions.push_back(std::move(pgai));
4957 GMM_ASSERT1(!function_case,
4958 "No use of element_K is allowed in functions");
4959 pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
4961 rmi.instructions.push_back(std::move(pgai));
4965 GMM_ASSERT1(!function_case,
4966 "No use of element_B is allowed in functions");
4967 pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
4969 rmi.instructions.push_back(std::move(pgai));
4972 case GA_NODE_NORMAL:
4974 GMM_ASSERT1(!function_case,
4975 "No use of Normal is allowed in functions");
4976 if (pnode->tensor().size() != m.dim())
4977 pnode->init_vector_tensor(m.dim());
4978 const mesh_im_level_set *mimls
4979 =
dynamic_cast<const mesh_im_level_set *
>(rmi.im);
4980 if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
4982 pgai = std::make_shared<ga_instruction_level_set_normal_vector>
4983 (pnode->tensor(), mimls, gis.ctx);
4984 rmi.instructions.push_back(std::move(pgai));
4986 pgai = std::make_shared<ga_instruction_copy_Normal>
4987 (pnode->tensor(), gis.Normal);
4988 rmi.instructions.push_back(std::move(pgai));
4993 case GA_NODE_INTERPOLATE_X:
4994 case GA_NODE_INTERPOLATE_NORMAL:
4995 GMM_ASSERT1(!function_case,
4996 "No use of Interpolate is allowed in functions");
4997 if (pnode->tensor().size() != m.dim())
4998 pnode->init_vector_tensor(m.dim());
4999 if (pnode->node_type == GA_NODE_INTERPOLATE_X)
5000 pgai = std::make_shared<ga_instruction_copy_small_vect>
5002 rmi.interpolate_infos[pnode->interpolate_name].pt_y);
5003 else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
5004 pgai = std::make_shared<ga_instruction_copy_Normal>
5006 rmi.interpolate_infos[pnode->interpolate_name].Normal);
5007 rmi.instructions.push_back(std::move(pgai));
5010 case GA_NODE_VAL:
case GA_NODE_GRAD:
5011 case GA_NODE_HESS:
case GA_NODE_DIVERG:
5012 case GA_NODE_ELEMENTARY_VAL:
case GA_NODE_ELEMENTARY_GRAD:
5013 case GA_NODE_ELEMENTARY_HESS:
case GA_NODE_ELEMENTARY_DIVERG:
5014 case GA_NODE_XFEM_PLUS_VAL:
case GA_NODE_XFEM_PLUS_GRAD:
5015 case GA_NODE_XFEM_PLUS_HESS:
case GA_NODE_XFEM_PLUS_DIVERG:
5016 case GA_NODE_XFEM_MINUS_VAL:
case GA_NODE_XFEM_MINUS_GRAD:
5017 case GA_NODE_XFEM_MINUS_HESS:
case GA_NODE_XFEM_MINUS_DIVERG:
5018 if (function_case) {
5019 GMM_ASSERT1(pnode->node_type != GA_NODE_ELEMENTARY_VAL &&
5020 pnode->node_type != GA_NODE_ELEMENTARY_GRAD &&
5021 pnode->node_type != GA_NODE_ELEMENTARY_HESS &&
5022 pnode->node_type != GA_NODE_ELEMENTARY_DIVERG,
5023 "No elementary transformation is allowed in functions");
5024 GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
5025 pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
5026 pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
5027 pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
5028 "Xfem_plus not allowed in functions");
5029 GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
5030 pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
5031 pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
5032 pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
5033 "Xfem_plus not allowed in functions");
5034 const mesh_fem *mf = workspace.associated_mf(pnode->name);
5035 const im_data *imd = workspace.associated_im_data(pnode->name);
5036 GMM_ASSERT1(!mf,
"No fem expression is allowed in function expression");
5037 GMM_ASSERT1(!imd,
"No integration method data is allowed in " 5038 "function expression");
5039 if (gmm::vect_size(workspace.value(pnode->name)) == 1)
5040 pgai = std::make_shared<ga_instruction_copy_scalar>
5041 (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
5043 pgai = std::make_shared<ga_instruction_copy_vect>
5044 (pnode->tensor().as_vector(), workspace.value(pnode->name));
5045 rmi.instructions.push_back(std::move(pgai));
5047 const mesh_fem *mf = workspace.associated_mf(pnode->name);
5048 const im_data *imd = workspace.associated_im_data(pnode->name);
5051 pgai = std::make_shared<ga_instruction_extract_local_im_data>
5052 (pnode->tensor(), *imd, workspace.value(pnode->name),
5053 gis.pai, gis.ctx, workspace.qdim(pnode->name));
5054 rmi.instructions.push_back(std::move(pgai));
5056 GMM_ASSERT1(mf,
"Internal error");
5058 GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
5059 "The finite element of variable " << pnode->name <<
5060 " has to be defined on the same mesh than the " 5061 "integration method or interpolation used");
5064 if (rmi.local_dofs.count(pnode->name) == 0) {
5065 rmi.local_dofs[pnode->name] = base_vector(1);
5066 extend_variable_in_gis(workspace, pnode->name, gis);
5068 size_type qmult2 = mf->get_qdim();
5069 if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
5071 pgai = std::make_shared<ga_instruction_slice_local_dofs>
5072 (*mf, *(gis.extended_vars[pnode->name]), gis.ctx,
5073 rmi.local_dofs[pnode->name],
5074 workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
5075 rmi.elt_instructions.push_back(std::move(pgai));
5079 if (rmi.pfps.count(mf) == 0) {
5081 pgai = std::make_shared<ga_instruction_update_pfp>
5082 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5083 if (mf->is_uniform())
5084 rmi.begin_instructions.push_back(std::move(pgai));
5086 rmi.instructions.push_back(std::move(pgai));
5090 pgai = pga_instruction();
5091 switch (pnode->node_type) {
5092 case GA_NODE_VAL:
case GA_NODE_ELEMENTARY_VAL:
5093 if (rmi.base.count(mf) == 0 ||
5094 !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
5095 rmi.base_hierarchy[mf].push_back(if_hierarchy);
5096 pgai = std::make_shared<ga_instruction_val_base>
5097 (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5100 case GA_NODE_XFEM_PLUS_VAL:
5101 if (rmi.xfem_plus_base.count(mf) == 0 ||
5102 !(if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))) {
5103 rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
5104 pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
5105 (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5108 case GA_NODE_XFEM_MINUS_VAL:
5109 if (rmi.xfem_minus_base.count(mf) == 0 ||
5110 !(if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))) {
5111 rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
5112 pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
5113 (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5116 case GA_NODE_GRAD:
case GA_NODE_DIVERG:
5117 case GA_NODE_ELEMENTARY_GRAD:
case GA_NODE_ELEMENTARY_DIVERG:
5118 if (rmi.grad.count(mf) == 0 ||
5119 !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
5120 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5121 pgai = std::make_shared<ga_instruction_grad_base>
5122 (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5125 case GA_NODE_XFEM_PLUS_GRAD:
case GA_NODE_XFEM_PLUS_DIVERG:
5126 if (rmi.xfem_plus_grad.count(mf) == 0 ||
5127 !(if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))) {
5128 rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
5129 pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
5130 (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5133 case GA_NODE_XFEM_MINUS_GRAD:
case GA_NODE_XFEM_MINUS_DIVERG:
5134 if (rmi.xfem_minus_grad.count(mf) == 0 ||
5135 !(if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))) {
5136 rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
5137 pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
5138 (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5141 case GA_NODE_HESS:
case GA_NODE_ELEMENTARY_HESS:
5142 if (rmi.hess.count(mf) == 0 ||
5143 !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
5144 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5145 pgai = std::make_shared<ga_instruction_hess_base>
5146 (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5149 case GA_NODE_XFEM_PLUS_HESS:
5150 if (rmi.xfem_plus_hess.count(mf) == 0 ||
5151 !(if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))) {
5152 rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
5153 pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
5154 (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5157 case GA_NODE_XFEM_MINUS_HESS:
5158 if (rmi.xfem_minus_hess.count(mf) == 0 ||
5159 !(if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))) {
5160 rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
5161 pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
5162 (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5166 default : GMM_ASSERT1(
false,
"Internal error");
5168 if (pgai) rmi.instructions.push_back(std::move(pgai));
5171 switch (pnode->node_type) {
5173 pgai = std::make_shared<ga_instruction_val>
5174 (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
5175 workspace.qdim(pnode->name));
5178 pgai = std::make_shared<ga_instruction_grad>
5179 (pnode->tensor(), rmi.grad[mf],
5180 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5183 pgai = std::make_shared<ga_instruction_hess>
5184 (pnode->tensor(), rmi.hess[mf],
5185 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5187 case GA_NODE_DIVERG:
5188 pgai = std::make_shared<ga_instruction_diverg>
5189 (pnode->tensor(), rmi.grad[mf],
5190 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5192 case GA_NODE_XFEM_PLUS_VAL:
5193 pgai = std::make_shared<ga_instruction_val>
5194 (pnode->tensor(), rmi.xfem_plus_base[mf],
5195 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5197 case GA_NODE_XFEM_PLUS_GRAD:
5198 pgai = std::make_shared<ga_instruction_grad>
5199 (pnode->tensor(), rmi.xfem_plus_grad[mf],
5200 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5202 case GA_NODE_XFEM_PLUS_HESS:
5203 pgai = std::make_shared<ga_instruction_hess>
5204 (pnode->tensor(), rmi.xfem_plus_hess[mf],
5205 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5207 case GA_NODE_XFEM_PLUS_DIVERG:
5208 pgai = std::make_shared<ga_instruction_diverg>
5209 (pnode->tensor(), rmi.xfem_plus_grad[mf],
5210 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5212 case GA_NODE_XFEM_MINUS_VAL:
5213 pgai = std::make_shared<ga_instruction_val>
5214 (pnode->tensor(), rmi.xfem_minus_base[mf],
5215 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5217 case GA_NODE_XFEM_MINUS_GRAD:
5218 pgai = std::make_shared<ga_instruction_grad>
5219 (pnode->tensor(), rmi.xfem_minus_grad[mf],
5220 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5222 case GA_NODE_XFEM_MINUS_HESS:
5223 pgai = std::make_shared<ga_instruction_hess>
5224 (pnode->tensor(), rmi.xfem_minus_hess[mf],
5225 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5227 case GA_NODE_XFEM_MINUS_DIVERG:
5228 pgai = std::make_shared<ga_instruction_diverg>
5229 (pnode->tensor(), rmi.xfem_minus_grad[mf],
5230 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5232 case GA_NODE_ELEMENTARY_VAL:
5234 ga_instruction_set::elementary_trans_info &eti
5235 = rmi.elementary_trans_infos[pnode->elementary_name];
5237 std::make_shared<ga_instruction_elementary_transformation_val>
5238 (pnode->tensor(), rmi.base[mf],
5239 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5240 workspace.elementary_transformation(pnode->elementary_name),
5241 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5244 case GA_NODE_ELEMENTARY_GRAD:
5246 ga_instruction_set::elementary_trans_info &eti
5247 = rmi.elementary_trans_infos[pnode->elementary_name];
5249 std::make_shared<ga_instruction_elementary_transformation_grad>
5250 (pnode->tensor(), rmi.grad[mf],
5251 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5252 workspace.elementary_transformation(pnode->elementary_name),
5253 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5256 case GA_NODE_ELEMENTARY_HESS:
5258 ga_instruction_set::elementary_trans_info &eti
5259 = rmi.elementary_trans_infos[pnode->elementary_name];
5261 std::make_shared<ga_instruction_elementary_transformation_hess>
5262 (pnode->tensor(), rmi.hess[mf],
5263 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5264 workspace.elementary_transformation(pnode->elementary_name),
5265 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5268 case GA_NODE_ELEMENTARY_DIVERG:
5270 ga_instruction_set::elementary_trans_info &eti
5271 = rmi.elementary_trans_infos[pnode->elementary_name];
5273 std::make_shared<ga_instruction_elementary_transformation_diverg>
5274 (pnode->tensor(), rmi.grad[mf],
5275 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name),
5276 workspace.elementary_transformation(pnode->elementary_name),
5277 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5282 rmi.instructions.push_back(std::move(pgai));
5287 case GA_NODE_INTERPOLATE_VAL:
case GA_NODE_INTERPOLATE_GRAD:
5288 case GA_NODE_INTERPOLATE_HESS:
case GA_NODE_INTERPOLATE_DIVERG:
5290 extend_variable_in_gis(workspace, pnode->name, gis);
5292 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
5293 const std::string &intn = pnode->interpolate_name;
5294 const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
5295 fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
5296 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
5297 if (workspace.variable_group_exists(pnode->name)) {
5298 ga_instruction_set::variable_group_info &vgi =
5299 rmi.interpolate_infos[intn].groups_info[pnode->name];
5300 mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
5303 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
5305 pgai = std::make_shared<ga_instruction_interpolate_val>
5306 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5307 workspace.qdim(pnode->name),
5308 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5309 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
5311 pgai = std::make_shared<ga_instruction_interpolate_grad>
5312 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5313 workspace.qdim(pnode->name),
5314 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5315 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
5317 pgai = std::make_shared<ga_instruction_interpolate_hess>
5318 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5319 workspace.qdim(pnode->name),
5320 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5322 pgai = std::make_shared<ga_instruction_interpolate_diverg>
5323 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
5324 workspace.qdim(pnode->name),
5325 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
5327 rmi.instructions.push_back(std::move(pgai));
5331 case GA_NODE_INTERPOLATE_DERIVATIVE:
5332 GMM_ASSERT1(!function_case,
5333 "No use of Interpolate is allowed in functions");
5334 pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
5336 rmi.interpolate_infos[pnode->interpolate_name_der]
5337 .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
5338 rmi.instructions.push_back(std::move(pgai));
5341 case GA_NODE_VAL_TEST:
case GA_NODE_GRAD_TEST:
5342 case GA_NODE_HESS_TEST:
case GA_NODE_DIVERG_TEST:
5343 case GA_NODE_ELEMENTARY_VAL_TEST:
case GA_NODE_ELEMENTARY_GRAD_TEST:
5344 case GA_NODE_ELEMENTARY_HESS_TEST:
case GA_NODE_ELEMENTARY_DIVERG_TEST:
5345 case GA_NODE_XFEM_PLUS_VAL_TEST:
case GA_NODE_XFEM_PLUS_GRAD_TEST:
5346 case GA_NODE_XFEM_PLUS_HESS_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
5347 case GA_NODE_XFEM_MINUS_VAL_TEST:
case GA_NODE_XFEM_MINUS_GRAD_TEST:
5348 case GA_NODE_XFEM_MINUS_HESS_TEST:
case GA_NODE_XFEM_MINUS_DIVERG_TEST:
5352 const mesh_fem *mf = workspace.associated_mf(pnode->name);
5354 GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
5355 "The finite element of variable " << pnode->name <<
5356 " and the applied integration method have to be" 5357 " defined on the same mesh");
5360 if (rmi.pfps.count(mf) == 0) {
5362 pgai = std::make_shared<ga_instruction_update_pfp>
5363 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5365 rmi.begin_instructions.push_back(std::move(pgai));
5367 rmi.instructions.push_back(std::move(pgai));
5371 pgai = pga_instruction();
5372 switch (pnode->node_type) {
5373 case GA_NODE_VAL_TEST:
case GA_NODE_ELEMENTARY_VAL_TEST:
5374 if (rmi.base.find(mf) == rmi.base.end() ||
5375 !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
5376 rmi.base_hierarchy[mf].push_back(if_hierarchy);
5377 pgai = std::make_shared<ga_instruction_val_base>
5378 (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5381 case GA_NODE_XFEM_PLUS_VAL_TEST:
5382 if (rmi.xfem_plus_base.find(mf) == rmi.xfem_plus_base.end() ||
5383 !(if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))) {
5384 rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
5385 pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
5386 (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5389 case GA_NODE_XFEM_MINUS_VAL_TEST:
5390 if (rmi.xfem_minus_base.find(mf) == rmi.xfem_minus_base.end() ||
5391 !(if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))) {
5392 rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
5393 pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
5394 (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5397 case GA_NODE_GRAD_TEST:
case GA_NODE_DIVERG_TEST:
5398 case GA_NODE_ELEMENTARY_GRAD_TEST:
5399 case GA_NODE_ELEMENTARY_DIVERG_TEST:
5400 if (rmi.grad.find(mf) == rmi.grad.end() ||
5401 !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
5402 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5403 pgai = std::make_shared<ga_instruction_grad_base>
5404 (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5407 case GA_NODE_XFEM_PLUS_GRAD_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
5408 if (rmi.xfem_plus_grad.find(mf) == rmi.xfem_plus_grad.end() ||
5409 !(if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))) {
5410 rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
5411 pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
5412 (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5415 case GA_NODE_XFEM_MINUS_GRAD_TEST:
5416 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
5417 if (rmi.xfem_minus_grad.find(mf) == rmi.xfem_minus_grad.end() ||
5418 !(if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))) {
5419 rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
5420 pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
5421 (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5424 case GA_NODE_HESS_TEST:
case GA_NODE_ELEMENTARY_HESS_TEST:
5425 if (rmi.hess.count(mf) == 0 ||
5426 !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
5427 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5428 pgai = std::make_shared<ga_instruction_hess_base>
5429 (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5432 case GA_NODE_XFEM_PLUS_HESS_TEST:
5433 if (rmi.xfem_plus_hess.count(mf) == 0 ||
5434 !(if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
5436 rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
5437 pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
5438 (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5441 case GA_NODE_XFEM_MINUS_HESS_TEST:
5442 if (rmi.xfem_minus_hess.find(mf) == rmi.xfem_minus_hess.end() ||
5443 !(if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))) {
5444 rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
5445 pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
5446 (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5450 default : GMM_ASSERT1(
false,
"Internal error");
5452 if (pgai) rmi.instructions.push_back(std::move(pgai));
5455 switch(pnode->node_type) {
5456 case GA_NODE_VAL_TEST:
5458 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
5459 pnode->t.set_sparsity(1, mf->get_qdim());
5460 tensor_to_clear =
true;
5461 pgai = std::make_shared<ga_instruction_copy_vect_val_base>
5462 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
5464 pgai = std::make_shared<ga_instruction_copy_val_base>
5465 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
5468 case GA_NODE_GRAD_TEST:
5470 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
5471 pnode->t.set_sparsity(2, mf->get_qdim());
5472 tensor_to_clear =
true;
5473 pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
5474 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
5476 pgai = std::make_shared<ga_instruction_copy_grad_base>
5477 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
5480 case GA_NODE_HESS_TEST:
5482 pgai = std::make_shared<ga_instruction_copy_hess_base>
5483 (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
5484 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5485 pnode->t.set_sparsity(3, mf->get_qdim());
5487 case GA_NODE_DIVERG_TEST:
5489 pgai = std::make_shared<ga_instruction_copy_diverg_base>
5490 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
5492 case GA_NODE_XFEM_PLUS_VAL_TEST:
5494 pgai = std::make_shared<ga_instruction_copy_val_base>
5495 (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
5496 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5497 pnode->t.set_sparsity(1, mf->get_qdim());
5499 case GA_NODE_XFEM_PLUS_GRAD_TEST:
5501 pgai = std::make_shared<ga_instruction_copy_grad_base>
5502 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
5503 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5504 pnode->t.set_sparsity(2, mf->get_qdim());
5506 case GA_NODE_XFEM_PLUS_HESS_TEST:
5508 pgai = std::make_shared<ga_instruction_copy_hess_base>
5509 (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
5510 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5511 pnode->t.set_sparsity(3, mf->get_qdim());
5513 case GA_NODE_XFEM_PLUS_DIVERG_TEST:
5515 pgai = std::make_shared<ga_instruction_copy_diverg_base>
5516 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
5518 case GA_NODE_XFEM_MINUS_VAL_TEST:
5520 pgai = std::make_shared<ga_instruction_copy_val_base>
5521 (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
5522 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5523 pnode->t.set_sparsity(1, mf->get_qdim());
5525 case GA_NODE_XFEM_MINUS_GRAD_TEST:
5527 pgai = std::make_shared<ga_instruction_copy_grad_base>
5528 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
5529 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5530 pnode->t.set_sparsity(2, mf->get_qdim());
5532 case GA_NODE_XFEM_MINUS_HESS_TEST:
5534 pgai = std::make_shared<ga_instruction_copy_hess_base>
5535 (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
5536 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
5537 pnode->t.set_sparsity(3, mf->get_qdim());
5539 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
5541 pgai = std::make_shared<ga_instruction_copy_diverg_base>
5542 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
5544 case GA_NODE_ELEMENTARY_VAL_TEST:
5546 ga_instruction_set::elementary_trans_info &eti
5547 = rmi.elementary_trans_infos[pnode->elementary_name];
5549 std::make_shared<ga_instruction_elementary_transformation_val_base>
5550 (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
5551 workspace.elementary_transformation(pnode->elementary_name),
5552 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5555 case GA_NODE_ELEMENTARY_GRAD_TEST:
5557 ga_instruction_set::elementary_trans_info &eti
5558 = rmi.elementary_trans_infos[pnode->elementary_name];
5560 std::make_shared<ga_instruction_elementary_transformation_grad_base>
5561 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
5562 workspace.elementary_transformation(pnode->elementary_name),
5563 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5566 case GA_NODE_ELEMENTARY_HESS_TEST:
5568 ga_instruction_set::elementary_trans_info &eti
5569 = rmi.elementary_trans_infos[pnode->elementary_name];
5571 std::make_shared<ga_instruction_elementary_transformation_hess_base>
5572 (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
5573 workspace.elementary_transformation(pnode->elementary_name),
5574 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5577 case GA_NODE_ELEMENTARY_DIVERG_TEST:
5579 ga_instruction_set::elementary_trans_info &eti
5580 = rmi.elementary_trans_infos[pnode->elementary_name];
5582 std::make_shared<ga_instruction_elementary_transformation_diverg_base>
5583 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
5584 workspace.elementary_transformation(pnode->elementary_name),
5585 *mf, gis.ctx, eti.M, &(eti.mf), eti.icv);
5590 if (pgai) rmi.instructions.push_back(std::move(pgai));
5592 add_interval_to_gis(workspace, pnode->name, gis);
5596 case GA_NODE_INTERPOLATE_VAL_TEST:
case GA_NODE_INTERPOLATE_GRAD_TEST:
5597 case GA_NODE_INTERPOLATE_HESS_TEST:
case GA_NODE_INTERPOLATE_DIVERG_TEST:
5599 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
5600 const std::string &intn = pnode->interpolate_name;
5601 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
5602 if (workspace.variable_group_exists(pnode->name)) {
5603 ga_instruction_set::variable_group_info &vgi =
5604 rmi.interpolate_infos[intn].groups_info[pnode->name];
5605 mfg = &(vgi.mf); mfn = 0;
5608 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
5610 pgai = std::make_shared<ga_instruction_interpolate_val_base>
5611 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5612 workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
5614 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
5616 pgai = std::make_shared<ga_instruction_interpolate_grad_base>
5617 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5618 workspace.qdim(pnode->name),
5619 rmi.interpolate_infos[intn], gis.fp_pool);
5620 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
5622 pgai = std::make_shared<ga_instruction_interpolate_hess_base>
5623 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5624 workspace.qdim(pnode->name),
5625 rmi.interpolate_infos[intn], gis.fp_pool);
5628 pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
5629 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
5630 workspace.qdim(pnode->name),
5631 rmi.interpolate_infos[intn], gis.fp_pool);
5633 rmi.instructions.push_back(std::move(pgai));
5634 add_interval_to_gis(workspace, pnode->name, gis);
5639 switch(pnode->op_type) {
5642 if (pnode->tensor().size() == 1) {
5643 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
5644 "Internal error: child0 not scalar");
5645 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
5646 "Internal error: child1 not scalar");
5647 pgai = std::make_shared<ga_instruction_scalar_add>
5648 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5650 pgai = std::make_shared<ga_instruction_add>
5651 (pnode->tensor(), child0->tensor(), child1->tensor());
5653 if (child0->t.sparsity() == child1->t.sparsity()
5654 && child0->t.qdim() == child1->t.qdim())
5655 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5656 rmi.instructions.push_back(std::move(pgai));
5660 if (pnode->tensor().size() == 1) {
5661 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
5662 "Internal error: child0 not scalar");
5663 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
5664 "Internal error: child1 not scalar");
5665 pgai = std::make_shared<ga_instruction_scalar_sub>
5666 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5668 pgai = std::make_shared<ga_instruction_sub>
5669 (pnode->tensor(), child0->tensor(), child1->tensor());
5671 if (child0->t.sparsity() == child1->t.sparsity()
5672 && child0->t.qdim() == child1->t.qdim())
5673 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5674 rmi.instructions.push_back(std::move(pgai));
5677 case GA_UNARY_MINUS:
5678 if (pnode->tensor().size() == 1) {
5679 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
"Internal error");
5680 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5681 (pnode->tensor()[0], child0->tensor()[0], minus);
5683 pgai = std::make_shared<ga_instruction_scalar_mult>
5684 (pnode->tensor(), child0->tensor(), minus);
5686 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5687 rmi.instructions.push_back(std::move(pgai));
5691 case GA_DOT:
case GA_COLON:
case GA_MULT:
5693 size_type tps0 = child0->tensor_proper_size();
5694 size_type tps1 = child1->tensor_proper_size();
5695 size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
5696 size_type s2 =
size_type(round(sqrt(scalar_type(s1))));
5698 pgai = pga_instruction();
5699 if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
5700 pnode->op_type == GA_COLON ||
5701 (pnode->op_type == GA_MULT && dim0 == 4) ||
5702 (pnode->op_type == GA_MULT && dim1 <= 1) ||
5703 child0->tensor().size() == 1 || tps1 == 1) {
5705 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5706 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5707 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5709 else if (child0->tensor().size() == 1) {
5710 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
5711 pgai = std::make_shared<ga_instruction_scalar_mult>
5712 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
5714 else if (child1->tensor().size() == 1) {
5715 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5716 pgai = std::make_shared<ga_instruction_scalar_mult>
5717 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5719 else if (pnode->test_function_type < 3) {
5722 pgai = ga_uniform_instruction_simple_tmult
5723 (pnode->tensor(), child0->tensor(), child1->tensor());
5725 pgai = std::make_shared<ga_instruction_simple_tmult>
5726 (pnode->tensor(), child0->tensor(), child1->tensor());
5730 pgai = ga_uniform_instruction_simple_tmult
5731 (pnode->tensor(), child1->tensor(), child0->tensor());
5733 pgai = std::make_shared<ga_instruction_simple_tmult>
5734 (pnode->tensor(), child1->tensor(), child0->tensor());
5735 }
else if (is_uniform)
5736 pgai = ga_uniform_instruction_contraction_switch
5737 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
5739 pgai = ga_instruction_contraction_switch
5740 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
5743 if (child1->test_function_type == 1 ||
5744 child1->test_function_type == 3) {
5745 if (child1->test_function_type == 3 ||
5746 child1->tensor_proper_size() <= s2) {
5749 pgai = ga_uniform_instruction_simple_tmult
5750 (pnode->tensor(), child1->tensor(), child0->tensor());
5752 pgai = std::make_shared<ga_instruction_simple_tmult>
5753 (pnode->tensor(), child1->tensor(), child0->tensor());
5754 }
else if (is_uniform)
5755 pgai = ga_uniform_instruction_contraction_switch
5756 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
5758 pgai = ga_instruction_contraction_switch
5759 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
5761 pgai = std::make_shared<ga_instruction_spec_contraction>
5762 (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
5763 }
else if (child1->test_function_type == 0 ||
5764 (child0->tensor_proper_size() == s2 &&
5765 child1->tensor_proper_size() == s2)) {
5768 pgai = ga_uniform_instruction_simple_tmult
5769 (pnode->tensor(), child0->tensor(), child1->tensor());
5771 pgai = std::make_shared<ga_instruction_simple_tmult>
5772 (pnode->tensor(), child0->tensor(), child1->tensor());
5775 pgai = ga_uniform_instruction_contraction_switch
5776 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
5778 pgai = ga_instruction_contraction_switch
5779 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
5782 if (child0->tensor_proper_size() == s2)
5783 pgai = ga_uniform_instruction_contraction_switch
5784 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
5785 else if (child1->tensor_proper_size() == s2)
5786 pgai = std::make_shared<ga_instruction_spec_contraction>
5787 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5789 pgai = std::make_shared<ga_instruction_spec2_contraction>
5790 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5795 if (pnode->test_function_type < 3) {
5798 pgai = ga_uniform_instruction_simple_tmult
5799 (pnode->tensor(), child0->tensor(), child1->tensor());
5801 pgai = std::make_shared<ga_instruction_simple_tmult>
5802 (pnode->tensor(), child0->tensor(), child1->tensor());
5804 if (child1->test_function_type == 0)
5805 pgai = std::make_shared<ga_instruction_matrix_mult>
5806 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5808 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
5809 (pnode->tensor(), child0->tensor(), child1->tensor(),
5810 s2, tps0/s2, tps1/s2);
5813 if (child0->tensor_proper_size() == 1) {
5814 if (child0->test_function_type == 0 ||
5815 child0->test_function_type == 1) {
5817 pgai = ga_uniform_instruction_simple_tmult
5818 (pnode->tensor(), child0->tensor(), child1->tensor());
5820 pgai = std::make_shared<ga_instruction_simple_tmult>
5821 (pnode->tensor(), child0->tensor(), child1->tensor());
5823 pgai = std::make_shared<ga_instruction_spec_tmult>
5824 (pnode->tensor(), child1->tensor(), child0->tensor(),
5827 if (child1->test_function_type == 0)
5828 pgai = std::make_shared<ga_instruction_matrix_mult>
5829 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
5830 else if (child1->test_function_type == 2)
5831 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
5832 (pnode->tensor(), child0->tensor(), child1->tensor(),
5833 s2, tps0/s2, tps1/s2);
5835 pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
5836 (pnode->tensor(), child0->tensor(), child1->tensor(),
5837 s2, tps0/s2, tps1/s2);
5841 rmi.instructions.push_back(std::move(pgai));
5846 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5847 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
5848 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5849 }
else if (child1->tensor().size() == 1) {
5850 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5851 pgai = std::make_shared<ga_instruction_scalar_div>
5852 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5853 }
else GMM_ASSERT1(
false,
"Internal error");
5854 rmi.instructions.push_back(std::move(pgai));
5858 pnode->t.set_to_copy(child0->t);
5859 pgai = std::make_shared<ga_instruction_print_tensor>
5860 (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
5861 rmi.instructions.push_back(std::move(pgai));
5865 if (pnode->tensor_proper_size() > 1) {
5866 size_type n1 = child0->tensor_proper_size(0);
5867 size_type n2 = (child0->tensor_order() > 1) ?
5868 child0->tensor_proper_size(1) : 1;
5870 for (size_type i = 2; i < child0->tensor_order(); ++i)
5871 nn *= child0->tensor_proper_size(i);
5872 if (child0->nb_test_functions() == 0)
5873 pgai = std::make_shared<ga_instruction_transpose_no_test>
5874 (pnode->tensor(), child0->tensor(), n1, n2, nn);
5876 pgai = std::make_shared<ga_instruction_transpose>
5877 (pnode->tensor(), child0->tensor(), n1, n2, nn);
5878 rmi.instructions.push_back(std::move(pgai));
5880 pnode->t.set_to_copy(child0->t);
5885 if (pnode->tensor_proper_size() != 1) {
5886 pgai = std::make_shared<ga_instruction_sym>
5887 (pnode->tensor(), child0->tensor());
5888 rmi.instructions.push_back(std::move(pgai));
5890 pnode->t.set_to_copy(child0->t);
5896 pgai = std::make_shared<ga_instruction_skew>
5897 (pnode->tensor(), child0->tensor());
5898 rmi.instructions.push_back(std::move(pgai));
5904 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
5906 pnode->t.set_to_copy(child0->t);
5908 pgai = std::make_shared<ga_instruction_trace>
5909 (pnode->tensor(), child0->tensor(), N);
5910 rmi.instructions.push_back(std::move(pgai));
5917 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
5918 pgai = std::make_shared<ga_instruction_deviator>
5919 (pnode->tensor(), child0->tensor(), N);
5920 rmi.instructions.push_back(std::move(pgai));
5926 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5927 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5928 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5929 }
else if (child0->tensor().size() == 1) {
5930 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
5931 pgai = std::make_shared<ga_instruction_scalar_mult>
5932 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
5934 else if (child1->tensor().size() == 1) {
5935 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5936 pgai = std::make_shared<ga_instruction_scalar_mult>
5937 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5939 else if (child1->test_function_type == 0)
5940 pgai = std::make_shared<ga_instruction_dotmult>
5941 (pnode->tensor(), child0->tensor(), child1->tensor());
5942 else if (child0->test_function_type == 0)
5943 pgai = std::make_shared<ga_instruction_dotmult>
5944 (pnode->tensor(), child1->tensor(), child0->tensor());
5945 else if (child0->test_function_type == 1)
5946 pgai = std::make_shared<ga_instruction_dotmult_spec>
5947 (pnode->tensor(), child0->tensor(), child1->tensor());
5949 pgai = std::make_shared<ga_instruction_dotmult_spec>
5950 (pnode->tensor(), child1->tensor(), child0->tensor());
5952 rmi.instructions.push_back(std::move(pgai));
5957 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5958 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
5959 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5960 }
else if (child1->tensor().size() == 1) {
5961 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5962 pgai = std::make_shared<ga_instruction_scalar_div>
5963 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5964 }
else if (child1->test_function_type == 0) {
5965 pgai = std::make_shared<ga_instruction_dotdiv>
5966 (pnode->tensor(), child0->tensor(), child1->tensor());
5967 }
else GMM_ASSERT1(
false,
"Internal error");
5968 rmi.instructions.push_back(std::move(pgai));
5973 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
5974 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
5975 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
5976 }
else if (child0->tensor().size() == 1) {
5977 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
5978 pgai = std::make_shared<ga_instruction_scalar_mult>
5979 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
5981 else if (child1->tensor().size() == 1) {
5982 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
5983 pgai = std::make_shared<ga_instruction_scalar_mult>
5984 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
5986 else if (child1->test_function_type == 0) {
5988 pgai = ga_uniform_instruction_simple_tmult
5989 (pnode->tensor(), child0->tensor(), child1->tensor());
5991 pgai = std::make_shared<ga_instruction_simple_tmult>
5992 (pnode->tensor(), child0->tensor(), child1->tensor());
5993 }
else if (child1->tensor_proper_size() == 1)
5994 pgai = std::make_shared<ga_instruction_spec2_tmult>
5995 (pnode->tensor(), child0->tensor(), child1->tensor());
5997 pgai = std::make_shared<ga_instruction_spec_tmult>
5998 (pnode->tensor(), child0->tensor(), child1->tensor(),
5999 child0->tensor_proper_size(),
6000 child1->tensor_proper_size());
6002 rmi.instructions.push_back(std::move(pgai));
6005 default:GMM_ASSERT1(
false,
"Unexpected operation. Internal error.");
6009 case GA_NODE_C_MATRIX:
6011 if (pnode->test_function_type) {
6012 std::vector<const base_tensor *> components(pnode->children.size());
6013 for (size_type i = 0; i < pnode->children.size(); ++i)
6014 components[i] = &(pnode->children[i]->tensor());
6015 pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
6016 (pnode->tensor(), components);
6018 std::vector<scalar_type *> components(pnode->children.size());
6019 for (size_type i = 0; i < pnode->children.size(); ++i)
6020 components[i] = &(pnode->children[i]->tensor()[0]);
6021 pgai = std::make_shared<ga_instruction_simple_c_matrix>
6022 (pnode->tensor(), components);
6024 rmi.instructions.push_back(std::move(pgai));
6028 case GA_NODE_PARAMS:
6029 if (child0->node_type == GA_NODE_RESHAPE) {
6030 pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
6032 rmi.instructions.push_back(std::move(pgai));
6033 }
else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
6035 ind =
size_type(round(pnode->children[2]->tensor()[0])-1);
6037 for (size_type i = 0; i < child1->tensor_order(); ++i)
6038 if (i>ind) ii2 *= child1->tensor_proper_size(i);
6039 size_type nn = child1->tensor_proper_size(ind);
6040 pgai = std::make_shared<ga_instruction_index_move_last>
6041 (pnode->tensor(), child1->tensor(), nn, ii2);
6042 rmi.instructions.push_back(std::move(pgai));
6043 }
else if (child0->node_type == GA_NODE_SWAP_IND) {
6045 for (size_type i = 2; i < 4; ++i)
6046 ind[i] =
size_type(round(pnode->children[i]->tensor()[0])-1);
6047 if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
6048 size_type ii2 = 1, ii3 = 1;
6049 for (size_type i = 0; i < child1->tensor_order(); ++i) {
6050 if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
6051 if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
6053 size_type nn1 = child1->tensor_proper_size(ind[2]);
6054 size_type nn2 = child1->tensor_proper_size(ind[3]);
6056 pgai = std::make_shared<ga_instruction_swap_indices>
6057 (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
6058 rmi.instructions.push_back(std::move(pgai));
6059 }
else if (child0->node_type == GA_NODE_CONTRACT) {
6060 std::vector<size_type> ind(2), indsize(2);
6061 pga_tree_node child2(0);
6062 if (pnode->children.size() == 4)
6063 { ind[0] = 2; ind[1] = 3; }
6064 else if (pnode->children.size() == 5)
6065 { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
6066 else if (pnode->children.size() == 7) {
6067 ind.resize(4); indsize.resize(4);
6068 ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
6069 child2 = pnode->children[4];
6071 size_type kk = 0, ll = 1;
6072 for (size_type i = 1; i < pnode->children.size(); ++i) {
6074 ind[kk] =
size_type(round(pnode->children[i]->tensor()[0])-1);
6075 indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
6080 if (pnode->children.size() == 4) {
6081 size_type i1 = ind[0], i2 = ind[1];
6082 if (i1 > i2) std::swap(i1, i2);
6083 size_type ii2 = 1, ii3 = 1;
6084 for (size_type i = 0; i < child1->tensor_order(); ++i) {
6085 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
6086 if (i > i2) ii3 *= child1->tensor_proper_size(i);
6088 pgai = std::make_shared<ga_instruction_contract_1_1>
6089 (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
6091 else if (pnode->children.size() == 5) {
6093 size_type i1 = ind[0], i2 = ind[1];
6094 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
6095 for (size_type i = 0; i < child1->tensor_order(); ++i) {
6096 if (i < i1) ii1 *= child1->tensor_proper_size(i);
6097 if (i > i1) ii2 *= child1->tensor_proper_size(i);
6099 for (size_type i = 0; i < child2->tensor_order(); ++i) {
6100 if (i < i2) ii3 *= child2->tensor_proper_size(i);
6101 if (i > i2) ii4 *= child2->tensor_proper_size(i);
6103 if (child1->test_function_type==1 && child2->test_function_type==2)
6104 pgai = std::make_shared<ga_instruction_contract_2_1_rev>
6105 (pnode->tensor(), child1->tensor(), child2->tensor(),
6106 indsize[0], ii1, ii2, ii3, ii4);
6108 pgai = std::make_shared<ga_instruction_contract_2_1>
6109 (pnode->tensor(), child1->tensor(), child2->tensor(),
6110 indsize[0], ii1, ii2, ii3, ii4);
6112 else if (pnode->children.size() == 7) {
6114 size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
6115 size_type nn1 = indsize[0], nn2 = indsize[1];
6116 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
6118 { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
6119 for (size_type i = 0; i < child1->tensor_order(); ++i) {
6120 if (i < i1) ii1 *= child1->tensor_proper_size(i);
6121 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
6122 if (i > i2) ii3 *= child1->tensor_proper_size(i);
6124 for (size_type i = 0; i < child2->tensor_order(); ++i) {
6125 if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
6126 if ((i > i3 && i < i4) || (i > i4 && i < i3))
6127 ii5 *= child2->tensor_proper_size(i);
6128 if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
6130 if (child1->test_function_type==1 && child2->test_function_type==2)
6131 pgai = std::make_shared<ga_instruction_contract_2_2_rev>
6132 (pnode->tensor(), child1->tensor(), child2->tensor(),
6133 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
6135 pgai = std::make_shared<ga_instruction_contract_2_2>
6136 (pnode->tensor(), child1->tensor(), child2->tensor(),
6137 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
6139 rmi.instructions.push_back(std::move(pgai));
6140 }
else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
6142 std::string name = child0->name;
6143 const ga_predef_function_tab &PREDEF_FUNCTIONS
6145 ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
6146 const ga_predef_function &F = it->second;
6147 size_type nbargs = F.nbargs();
6148 pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
6151 if (child1->tensor().size() == 1) {
6153 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
6154 (pnode->tensor()[0], child1->tensor()[0], F.f1());
6156 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
6157 (pnode->tensor()[0], child1->tensor()[0], F);
6160 pgai = std::make_shared<ga_instruction_eval_func_1arg>
6161 (pnode->tensor(), child1->tensor(), F.f1());
6163 pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
6164 (pnode->tensor(), child1->tensor(), F);
6167 if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
6169 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
6170 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
6173 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
6174 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
6176 }
else if (child1->tensor().size() == 1) {
6179 std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
6180 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
6183 std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
6184 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
6185 }
else if (child2->tensor().size() == 1) {
6188 std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
6189 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
6192 std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
6193 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
6196 pgai = std::make_shared<ga_instruction_eval_func_2arg>
6197 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
6199 pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
6200 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
6203 rmi.instructions.push_back(std::move(pgai));
6205 }
else if (child0->node_type == GA_NODE_SPEC_FUNC) {
6207 GMM_ASSERT1(
false,
"Internal error");
6209 }
else if (child0->node_type == GA_NODE_OPERATOR) {
6211 ga_predef_operator_tab &PREDEF_OPERATORS
6213 ga_predef_operator_tab::T::iterator it
6214 = PREDEF_OPERATORS.tab.find(child0->name);
6215 const ga_nonlinear_operator &OP = *(it->second);
6216 ga_nonlinear_operator::arg_list args;
6217 for (size_type i = 1; i < pnode->children.size(); ++i)
6218 args.push_back(&(pnode->children[i]->tensor()));
6220 if (child0->der1 && child0->der2 == 0) {
6221 pgai = std::make_shared<ga_instruction_eval_derivative_OP>
6222 (pnode->tensor(), OP, args, child0->der1);
6223 }
else if (child0->der1 && child0->der2) {
6224 pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
6225 (pnode->tensor(), OP, args, child0->der1, child0->der2);
6227 pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
6230 rmi.instructions.push_back(std::move(pgai));
6233 bgeot::multi_index mi1(size0.size()), indices;
6234 if (pnode->tensor().size() == 1) {
6235 for (size_type i = 0; i < child0->tensor_order(); ++i)
6236 mi1[i] =
size_type(round(pnode->children[i+1]->tensor()[0])-1);
6237 pgai = std::make_shared<ga_instruction_copy_scalar>
6238 (pnode->tensor()[0], child0->tensor()(mi1));
6240 size_type nb_test = pnode->nb_test_functions();
6241 for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
6242 for (size_type i = 0; i < child0->tensor_order(); ++i) {
6243 if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
6245 =
size_type(round(pnode->children[i+1]->tensor()[0])- 1);
6247 indices.push_back(i+nb_test);
6249 pgai = std::make_shared<ga_instruction_tensor_slice>
6250 (pnode->tensor(), child0->tensor(), mi1, indices);
6252 rmi.instructions.push_back(std::move(pgai));
6257 default:GMM_ASSERT1(
false,
"Unexpected node type " << pnode->node_type
6258 <<
" in compilation. Internal error.");
6260 if (tensor_to_clear) {
6263 pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
6264 rmi.elt_instructions.push_back(std::move(pgai));
6267 rmi.node_list[pnode->hash_value].push_back(pnode);
6270 void ga_compile_function(ga_workspace &workspace,
6271 ga_instruction_set &gis,
bool scalar) {
6272 for (size_type i = 0; i < workspace.nb_trees(); ++i) {
6273 const ga_workspace::tree_description &td = workspace.tree_info(i);
6275 gis.trees.push_back(*(td.ptree));
6276 pga_tree_node root = gis.trees.back().root;
6278 GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
6279 "The result of the given expression is not a scalar");
6280 ga_instruction_set::region_mim rm(td.mim, td.rg);
6281 gis.whole_instructions[rm].m = td.m;
6282 ga_if_hierarchy if_hierarchy;
6283 ga_compile_node(root, workspace, gis,
6284 gis.whole_instructions[rm],*(td.m),
true,if_hierarchy);
6286 gis.coeff = scalar_type(1);
6287 pga_instruction pgai;
6288 workspace.assembled_tensor() = root->tensor();
6289 pgai = std::make_shared<ga_instruction_add_to_coeff>
6290 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
6291 gis.whole_instructions[rm].instructions.push_back(std::move(pgai));
6296 static bool ga_node_used_interpolates
6297 (
const pga_tree_node pnode,
const ga_workspace &workspace,
6298 std::map<std::string, std::set<std::string> > &interpolates,
6299 std::set<std::string> &interpolates_der) {
6301 bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
6302 pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
6303 pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
6304 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
6305 bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
6306 pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
6307 pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
6308 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
6310 if (intrpl || intrpl_test ||
6311 pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
6312 pnode->node_type == GA_NODE_INTERPOLATE_X ||
6313 pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
6314 interpolates[pnode->interpolate_name].size();
6315 if (intrpl || intrpl_test) {
6316 if (workspace.variable_group_exists(pnode->name))
6317 interpolates[pnode->interpolate_name].insert(pnode->name);
6321 if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
6322 interpolates_der.insert(pnode->interpolate_name_der);
6323 interpolates[pnode->interpolate_name_der].size();
6324 if (workspace.variable_group_exists(pnode->name))
6325 interpolates[pnode->interpolate_name_der].insert(pnode->name);
6327 for (size_type i = 0; i < pnode->children.size(); ++i)
6328 found = ga_node_used_interpolates(pnode->children[i], workspace,
6329 interpolates, interpolates_der)
6335 static void ga_compile_interpolate_trans
6336 (
const pga_tree_node pnode,
const ga_workspace &workspace,
6337 ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
6340 std::set<std::string> interpolates_der;
6341 std::map<std::string, std::set<std::string> > transformations;
6342 ga_node_used_interpolates(pnode, workspace, transformations,
6345 for (
const auto &transformation : transformations) {
6346 const std::string &transname = transformation.first;
6347 bool compute_der = (interpolates_der.count(transname) != 0);
6348 if (rmi.transformations.count(transname) == 0 ||
6349 (compute_der && rmi.transformations_der.count(transname) == 0)) {
6350 rmi.transformations[transname].size();
6351 gis.transformations.insert(transname);
6352 if (compute_der) rmi.transformations_der.insert(transname);
6353 pga_instruction pgai;
6354 if (transname.compare(
"neighbour_elt") == 0) {
6355 pgai = std::make_shared<ga_instruction_neighbour_transformation_call>
6356 (workspace, rmi.interpolate_infos[transname],
6357 workspace.interpolate_transformation(transname), gis.ctx,
6358 gis.Normal, m, gis.ipt, gis.pai, gis.gp_pool,
6359 gis.neighbour_corresp);
6361 pgai = std::make_shared<ga_instruction_transformation_call>
6362 (workspace, rmi.interpolate_infos[transname],
6363 workspace.interpolate_transformation(transname), gis.ctx,
6364 gis.Normal, m, compute_der);
6366 if (pgai) rmi.instructions.push_back(std::move(pgai));
6369 for (
const std::string &nodename : transformation.second) {
6370 if (rmi.transformations[transname].count(nodename) == 0) {
6371 auto&& inin = rmi.interpolate_infos[transname];
6372 pga_instruction pgai =
6373 std::make_shared<ga_instruction_update_group_info>
6374 (workspace, gis, inin, nodename, inin.groups_info[nodename]);
6375 rmi.instructions.push_back(std::move(pgai));
6376 rmi.transformations[transname].insert(nodename);
6382 void ga_compile_interpolation(ga_workspace &workspace,
6383 ga_instruction_set &gis) {
6384 gis.transformations.clear();
6385 gis.whole_instructions.clear();
6386 for (size_type i = 0; i < workspace.nb_trees(); ++i) {
6387 const ga_workspace::tree_description &td = workspace.tree_info(i);
6388 if (td.interpolation > 0) {
6389 gis.trees.push_back(*(td.ptree));
6392 const mesh *m = td.m;
6393 GMM_ASSERT1(m,
"Internal error");
6394 ga_semantic_analysis(gis.trees.back(), workspace, *m,
6395 ref_elt_dim_of_mesh(*m),
true,
false);
6396 pga_tree_node root = gis.trees.back().root;
6399 ga_instruction_set::region_mim rm(td.mim, td.rg);
6400 ga_instruction_set::region_mim_instructions &rmi
6401 = gis.whole_instructions[rm];
6405 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
6406 ga_compile_node(root, workspace, gis,rmi, *(td.m),
false,
6407 rmi.current_hierarchy);
6410 workspace.assembled_tensor() = root->tensor();
6411 pga_instruction pgai = std::make_shared<ga_instruction_add_to>
6412 (workspace.assembled_tensor(), root->tensor());
6413 rmi.instructions.push_back(std::move(pgai));
6419 void ga_compile(ga_workspace &workspace,
6420 ga_instruction_set &gis, size_type order) {
6421 gis.transformations.clear();
6422 gis.whole_instructions.clear();
6423 for (size_type version : std::array<size_type, 3>{1, 0, 2}) {
6424 for (size_type i = 0; i < workspace.nb_trees(); ++i) {
6425 ga_workspace::tree_description &td = workspace.tree_info(i);
6427 if ((version == td.interpolation) &&
6428 ((version == 0 && td.order == order) ||
6429 ((version > 0 && (td.order == size_type(-1) ||
6430 td.order == size_type(-2) - order))))) {
6431 ga_tree *added_tree = 0;
6432 if (td.interpolation) {
6433 gis.interpolation_trees.push_back(*(td.ptree));
6434 added_tree = &(gis.interpolation_trees.back());
6436 gis.trees.push_back(*(td.ptree));
6437 added_tree = &(gis.trees.back());
6441 ga_semantic_analysis(*added_tree, workspace,
6442 td.mim->linked_mesh(),
6443 ref_elt_dim_of_mesh(td.mim->linked_mesh()),
6445 pga_tree_node root = added_tree->root;
6450 ga_instruction_set::region_mim rm(td.mim, td.rg);
6451 ga_instruction_set::region_mim_instructions &rmi
6452 = gis.whole_instructions[rm];
6456 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
6457 ga_compile_node(root, workspace, gis, rmi, *(td.m),
false,
6458 rmi.current_hierarchy);
6463 if(td.varname_interpolation.size() != 0) {
6465 = workspace.associated_im_data(td.varname_interpolation);
6466 auto &V =
const_cast<model_real_plain_vector &
> 6467 (workspace.value(td.varname_interpolation));
6468 GMM_ASSERT1(imd,
"Internal error");
6469 auto pgai = std::make_shared<ga_instruction_assignment>
6470 (root->tensor(), V, gis.ctx, imd);
6471 rmi.instructions.push_back(std::move(pgai));
6474 pga_instruction pgai;
6477 workspace.assembled_tensor() = root->tensor();
6478 pgai = std::make_shared<ga_instruction_add_to_coeff>
6479 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
6483 GMM_ASSERT1(root->tensor_proper_size() == 1,
6484 "Invalid vector or tensor quantity. An order 1 " 6485 "weak form has to be a scalar quantity");
6486 const mesh_fem *mf=workspace.associated_mf(root->name_test1);
6487 const mesh_fem **mfg = 0;
6488 add_interval_to_gis(workspace, root->name_test1, gis);
6491 const std::string &intn1 = root->interpolate_name_test1;
6492 const gmm::sub_interval *Ir = 0, *In = 0;
6494 workspace.variable_group_exists(root->name_test1)) {
6495 ga_instruction_set::variable_group_info &vgi =
6496 rmi.interpolate_infos[intn1]
6497 .groups_info[root->name_test1];
6503 Ir = &(gis.var_intervals[root->name_test1]);
6504 In = &(workspace.interval_of_variable(root->name_test1));
6506 fem_interpolation_context &ctx
6507 = intn1.size() ? rmi.interpolate_infos[intn1].ctx
6510 = (!intn1.empty() && intn1.compare(
"neighbour_elt")!=0);
6511 pgai = std::make_shared<ga_instruction_fem_vector_assembly>
6512 (root->tensor(), workspace.unreduced_vector(),
6513 workspace.assembled_vector(), ctx, *Ir, *In, mf, mfg,
6514 gis.coeff, gis.nbpt, gis.ipt, interpolate);
6516 pgai = std::make_shared<ga_instruction_vector_assembly>
6517 (root->tensor(), workspace.assembled_vector(),
6518 workspace.interval_of_variable(root->name_test1),
6525 GMM_ASSERT1(root->tensor_proper_size() == 1,
6526 "Invalid vector or tensor quantity. An order 2 " 6527 "weak form has to be a scalar quantity");
6528 const mesh_fem *mf1=workspace.associated_mf(root->name_test1);
6529 const mesh_fem *mf2=workspace.associated_mf(root->name_test2);
6530 const mesh_fem **mfg1 = 0, **mfg2 = 0;
6531 const std::string &intn1 = root->interpolate_name_test1;
6532 const std::string &intn2 = root->interpolate_name_test2;
6533 fem_interpolation_context &ctx1
6534 = intn1.empty() ? gis.ctx
6535 : rmi.interpolate_infos[intn1].ctx;
6536 fem_interpolation_context &ctx2
6537 = intn2.empty() ? gis.ctx
6538 : rmi.interpolate_infos[intn2].ctx;
6540 = (!intn1.empty() && intn1.compare(
"neighbour_elt")!=0)
6541 || (!intn2.empty() && intn2.compare(
"neighbour_elt")!=0);
6543 add_interval_to_gis(workspace, root->name_test1, gis);
6544 add_interval_to_gis(workspace, root->name_test2, gis);
6546 const gmm::sub_interval *Ir1 = 0, *In1 = 0, *Ir2 = 0, *In2=0;
6547 const scalar_type *alpha1 = 0, *alpha2 = 0;
6549 if (!intn1.empty() &&
6550 workspace.variable_group_exists(root->name_test1)) {
6551 ga_instruction_set::variable_group_info &vgi =
6552 rmi.interpolate_infos[intn1]
6553 .groups_info[root->name_test1];
6558 alpha1 = &(vgi.alpha);
6560 alpha1 = &(workspace.factor_of_variable(root->name_test1));
6561 Ir1 = &(gis.var_intervals[root->name_test1]);
6562 In1 = &(workspace.interval_of_variable(root->name_test1));
6565 if (!intn2.empty() &&
6566 workspace.variable_group_exists(root->name_test2)) {
6567 ga_instruction_set::variable_group_info &vgi =
6568 rmi.interpolate_infos[intn2]
6569 .groups_info[root->name_test2];
6574 alpha2 = &(vgi.alpha);
6576 alpha2 = &(workspace.factor_of_variable(root->name_test2));
6577 Ir2 = &(gis.var_intervals[root->name_test2]);
6578 In2 = &(workspace.interval_of_variable(root->name_test2));
6581 if (!interpolate && mfg1 == 0 && mfg2 == 0 && mf1 && mf2
6582 && mf1->get_qdim() == 1 && mf2->get_qdim() == 1
6583 && !(mf1->is_reduced()) && !(mf2->is_reduced())) {
6584 pgai = std::make_shared
6585 <ga_instruction_matrix_assembly_standard_scalar<>>
6586 (root->tensor(), workspace.assembled_matrix(), ctx1, ctx2,
6587 *In1, *In2, mf1, mf2,
6588 gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6589 }
else if (!interpolate && mfg1 == 0 && mfg2==0 && mf1 && mf2
6590 && !(mf1->is_reduced()) && !(mf2->is_reduced())) {
6591 if (root->sparsity() == 10 && root->t.qdim()==2)
6592 pgai = std::make_shared
6593 <ga_instruction_matrix_assembly_standard_vector_opt10_2>
6594 (root->tensor(), workspace.assembled_matrix(),ctx1,ctx2,
6595 *In1, *In2, mf1, mf2,
6596 gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6597 else if (root->sparsity() == 10 && root->t.qdim()==3)
6598 pgai = std::make_shared
6599 <ga_instruction_matrix_assembly_standard_vector_opt10_3>
6600 (root->tensor(), workspace.assembled_matrix(),ctx1,ctx2,
6601 *In1, *In2, mf1, mf2,
6602 gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6604 pgai = std::make_shared
6605 <ga_instruction_matrix_assembly_standard_vector<>>
6606 (root->tensor(), workspace.assembled_matrix(),ctx1,ctx2,
6607 *In1, *In2, mf1, mf2,
6608 gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt);
6611 pgai = std::make_shared<ga_instruction_matrix_assembly<>>
6612 (root->tensor(), workspace.unreduced_matrix(),
6613 workspace.assembled_matrix(), ctx1, ctx2,
6614 *Ir1, *In1, *Ir2, *In2, mf1, mfg1, mf2, mfg2,
6615 gis.coeff, *alpha1, *alpha2, gis.nbpt, gis.ipt,
6622 gis.whole_instructions[rm].instructions.push_back
6637 void ga_function_exec(ga_instruction_set &gis) {
6639 for (
auto &&instr : gis.whole_instructions) {
6640 ga_instruction_list &gil = instr.second.instructions;
6641 for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
6645 void ga_interpolation_exec(ga_instruction_set &gis,
6646 ga_workspace &workspace,
6647 ga_interpolation_context &gic) {
6649 base_small_vector un, up;
6651 for (
const std::string &t : gis.transformations)
6652 workspace.interpolate_transformation(t)->init(workspace);
6654 for (
auto &&instr : gis.whole_instructions) {
6657 const mesh_region ®ion = *(instr.first.region());
6659 GMM_ASSERT1(&m == &(gic.linked_mesh()),
6660 "Incompatibility of meshes in interpolation");
6661 ga_instruction_list &gilb = instr.second.begin_instructions;
6662 ga_instruction_list &gile = instr.second.elt_instructions;
6663 ga_instruction_list &gil = instr.second.instructions;
6666 std::vector<size_type> ind;
6667 auto pai_old = papprox_integration{};
6669 if (gic.use_mim()) {
6676 bgeot::pstored_point_tab pspt
6677 = gic.ppoints_for_element(v.cv(), v.f(), ind);
6679 if (pspt.get() && ind.size() && pspt->size()) {
6680 m.points_of_convex(v.cv(), G);
6682 up.resize(G.nrows());
6683 un.resize(pgt->dim());
6685 if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
6686 gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
6688 if (!(gic.use_pgp(v.cv()))) {
6689 gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
6691 gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
6696 if (gis.need_elt_size)
6700 gis.nbpt = pspt->size();
6701 for (size_type ii = 0; ii < ind.size(); ++ii) {
6703 if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
6704 else gis.ctx.set_xref((*pspt)[gis.ipt]);
6706 if (ii == 0 || !(pgt->is_linear())) {
6709 const base_matrix& B = gis.ctx.B();
6710 gmm::copy(pgt->normals()[v.f()], un);
6711 gmm::mult(B, un, up);
6713 gmm::scale(up,1.0/nup);
6714 gmm::clean(up, 1e-13);
6716 }
else gis.Normal.resize(0);
6718 gmm::clear(workspace.assembled_tensor().as_vector());
6720 for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
6721 for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
6723 for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
6724 gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
6729 for (
const std::string &t : gis.transformations)
6730 workspace.interpolate_transformation(t)->finalize();
6735 void ga_interpolation_single_point_exec
6736 (ga_instruction_set &gis, ga_workspace &workspace,
6737 const fem_interpolation_context &ctx_x,
const base_small_vector &Normal,
6738 const mesh &interp_mesh) {
6740 gis.Normal = Normal;
6741 gmm::clear(workspace.assembled_tensor().as_vector());
6746 for (
auto &&instr : gis.whole_instructions) {
6748 GMM_ASSERT1(&m == &interp_mesh,
6749 "Incompatibility of meshes in interpolation");
6750 ga_instruction_list &gilb = instr.second.begin_instructions;
6751 for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
6752 ga_instruction_list &gile = instr.second.elt_instructions;
6753 for (size_type j = 0; j < gile.size(); ++j) j+=gile[j]->exec();
6754 ga_instruction_list &gil = instr.second.instructions;
6755 for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
6759 void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
6761 base_small_vector un;
6764 for (
const std::string &t : gis.transformations)
6765 workspace.interpolate_transformation(t)->init(workspace);
6767 for (
const auto &instr : gis.whole_instructions) {
6770 GMM_ASSERT1(&m == &(mim.
linked_mesh()),
"Incompatibility of meshes");
6771 const ga_instruction_list &gilb = instr.second.begin_instructions;
6772 const ga_instruction_list &gile = instr.second.elt_instructions;
6773 const ga_instruction_list &gil = instr.second.instructions;
6785 const mesh_region ®ion = *(instr.first.region());
6790 pintegration_method pim = 0;
6791 papprox_integration pai = 0;
6792 bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
6793 bgeot::pgeotrans_precomp pgp = 0;
6794 bool first_gp =
true;
6798 if (v.cv() != old_cv) {
6799 pgt = m.trans_of_convex(v.cv());
6801 m.points_of_convex(v.cv(), G);
6803 if (pim->type() == IM_NONE)
continue;
6804 GMM_ASSERT1(pim->type() == IM_APPROX,
"Sorry, exact methods cannot " 6805 "be used in high level generic assembly");
6806 pai = pim->approx_method();
6807 pspt = pai->pintegration_points();
6809 if (pgp && gis.pai == pai && pgt_old == pgt) {
6810 gis.ctx.change(pgp, 0, 0, G, v.cv(), v.f());
6812 if (pai->is_built_on_the_fly()) {
6813 gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
6816 pgp = gis.gp_pool(pgt, pspt);
6817 gis.ctx.change(pgp, 0, 0, G, v.cv(), v.f());
6819 pgt_old = pgt; gis.pai = pai;
6821 if (gis.need_elt_size)
6826 if (pim->type() == IM_NONE)
continue;
6827 gis.ctx.set_face_num(v.f());
6829 if (pspt != old_pspt) { first_gp =
true; old_pspt = pspt; }
6832 gis.nbpt = pai->nb_points_on_convex();
6833 size_type first_ind = 0;
6835 gis.nbpt = pai->nb_points_on_face(v.f());
6836 first_ind = pai->ind_first_point_on_face(v.f());
6838 for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
6840 if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
6841 else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
6842 if (gis.ipt == 0 || !(pgt->is_linear())) {
6846 gis.Normal.resize(G.nrows());
6847 un.resize(pgt->dim());
6848 gmm::copy(pgt->normals()[v.f()], un);
6849 gmm::mult(gis.ctx.B(), un, gis.Normal);
6852 gmm::scale(gis.Normal, 1.0/nup);
6853 gmm::clean(gis.Normal, 1e-13);
6854 }
else gis.Normal.resize(0);
6856 auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
6857 gis.coeff = J * ipt_coeff;
6858 bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
6859 workspace.include_empty_int_points());
6860 if (!enable_ipt) gis.coeff = scalar_type(0);
6862 for (size_type j = 0; j < gilb.size(); ++j) j+=gilb[j]->exec();
6866 for (size_type j = 0; j < gile.size(); ++j) j+=gile[j]->exec();
6868 if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
6869 for (size_type j = 0; j < gil.size(); ++j) j+=gil[j]->exec();
6876 GA_DEBUG_INFO(
"-----------------------------");
6878 for (
const std::string &t : gis.transformations)
6879 workspace.interpolate_transformation(t)->finalize();
virtual pintegration_method int_method_of_element(size_type cv) const
return the integration method associated with an element (in no integration is associated, the function will crash! use the convex_index() of the mesh_im to check that a fem is associated to a given convex)
void slice_vector_on_basic_dof_of_element(const mesh_fem &mf, const VEC1 &vec, size_type cv, VEC2 &coeff, size_type qmult1=size_type(-1), size_type qmult2=size_type(-1))
Given a mesh_fem.
The object geotrans_precomp_pool Allow to allocate a certain number of geotrans_precomp and automatic...
Describe a mesh (collection of convexes (elements) and points).
does the inversion of the geometric transformation for a given convex
Describe an integration method linked to a mesh.
Semantic analysis of assembly trees and semantic manipulations.
static T & instance()
Instance from the current thread.
number_traits< typename linalg_traits< V >::value_type >::magnitude_type vect_norm2(const V &v)
Euclidean norm of a vector.
size_t size_type
used as the common size type in the library
number_traits< typename linalg_traits< V >::value_type >::magnitude_type vect_norminf(const V &v)
Infinity norm of a vector.
virtual scalar_type convex_radius_estimate(size_type ic) const
Return an estimate of the convex largest dimension.
a subclass of mesh_im which is conformal to a number of level sets.
bool invert(const base_node &n, base_node &n_ref, scalar_type IN_EPS=1e-12)
given the node on the real element, returns the node on the reference element (even if it is outside ...
"iterator" class for regions.
sparse vector built upon std::vector.
GEneric Tool for Finite Element Methods.
scalar_type APIDECL convex_radius_estimate(bgeot::pgeometric_trans pgt, const base_matrix &pts)
rough estimate of the radius of the convex using the largest eigenvalue of the jacobian of the geomet...
std::shared_ptr< const getfem::virtual_fem > pfem
type of pointer on a fem description
void clear(L &l)
clear (fill with zeros) a vector or matrix.
gmm::uint16_type short_type
used as the common short type integer in the library
base_small_vector compute_normal(const geotrans_interpolation_context &c, size_type face)
norm of returned vector is the ratio between the face surface on the real element and the face surfac...
const dal::bit_vector & convex_index(void) const
Get the set of convexes where an integration method has been assigned.
const mesh & linked_mesh() const
Give a reference to the linked mesh of type mesh.
Compilation and execution operations.
size_type alpha(short_type n, short_type d)
Return the value of which is the number of monomials of a polynomial of variables and degree ...
std::shared_ptr< const bgeot::geometric_trans > pgeometric_trans
pointer type for a geometric transformation