GetFEM++ 5.3
getfem_omp.h
getfem_omp.h
/* -*- c++ -*- (enables emacs c++ mode) */
/*===========================================================================

 Copyright (C) 2012-2017 Andriy Andreykiv

 This file is a part of GetFEM++

 GetFEM++ is free software; you can redistribute it and/or modify it
 under the terms of the GNU Lesser General Public License as published
 by the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version along with the GCC Runtime Library
 Exception either version 3.1 or (at your option) any later version.
 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 License and GCC Runtime Library Exception for more details.
 You should have received a copy of the GNU Lesser General Public License
 along with this program; if not, write to the Free Software Foundation,
 Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

 As a special exception, you may use this file as it is a part of a free
 software library without restriction. Specifically, if other files
 instantiate templates or use macros or inline functions from this file,
 or you compile this file and link it with other files to produce an
 executable, this file does not by itself cause the resulting executable
 to be covered by the GNU Lesser General Public License. This exception
 does not however invalidate any other reasons why the executable file
 might be covered by the GNU Lesser General Public License.

===========================================================================*/

/**@file getfem_omp.h
   @author Andriy Andreykiv <andriy.andreykiv@gmail.com>
   @date May 14th, 2013.
   @brief Tools for multithreaded, OpenMP and Boost based parallelization.

   This is the kernel of getfem.
*/
#pragma once
#ifndef GETFEM_OMP
#define GETFEM_OMP

#include <vector>
#include <algorithm>

#ifdef _WIN32
#include <mbctype.h>
#endif

#include <locale.h>
#include <memory>
#include "gmm/gmm_std.h"
#include "bgeot_config.h"
#ifdef GETFEM_HAVE_OPENMP
#include <omp.h>
#include <boost/thread.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#endif


namespace getfem
{
  using bgeot::size_type;
  class mesh;
  class mesh_region;

#ifdef GETFEM_HAVE_OPENMP
  //a thread lock to protect multi-threaded accesses to
  //asserts, traces and warnings; uses a global mutex
  class omp_guard: public boost::lock_guard<boost::recursive_mutex>
  {
  public:
    omp_guard();
  private:
    static boost::recursive_mutex boost_mutex;
  };

  //like boost::lock_guard, but copyable
  class local_guard
  {
  public:
    local_guard(boost::recursive_mutex&);
    local_guard(const local_guard&);

  private:
    boost::recursive_mutex& mutex_;
    boost::shared_ptr<boost::lock_guard<boost::recursive_mutex>> plock_;
  };

  //produces a scoped lock on the
  //mutex held in this class
  class lock_factory
  {
  public:
    lock_factory();

    //get a lock object with RAII acquire/release semantics
    //on the mutex from this factory
    local_guard get_lock() const;
  private:
    mutable boost::recursive_mutex mutex_;
  };


#else

  class omp_guard{};
  class local_guard{};
  struct lock_factory
  {
    inline local_guard get_lock() const {return local_guard();}
  };
#endif
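
  /* Usage sketch (hypothetical client code): a lock_factory owned by an
     object hands out copyable RAII locks that serialize access to shared
     state; 'assembly_cache' and its members are illustrative names only.

       struct assembly_cache {
         lock_factory locks;
         std::vector<double> shared_data;
         void update(size_type i, double v){
           local_guard guard = locks.get_lock();  // held until scope exit
           shared_data[i] = v;
         }
       };
  */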


#ifdef GETFEM_HAVE_OPENMP
  /**number of OpenMP threads*/
  inline size_type num_threads(){return size_type(omp_get_max_threads());}

  /**set maximum number of OpenMP threads*/
  inline void set_num_threads(int n){omp_set_num_threads(n);}

  /**index of the current thread*/
  inline size_type this_thread() {return size_type(omp_get_thread_num());}

  /**is the program running in the parallel section*/
  inline bool me_is_multithreaded_now(){return static_cast<bool>(omp_in_parallel());}

#else
  inline size_type num_threads(){return size_type(1);}
  inline void set_num_threads(int /* n */) { }
  inline size_type this_thread() {return size_type(0);}
  inline bool me_is_multithreaded_now(){return false;}
#endif
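
  /* Usage sketch (hypothetical): reserve one slot per thread in serial
     code, then let each thread write only to its own slot in parallel.

       std::vector<double> partial(num_threads(), 0.0);
       #pragma omp parallel
       {
         partial[this_thread()] += 1.0;  // no race: one slot per thread
       }
  */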


  /**use this template class for any object you want to
     distribute to OpenMP threads. The creation of this
     object should happen in serial, while accessing the individual
     thread local instances will take place in parallel. If
     one needs creation of a thread local object, use the macro
     DEFINE_STATIC_THREAD_LOCAL
  */
  template <typename T> class omp_distribute {
    std::vector<T> thread_values;
    friend struct all_values_proxy;
    struct all_values_proxy{
      omp_distribute& distro;
      all_values_proxy(omp_distribute& d): distro(d){}
      void operator=(const T& x){
        for(typename std::vector<T>::iterator it=distro.thread_values.begin();
            it!=distro.thread_values.end();it++) *it=x;
      }
    };
  public:

    template <class... Args>
    omp_distribute(Args&&... value){
      for (size_type i = 0; i < num_threads(); ++i) thread_values.emplace_back(value...);
    }
    operator T& (){return thread_values[this_thread()];}
    operator const T& () const {return thread_values[this_thread()];}
    T& thrd_cast(){return thread_values[this_thread()];}
    const T& thrd_cast() const {return thread_values[this_thread()];}
    T& operator()(size_type i) {
      return thread_values[i];
    }
    const T& operator()(size_type i) const {
      return thread_values[i];
    }
    T& operator = (const T& x){
      return (thread_values[this_thread()]=x);
    }

    all_values_proxy all_threads(){return all_values_proxy(*this);}

    ~omp_distribute(){}
  };
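
  /* Usage sketch (hypothetical): one accumulator per thread, created in
     serial code, updated in parallel, combined in serial code again.

       omp_distribute<double> sum(0.0);  // one copy per thread
       #pragma omp parallel
       {
         sum.thrd_cast() += 1.0;         // each thread updates its own copy
       }
       double total = 0.0;
       for (size_type t = 0; t < num_threads(); ++t) total += sum(t);
  */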

  template <typename T> class omp_distribute<std::vector<T> > {
    std::vector<std::vector<T> > thread_values;
    friend struct all_values_proxy;
    struct all_values_proxy{
      omp_distribute& distro;
      all_values_proxy(omp_distribute& d): distro(d){}
      void operator=(const std::vector<T>& x){
        for(typename std::vector<std::vector<T> >::iterator
            it=distro.thread_values.begin();
            it!=distro.thread_values.end();it++) *it=x;
      }
    };

  public:
    typedef std::vector<T> VEC;
    omp_distribute() : thread_values(num_threads()) {}
    omp_distribute(size_t n, const T& value) :
      thread_values(num_threads(), std::vector<T>(n,value)){}
    operator VEC& (){return thread_values[this_thread()];}
    operator const VEC& () const
    {return thread_values[this_thread()];}
    VEC& operator()(size_type i) {return thread_values[i];}
    const VEC& operator()(size_type i) const {return thread_values[i];}
    VEC& thrd_cast(){return thread_values[this_thread()];}
    const VEC& thrd_cast() const
    {return thread_values[this_thread()];}
    T& operator[](size_type i)
    {return thread_values[this_thread()][i];}
    const T& operator[](size_type i) const
    {return thread_values[this_thread()][i];}
    VEC& operator = (const VEC& value) {
      return (thread_values[this_thread()]=value);
    }
    all_values_proxy all_threads(){return all_values_proxy(*this);}
    ~omp_distribute(){}
  };
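
  /* Usage sketch (hypothetical): every thread owns an n-element vector;
     operator[] indexes into the current thread's vector.

       size_type n = 100;                            // hypothetical size
       omp_distribute<std::vector<double> > local(n, 0.0);
       #pragma omp parallel
       {
         local[0] = double(this_thread());  // element 0 of this thread's copy
       }
  */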

  /**specialization for bool, to circumvent the shortcomings
     of the standard library specialization for std::vector<bool>*/
  template <> class omp_distribute<bool> {
    typedef int BOOL;
    std::vector<BOOL> thread_values;
    friend struct all_values_proxy;
    struct all_values_proxy{
      omp_distribute& distro;
      all_values_proxy(omp_distribute& d): distro(d){}
      void operator=(const bool& x);
    };


  public:

    omp_distribute() : thread_values(num_threads()) {}
    omp_distribute(const bool& value) :
      thread_values(num_threads(),value) {}
    operator BOOL& (){return thread_values[this_thread()];}
    operator const BOOL& () const {return thread_values[this_thread()];}
    BOOL& thrd_cast(){return thread_values[this_thread()];}
    const BOOL& thrd_cast() const {return thread_values[this_thread()];}
    BOOL& operator()(size_type i) {
      return thread_values[i];
    }
    const BOOL& operator()(size_type i) const {
      return thread_values[i];
    }
    BOOL& operator = (const BOOL& x){
      return (thread_values[this_thread()]=x);
    }
    all_values_proxy all_threads(){return all_values_proxy(*this);}
    ~omp_distribute(){}
  };
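
  /* Usage sketch (hypothetical): per-thread flags. Ints serve as storage,
     so each thread writes to a distinct addressable element, whereas
     std::vector<bool> packs bits and is unsafe for concurrent writes.

       omp_distribute<bool> found(false);
       #pragma omp parallel
       {
         found.thrd_cast() = true;  // sets this thread's flag only
       }
  */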

  /* Use these macros only in function local context to achieve
     the effect of thread local storage for any type of objects
     and their initialization (it's more general and portable
     than using __declspec(thread))*/
#ifdef GETFEM_HAVE_OPENMP
#define DEFINE_STATIC_THREAD_LOCAL_INITIALIZED(Type,Var,initial) \
  static boost::thread_specific_ptr<Type> ptr_##Var; \
  if(!ptr_##Var.get()) {ptr_##Var.reset(new Type(initial));} \
  Type& Var=*ptr_##Var;

#define DEFINE_STATIC_THREAD_LOCAL(Type,Var) \
  static boost::thread_specific_ptr<Type> ptr_##Var; \
  if(!ptr_##Var.get()) {ptr_##Var.reset(new Type());} \
  Type& Var=*ptr_##Var;

#define DEFINE_STATIC_THREAD_LOCAL_CONSTRUCTED(Type, Var, ...) \
  static boost::thread_specific_ptr<Type> ptr_##Var; \
  if(!ptr_##Var.get()) {ptr_##Var.reset(new Type(__VA_ARGS__));} \
  Type& Var=*ptr_##Var;

#else
#define DEFINE_STATIC_THREAD_LOCAL_INITIALIZED(Type,Var,initial) \
  static Type Var(initial);

#define DEFINE_STATIC_THREAD_LOCAL(Type,Var) \
  static Type Var;

#define DEFINE_STATIC_THREAD_LOCAL_CONSTRUCTED(Type, Var, ...) \
  static Type Var(__VA_ARGS__);

#endif
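
  /* Usage sketch (hypothetical): a function-local buffer constructed once
     per thread on first call and reused on subsequent calls made by the
     same thread; 'assemble_element' is an illustrative name.

       void assemble_element(){
         DEFINE_STATIC_THREAD_LOCAL_CONSTRUCTED(std::vector<double>,
                                                buffer, 100, 0.0);
         buffer[0] = 1.0;  // this thread's private 100-element vector
       }
  */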

  class open_mp_is_running_properly{
    static omp_distribute<bool> answer;

  public:
    open_mp_is_running_properly();
    ~open_mp_is_running_properly();
    static bool is_it();
  };

#if defined _WIN32 && !defined (__GNUC__)
  /**parallelization function for a for loop*/
  template<class LOOP_BODY>
  inline void open_mp_for(int begin, int end,
                          const LOOP_BODY& loop_body){
    _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
    gmm::standard_locale locale;
    open_mp_is_running_properly check;
#pragma omp parallel default(shared)
    {
      _setmbcp(_MB_CP_ANSI);
#pragma omp for schedule(static)
      for(int i=begin;i<end;i++) loop_body(i);
    }
    _configthreadlocale(_DISABLE_PER_THREAD_LOCALE);
  }
#else /*LINUX*/
  /**parallelization function for a for loop*/
  template<class LOOP_BODY>
  inline void open_mp_for(
    int begin, int end, const LOOP_BODY& loop_body){
    gmm::standard_locale locale;
    open_mp_is_running_properly check;
#pragma omp parallel default(shared)
    {
#pragma omp for schedule(static)
      for(int i=begin;i<end;i++) loop_body(i);
    }
  }
#endif

  /**parallelization macro of a for loop*/
#define OPEN_MP_FOR(begin,end,loop_counter,loop_body) \
  getfem::open_mp_for(begin,end,loop_body(loop_counter));

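  /* Usage sketch (hypothetical): the loop body is any callable taking the
     loop index; iterations are split statically across threads.
     'scale_body' and 'scale_all' are illustrative names.

       struct scale_body {
         std::vector<double>& v;
         scale_body(std::vector<double>& vv) : v(vv) {}
         void operator()(int i) const { v[i] *= 2.0; }
       };

       void scale_all(std::vector<double>& v){
         getfem::open_mp_for(0, int(v.size()), scale_body(v));
       }
  */
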
  /**used to partition a mesh region so that
     each partition can be used on a different thread. Thread safe*/
  class region_partition {
    mesh* pparent_mesh;
    std::shared_ptr<mesh_region> original_region;
    mutable std::vector<size_type> partitions;
  public:
    region_partition(mesh* mmesh=0,size_type id=-1);
    region_partition(const region_partition& rp);
    void operator=(const region_partition& rp);
    size_type thread_local_partition() const;
  };

  /** Allows to re-throw exceptions generated in an OpenMP parallel section.
     Collects exceptions from all threads and on destruction re-throws
     the first one, so that
     it can be caught again in the master thread. */
  class thread_exception {
  public:
    thread_exception();

    /**re-throws the first captured exception*/
    ~thread_exception();

    /** Run function f in a parallel section to capture its exceptions.
       Possible syntax can be:
       thread_exception exception;
       #pragma omp parallel...
       {
         exception.run([&]
         {
           your code that can throw exceptions
         });
       }
       exception.rethrow();
    */
    template <typename Function, typename... Parameters>
    void run(Function f, Parameters... params)
    {
#ifdef GETFEM_HAVE_OPENMP
      try { f(params...); } catch (...) { captureException(); }
#else
      f(params...);
#endif
    }

    /**vector of pointers to caught exceptions*/
    std::vector<std::exception_ptr> caughtExceptions() const;
    void rethrow();

  private:
    void captureException();

    std::vector<std::exception_ptr> exceptions_;
  };


}

#endif //GETFEM_OMP