thread.h
/*
  Copyright 2008 Larry Gritz and the other authors and contributors.
  All Rights Reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:
  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
  * Neither the name of the software's owners nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  (This is the Modified BSD License)
*/


/////////////////////////////////////////////////////////////////////////
/// @file   thread.h
///
/// @brief  Wrappers and utilities for multithreading.
/////////////////////////////////////////////////////////////////////////


#ifndef OPENIMAGEIO_THREAD_H
#define OPENIMAGEIO_THREAD_H

#include "version.h"
#include "sysutil.h"


// Define NOMINMAX to prevent problems with std::min/std::max
// and std::numeric_limits<type>::min()/std::numeric_limits<type>::max()
// when boost includes windows.h
#ifdef _MSC_VER
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# ifndef NOMINMAX
#   define NOMINMAX
# endif
#endif

#include <boost/version.hpp>
#if defined(__GNUC__) && (BOOST_VERSION == 104500)
// gcc reports errors inside some of the boost headers with boost 1.45
// See: https://svn.boost.org/trac/boost/ticket/4818
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif

#include <boost/thread.hpp>
#include <boost/thread/tss.hpp>
#include <boost/version.hpp>

#if defined(__GNUC__) && (BOOST_VERSION == 104500)
// can't restore via push/pop in all versions of gcc (warning push/pop implemented for 4.6+ only)
#pragma GCC diagnostic error "-Wunused-variable"
#endif

#ifndef USE_TBB
# define USE_TBB 0
#endif

// Include files we need for atomic counters.
// Some day, we hope this is all replaced by use of std::atomic<>.
#if USE_TBB
# include <tbb/atomic.h>
# include <tbb/spin_mutex.h>
# define USE_TBB_ATOMIC 1
# define USE_TBB_SPINLOCK 1
#else
# define USE_TBB_ATOMIC 0
# define USE_TBB_SPINLOCK 0
#endif


#if defined(_MSC_VER) && !USE_TBB
# include <windows.h>
# include <winbase.h>
# pragma intrinsic (_InterlockedExchangeAdd)
# pragma intrinsic (_InterlockedCompareExchange)
# pragma intrinsic (_InterlockedCompareExchange64)
# pragma intrinsic (_ReadWriteBarrier)
# if defined(_WIN64)
#   pragma intrinsic(_InterlockedExchangeAdd64)
# endif
// InterlockedExchangeAdd64 is not available for XP
# if defined(_WIN32_WINNT) && _WIN32_WINNT <= 0x0501
inline long long
InterlockedExchangeAdd64 (volatile long long *Addend, long long Value)
{
    long long Old;
    do {
        Old = *Addend;
    } while (_InterlockedCompareExchange64(Addend, Old + Value, Old) != Old);
    return Old;
}
# endif
#endif

#if defined(__GNUC__) && (defined(_GLIBCXX_ATOMIC_BUILTINS) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 401))
#if !defined(__FreeBSD__) && !defined(__powerpc__) || defined(__x86_64__)
#define USE_GCC_ATOMICS
#endif
#endif

OIIO_NAMESPACE_ENTER
{

/// Null mutex that can be substituted for a real one, to test how much
/// overhead is associated with a particular mutex.
class null_mutex {
public:
    null_mutex () { }
    ~null_mutex () { }
    void lock () { }
    void unlock () { }
    void lock_shared () { }
    void unlock_shared () { }
};

/// Null lock that can be substituted for a real one, to test how much
/// overhead is associated with a particular lock.
template<typename T>
class null_lock {
public:
    null_lock (T &m) { }
};


// Null thread-specific ptr that just wraps a single ordinary pointer
//
template<typename T>
class null_thread_specific_ptr {
public:
    typedef void (*destructor_t)(T *);
    null_thread_specific_ptr (destructor_t dest=NULL)
        : m_ptr(NULL), m_dest(dest) { }
    ~null_thread_specific_ptr () { reset (NULL); }
    T * get () { return m_ptr; }
    void reset (T *newptr=NULL) {
        if (m_ptr) {
            if (m_dest)
                (*m_dest) (m_ptr);
            else
                delete m_ptr;
        }
        m_ptr = newptr;
    }
private:
    T *m_ptr;
    destructor_t m_dest;
};


#ifdef NOTHREADS

// Definitions that we use for debugging to turn off all mutexes, locks,
// and atomics in order to test the performance hit of our thread safety.

// Null thread-specific ptr that just wraps a single ordinary pointer
//
template<typename T>
class thread_specific_ptr {
public:
    typedef void (*destructor_t)(T *);
    thread_specific_ptr (destructor_t dest=NULL)
        : m_ptr(NULL), m_dest(dest) { }
    ~thread_specific_ptr () { reset (NULL); }
    T * get () { return m_ptr; }
    void reset (T *newptr=NULL) {
        if (m_ptr) {
            if (m_dest)
                (*m_dest) (m_ptr);
            else
                delete m_ptr;
        }
        m_ptr = newptr;
    }
private:
    T *m_ptr;
    destructor_t m_dest;
};


typedef null_mutex mutex;
typedef null_mutex recursive_mutex;
typedef null_lock<mutex> lock_guard;
typedef null_lock<recursive_mutex> recursive_lock_guard;


#else

// Fairly modern Boost has all the mutex and lock types we need.

typedef boost::mutex mutex;
typedef boost::recursive_mutex recursive_mutex;
typedef boost::lock_guard< boost::mutex > lock_guard;
typedef boost::lock_guard< boost::recursive_mutex > recursive_lock_guard;
using boost::thread_specific_ptr;

#endif
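
// Illustrative sketch (not part of the original header): the typedefs above
// are used in the usual RAII style -- the lock_guard acquires the mutex on
// construction and releases it when it goes out of scope.  The names
// g_counter_mutex, g_counter, and bump_counter are hypothetical.
//
//     static mutex g_counter_mutex;
//     static int g_counter = 0;
//
//     void bump_counter ()
//     {
//         lock_guard lock (g_counter_mutex);   // locks now, unlocks at scope exit
//         ++g_counter;
//     }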



/// Atomic version of:  r = *at, *at += x, return r
/// For each of several architectures.
inline int
atomic_exchange_and_add (volatile int *at, int x)
{
#ifdef USE_GCC_ATOMICS
    return __sync_fetch_and_add ((int *)at, x);
#elif USE_TBB
    atomic<int> *a = (atomic<int> *)at;
    return a->fetch_and_add (x);
#elif defined(_MSC_VER)
    // Windows
    return _InterlockedExchangeAdd ((volatile LONG *)at, x);
#elif defined (__powerpc__)
    long long r;
    r = *at;
    *at += x;
    return r;
#else
# error No atomics on this platform.
#endif
}



inline long long
atomic_exchange_and_add (volatile long long *at, long long x)
{
#ifdef USE_GCC_ATOMICS
    return __sync_fetch_and_add (at, x);
#elif USE_TBB
    atomic<long long> *a = (atomic<long long> *)at;
    return a->fetch_and_add (x);
#elif defined(_MSC_VER)
    // Windows
# if defined(_WIN64)
    return _InterlockedExchangeAdd64 ((volatile LONGLONG *)at, x);
# else
    return InterlockedExchangeAdd64 ((volatile LONGLONG *)at, x);
# endif
#elif defined (__powerpc__)
    long long r;
    r = *at;
    *at += x;
    return r;
#else
# error No atomics on this platform.
#endif
}
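
// Illustrative note (not from the original header): both overloads above
// return the value *before* the addition, i.e. an atomic fetch-then-add.
// A hypothetical use, handing out unique indices to concurrent callers:
//
//     static volatile int next_index = 0;
//
//     int claim_index ()
//     {
//         int my_index = atomic_exchange_and_add (&next_index, 1);
//         return my_index;   // unique per caller; next_index was incremented
//     }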



/// Atomic version of:
///    if (*at == compareval) {
///        *at = newval;  return true;
///    } else {
///        return false;
///    }
inline bool
atomic_compare_and_exchange (volatile int *at, int compareval, int newval)
{
#ifdef USE_GCC_ATOMICS
    return __sync_bool_compare_and_swap (at, compareval, newval);
#elif USE_TBB
    atomic<int> *a = (atomic<int> *)at;
    // TBB's compare_and_swap returns the original value; the swap
    // succeeded iff that original value equaled compareval.
    return a->compare_and_swap (newval, compareval) == compareval;
#elif defined(_MSC_VER)
    return (_InterlockedCompareExchange ((volatile LONG *)at, newval, compareval) == compareval);
#elif defined(__powerpc__)
    return ((*at == compareval) ? (*at = newval), 1 : 0);
#else
# error No atomics on this platform.
#endif
}



inline bool
atomic_compare_and_exchange (volatile long long *at, long long compareval, long long newval)
{
#ifdef USE_GCC_ATOMICS
    return __sync_bool_compare_and_swap (at, compareval, newval);
#elif USE_TBB
    atomic<long long> *a = (atomic<long long> *)at;
    // As above: the swap succeeded iff the original value equaled compareval.
    return a->compare_and_swap (newval, compareval) == compareval;
#elif defined(_MSC_VER)
    return (_InterlockedCompareExchange64 ((volatile LONGLONG *)at, newval, compareval) == compareval);
#elif defined(__PPC__)
    return ((*at == compareval) ? (*at = newval), 1 : 0);
#else
# error No atomics on this platform.
#endif
}
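
// Illustrative sketch (not from the original header): a lock-free
// read-modify-write loop built on atomic_compare_and_exchange.  The helper
// name atomic_max is hypothetical.
//
//     inline void atomic_max (volatile int *at, int candidate)
//     {
//         int old;
//         do {
//             old = *at;
//             if (old >= candidate)
//                 return;           // already at least as large, nothing to do
//         } while (! atomic_compare_and_exchange (at, old, candidate));
//     }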



/// Yield the processor for the rest of the timeslice.
///
inline void
yield ()
{
#if defined(__GNUC__)
    sched_yield ();
#elif defined(_MSC_VER)
    SwitchToThread ();
#else
# error No yield on this platform.
#endif
}



// Slight pause
inline void
pause (int delay)
{
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
    for (int i = 0; i < delay; ++i)
        __asm__ __volatile__("pause;");

#elif defined(__GNUC__) && (defined(__arm__) || defined(__s390__))
    for (int i = 0; i < delay; ++i)
        __asm__ __volatile__("NOP;");

#elif USE_TBB
    __TBB_Pause(delay);

#elif defined(_MSC_VER)
    for (int i = 0; i < delay; ++i) {
#if defined (_WIN64)
        YieldProcessor();
#else
        _asm pause
#endif /* _WIN64 */
    }

#else
    // No pause on this platform, just punt
    for (int i = 0; i < delay; ++i) ;
#endif
}



// Helper class to deliver ever longer pauses until we yield our timeslice.
class atomic_backoff {
public:
    atomic_backoff () : m_count(1) { }

    void operator() () {
        if (m_count <= 16) {
            pause (m_count);
            m_count *= 2;
        } else {
            yield();
        }
    }

private:
    int m_count;
};
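
// Illustrative sketch (not from the original header): atomic_backoff is
// meant to be called repeatedly inside a spin-wait loop, pausing a little
// longer each time and finally yielding the timeslice -- the same pattern
// spin_mutex::lock() uses below.  The helper name wait_until_nonzero is
// hypothetical.
//
//     void wait_until_nonzero (volatile int &flag)
//     {
//         atomic_backoff backoff;
//         while (flag == 0)
//             backoff ();    // pause 1, 2, 4, ... iterations, then yield
//     }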



#if USE_TBB_ATOMIC
using tbb::atomic;
#else
// If we're not using TBB's atomic, we need to define our own atomic<>.


/// Atomic integer.  Increment, decrement, add, and subtract in a
/// totally thread-safe manner.
template<class T>
class atomic {
public:
    /// Construct with initial value.
    ///
    atomic (T val=0) : m_val(val) { }

    ~atomic () { }

    /// Retrieve value
    ///
    T operator() () const { return atomic_exchange_and_add (&m_val, 0); }

    /// Retrieve value
    ///
    operator T() const { return atomic_exchange_and_add (&m_val, 0); }

    /// Fast retrieval of value, no exchange, don't care about memory fences.
    ///
    T fast_value () const { return m_val; }

    /// Assign new value.
    ///
    T operator= (T x) {
        //incorrect? return (m_val = x);
        while (1) {
            T result = m_val;
            if (atomic_compare_and_exchange (&m_val, result, x))
                break;
        }
        return x;
    }

    /// Pre-increment:  ++foo
    ///
    T operator++ () { return atomic_exchange_and_add (&m_val, 1) + 1; }

    /// Post-increment:  foo++
    ///
    T operator++ (int) { return atomic_exchange_and_add (&m_val, 1); }

    /// Pre-decrement:  --foo
    ///
    T operator-- () { return atomic_exchange_and_add (&m_val, -1) - 1; }

    /// Post-decrement:  foo--
    ///
    T operator-- (int) { return atomic_exchange_and_add (&m_val, -1); }

    /// Add to the value, return the new result.
    ///
    T operator+= (T x) { return atomic_exchange_and_add (&m_val, x) + x; }

    /// Subtract from the value, return the new result.
    ///
    T operator-= (T x) { return atomic_exchange_and_add (&m_val, -x) - x; }

    bool bool_compare_and_swap (T compareval, T newval) {
        return atomic_compare_and_exchange (&m_val, compareval, newval);
    }

    T operator= (const atomic &x) {
        T r = x();
        *this = r;
        return r;
    }

private:
#ifdef __arm__
    OIIO_ALIGN(8)
#endif
    volatile mutable T m_val;

    // Disallow copy construction by making private and unimplemented.
    atomic (atomic const &);
#if defined __arm__
} __attribute__((aligned(8)));
#else
};
#endif



#endif /* ! USE_TBB_ATOMIC */


#ifdef NOTHREADS

typedef int atomic_int;
typedef long long atomic_ll;

#else

typedef atomic<int> atomic_int;
typedef atomic<long long> atomic_ll;

#endif
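
// Illustrative sketch (not from the original header): atomic_int behaves
// like a plain int whose increments, decrements, and additions are safe to
// perform from multiple threads without a mutex.  The names items_processed
// and worker_step are hypothetical.
//
//     static atomic_int items_processed;     // starts at 0
//
//     void worker_step ()
//     {
//         ++items_processed;                 // atomic increment
//         items_processed += 4;              // atomic add, returns the new value
//     }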



#ifdef NOTHREADS

typedef null_mutex spin_mutex;
typedef null_lock<spin_mutex> spin_lock;

#elif USE_TBB_SPINLOCK

// Use TBB's spin locks
typedef tbb::spin_mutex spin_mutex;
typedef tbb::spin_mutex::scoped_lock spin_lock;


#else

// Define our own spin locks.  Do we trust them?


/// A spin_mutex is semantically equivalent to a regular mutex, except:
///  - it is only a few bytes, whereas a regular mutex is comparatively large;
///  - it is very cheap to lock and unlock when uncontended;
///  - it burns CPU while it waits, so it is only a good choice for locks
///    that are held very briefly, where a blocking mutex would cost more
///    to acquire than the critical section itself.
class spin_mutex {
public:
    /// Default constructor -- initialize to unlocked.
    ///
    spin_mutex (void) { m_locked = 0; }

    ~spin_mutex (void) { }

    /// Copy constructor -- initialize to unlocked.
    ///
    spin_mutex (const spin_mutex &) { m_locked = 0; }

    /// Assignment does not copy the lock state.
    ///
    const spin_mutex& operator= (const spin_mutex&) { return *this; }

    /// Acquire the lock, spinning until we have it.
    ///
    void lock () {
        // To avoid spinning too tightly, we use the atomic_backoff to
        // provide increasingly longer pauses, and if the lock is under
        // lots of contention, eventually yield the timeslice.
        atomic_backoff backoff;

        // Try to get ownership of the lock.  Through experimentation, we
        // found that OIIO_UNLIKELY makes this just a bit faster on
        // gcc x86/x86_64 systems.
        while (! OIIO_UNLIKELY(try_lock())) {
            do {
                backoff();
            } while (m_locked);

            // The full try_lock() involves a compare_and_swap, which
            // writes memory, and that will lock the bus.  But a normal
            // read of m_locked will let us spin until the value
            // changes, without locking the bus.  So it's faster to
            // check in this manner until the mutex appears to be free.
        }
    }

    /// Release the lock that we hold.
    ///
    void unlock () {
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
        // Fastest way to do it is with a store with "release" semantics
        __asm__ __volatile__("": : :"memory");
        m_locked = 0;
        // N.B. GCC gives us an intrinsic that is even better, an atomic
        // assignment of 0 with "release" barrier semantics:
        //   __sync_lock_release (&m_locked);
        // But empirically we found it not as performant as the above.
#elif defined(_MSC_VER)
        _ReadWriteBarrier();
        m_locked = 0;
#else
        // Otherwise, just assign zero to the atomic (but that's a full
        // memory barrier).
        *(atomic_int *)&m_locked = 0;
#endif
    }

    /// Try to acquire the lock.  Return true if we have it, false if
    /// somebody else is holding it.
    bool try_lock () {
#if USE_TBB_ATOMIC
        // TBB's compare_and_swap returns the original value
        return (*(atomic_int *)&m_locked).compare_and_swap (0, 1) == 0;
#elif defined(__GNUC__)
        // GCC gives us an intrinsic that is even better -- an atomic
        // exchange with "acquire" barrier semantics.
        return __sync_lock_test_and_set (&m_locked, 1) == 0;
#else
        // Our compare_and_swap returns true if it swapped
        return atomic_compare_and_exchange (&m_locked, 0, 1);
#endif
    }

    /// Helper class: scoped lock for a spin_mutex -- grabs the lock upon
    /// construction, releases it when it exits scope.
    class lock_guard {
    public:
        lock_guard (spin_mutex &fm) : m_fm(fm) { m_fm.lock(); }
        ~lock_guard () { m_fm.unlock(); }
    private:
        lock_guard(); // Do not implement (even though TBB does)
        lock_guard(const lock_guard& other); // Do not implement
        lock_guard& operator = (const lock_guard& other); // Do not implement
        spin_mutex & m_fm;
    };

private:
    volatile int m_locked;   // zero if nobody holds the lock
};


typedef spin_mutex::lock_guard spin_lock;

#endif
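
// Illustrative sketch (not from the original header): spin_lock is used the
// same way as lock_guard above, and is a good fit for very short critical
// sections.  The names g_stats_mutex, g_tiles_loaded, and note_tile_loaded
// are hypothetical.
//
//     static spin_mutex g_stats_mutex;
//     static long long g_tiles_loaded = 0;
//
//     void note_tile_loaded ()
//     {
//         spin_lock lock (g_stats_mutex);   // busy-waits briefly if contended
//         ++g_tiles_loaded;
//     }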



/// Spinning reader/writer mutex.  Like spin_mutex, except that there are
/// separate locking mechanisms for "writers" (exclusive holders of the
/// lock, presumably because they are modifying whatever the lock protects)
/// and "readers" (non-exclusive, non-modifying tasks that may access the
/// protected data simultaneously).
class spin_rw_mutex {
public:
    /// Default constructor -- initialize to unlocked.
    ///
    spin_rw_mutex (void) { m_readers = 0; }

    ~spin_rw_mutex (void) { }

    /// Copy constructor -- initialize to unlocked.
    ///
    spin_rw_mutex (const spin_rw_mutex &) { m_readers = 0; }

    /// Assignment does not copy the lock state.
    ///
    const spin_rw_mutex& operator= (const spin_rw_mutex&) { return *this; }

    /// Acquire the reader lock.
    ///
    void read_lock () {
        // Spin until there are no writers active
        m_locked.lock();
        // Register ourself as a reader
        ++m_readers;
        // Release the lock, to let other readers work
        m_locked.unlock();
    }

    /// Release the reader lock.
    ///
    void read_unlock () {
        --m_readers;   // it's atomic, no need to lock to release
    }

    /// Acquire the writer lock.
    ///
    void write_lock () {
        // Make sure no new readers (or writers) can start
        m_locked.lock();
        // Spin until the last reader is done, at which point we will be
        // the sole owners and nobody else (reader or writer) can acquire
        // the resource until we release it.
        while (*(volatile int *)&m_readers > 0)
            ;
    }

    /// Release the writer lock.
    ///
    void write_unlock () {
        // Let other readers or writers get the lock
        m_locked.unlock ();
    }

    /// Helper class: scoped read lock for a spin_rw_mutex -- grabs the
    /// read lock upon construction, releases it when it exits scope.
    class read_lock_guard {
    public:
        read_lock_guard (spin_rw_mutex &fm) : m_fm(fm) { m_fm.read_lock(); }
        ~read_lock_guard () { m_fm.read_unlock(); }
    private:
        read_lock_guard(); // Do not implement
        read_lock_guard(const read_lock_guard& other); // Do not implement
        read_lock_guard& operator = (const read_lock_guard& other); // Do not implement
        spin_rw_mutex & m_fm;
    };

    /// Helper class: scoped write lock for a spin_rw_mutex -- grabs the
    /// write lock upon construction, releases it when it exits scope.
    class write_lock_guard {
    public:
        write_lock_guard (spin_rw_mutex &fm) : m_fm(fm) { m_fm.write_lock(); }
        ~write_lock_guard () { m_fm.write_unlock(); }
    private:
        write_lock_guard(); // Do not implement
        write_lock_guard(const write_lock_guard& other); // Do not implement
        write_lock_guard& operator = (const write_lock_guard& other); // Do not implement
        spin_rw_mutex & m_fm;
    };

private:
    OIIO_CACHE_ALIGN
    spin_mutex m_locked;   // write lock
    char pad1_[OIIO_CACHE_LINE_SIZE-sizeof(spin_mutex)];
    OIIO_CACHE_ALIGN
    atomic_int m_readers;  // number of readers
    char pad2_[OIIO_CACHE_LINE_SIZE-sizeof(atomic_int)];
};


typedef spin_rw_mutex::read_lock_guard spin_rw_read_lock;
typedef spin_rw_mutex::write_lock_guard spin_rw_write_lock;

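
// Illustrative sketch (not from the original header): many readers may hold
// the shared lock at once, while the occasional writer takes the exclusive
// lock.  The names g_cache_mutex, g_cache, lookup, and insert are
// hypothetical, and the example assumes <map> is included.
//
//     static spin_rw_mutex g_cache_mutex;
//     static std::map<int,float> g_cache;
//
//     bool lookup (int key, float &value)
//     {
//         spin_rw_read_lock lock (g_cache_mutex);    // many readers at once
//         std::map<int,float>::const_iterator it = g_cache.find (key);
//         if (it == g_cache.end())
//             return false;
//         value = it->second;
//         return true;
//     }
//
//     void insert (int key, float value)
//     {
//         spin_rw_write_lock lock (g_cache_mutex);   // exclusive access
//         g_cache[key] = value;
//     }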

}
OIIO_NAMESPACE_EXIT

#endif // OPENIMAGEIO_THREAD_H