rqspinlock.h 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef _ASM_RQSPINLOCK_H
  3. #define _ASM_RQSPINLOCK_H
  4. #include <asm/barrier.h>
  5. /*
  6. * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
  7. * version based on [0]. In rqspinlock code, our conditional expression involves
  8. * checking the value _and_ additionally a timeout. However, on arm64, the
  9. * WFE-based implementation may never spin again if no stores occur to the
  10. * locked byte in the lock word. As such, we may be stuck forever if
  11. * event-stream based unblocking is not available on the platform for WFE spin
  12. * loops (arch_timer_evtstrm_available).
  13. *
  14. * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
  15. * copy-paste.
  16. *
  17. * We rely on the implementation to amortize the cost of sampling cond_expr
  18. * for us. That does not happen when event stream support is unavailable: in
  19. * that fallback only the time_expr check is amortized. This is not the common
  20. * case, and it would be difficult to fit our logic in the time_expr_ns >=
  21. * time_limit_ns comparison, hence just let it be. In case of event-stream, the
  22. * loop is woken up at microsecond granularity.
  23. *
  24. * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com
  25. */
  26. #ifndef smp_cond_load_acquire_timewait
  27. #define smp_cond_time_check_count 200
  28. #define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \
  29. time_limit_ns) ({ \
  30. typeof(ptr) __PTR = (ptr); \
  31. __unqual_scalar_typeof(*ptr) VAL; \
  32. unsigned int __count = 0; \
  33. for (;;) { \
  34. VAL = READ_ONCE(*__PTR); \
  35. if (cond_expr) \
  36. break; \
  37. cpu_relax(); \
  38. if (__count++ < smp_cond_time_check_count) \
  39. continue; \
  40. if ((time_expr_ns) >= (time_limit_ns)) \
  41. break; \
  42. __count = 0; \
  43. } \
  44. (typeof(*ptr))VAL; \
  45. })
  46. #define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
  47. time_expr_ns, time_limit_ns) \
  48. ({ \
  49. typeof(ptr) __PTR = (ptr); \
  50. __unqual_scalar_typeof(*ptr) VAL; \
  51. for (;;) { \
  52. VAL = smp_load_acquire(__PTR); \
  53. if (cond_expr) \
  54. break; \
  55. __cmpwait_relaxed(__PTR, VAL); \
  56. if ((time_expr_ns) >= (time_limit_ns)) \
  57. break; \
  58. } \
  59. (typeof(*ptr))VAL; \
  60. })
  61. #define smp_cond_load_acquire_timewait(ptr, cond_expr, \
  62. time_expr_ns, time_limit_ns) \
  63. ({ \
  64. __unqual_scalar_typeof(*ptr) _val; \
  65. int __wfe = arch_timer_evtstrm_available(); \
  66. \
  67. if (likely(__wfe)) { \
  68. _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
  69. time_expr_ns, \
  70. time_limit_ns); \
  71. } else { \
  72. _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
  73. time_expr_ns, \
  74. time_limit_ns); \
  75. smp_acquire__after_ctrl_dep(); \
  76. } \
  77. (typeof(*ptr))_val; \
  78. })
  79. #endif
  80. #define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
  81. #include <asm-generic/rqspinlock.h>
  82. #endif /* _ASM_RQSPINLOCK_H */