i915_memcpy.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. /*
  2. * Copyright © 2016 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include <linux/kernel.h>
  25. #include <linux/string.h>
  26. #include <linux/cpufeature.h>
  27. #include <linux/bug.h>
  28. #include <linux/build_bug.h>
  29. #include <asm/fpu/api.h>
  30. #include "i915_memcpy.h"
  31. #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
  32. #define CI_BUG_ON(expr) BUG_ON(expr)
  33. #else
  34. #define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
  35. #endif
  36. static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
  37. static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
  38. {
  39. kernel_fpu_begin();
  40. while (len >= 4) {
  41. asm("movntdqa (%0), %%xmm0\n"
  42. "movntdqa 16(%0), %%xmm1\n"
  43. "movntdqa 32(%0), %%xmm2\n"
  44. "movntdqa 48(%0), %%xmm3\n"
  45. "movaps %%xmm0, (%1)\n"
  46. "movaps %%xmm1, 16(%1)\n"
  47. "movaps %%xmm2, 32(%1)\n"
  48. "movaps %%xmm3, 48(%1)\n"
  49. :: "r" (src), "r" (dst) : "memory");
  50. src += 64;
  51. dst += 64;
  52. len -= 4;
  53. }
  54. while (len--) {
  55. asm("movntdqa (%0), %%xmm0\n"
  56. "movaps %%xmm0, (%1)\n"
  57. :: "r" (src), "r" (dst) : "memory");
  58. src += 16;
  59. dst += 16;
  60. }
  61. kernel_fpu_end();
  62. }
  63. static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
  64. {
  65. kernel_fpu_begin();
  66. while (len >= 4) {
  67. asm("movntdqa (%0), %%xmm0\n"
  68. "movntdqa 16(%0), %%xmm1\n"
  69. "movntdqa 32(%0), %%xmm2\n"
  70. "movntdqa 48(%0), %%xmm3\n"
  71. "movups %%xmm0, (%1)\n"
  72. "movups %%xmm1, 16(%1)\n"
  73. "movups %%xmm2, 32(%1)\n"
  74. "movups %%xmm3, 48(%1)\n"
  75. :: "r" (src), "r" (dst) : "memory");
  76. src += 64;
  77. dst += 64;
  78. len -= 4;
  79. }
  80. while (len--) {
  81. asm("movntdqa (%0), %%xmm0\n"
  82. "movups %%xmm0, (%1)\n"
  83. :: "r" (src), "r" (dst) : "memory");
  84. src += 16;
  85. dst += 16;
  86. }
  87. kernel_fpu_end();
  88. }
  89. /**
  90. * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
  91. * @dst: destination pointer
  92. * @src: source pointer
  93. * @len: how many bytes to copy
  94. *
  95. * i915_memcpy_from_wc copies @len bytes from @src to @dst using
  96. * non-temporal instructions where available. Note that all arguments
  97. * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
  98. * of 16.
  99. *
  100. * To test whether accelerated reads from WC are supported, use
  101. * i915_memcpy_from_wc(NULL, NULL, 0);
  102. *
  103. * Returns true if the copy was successful, false if the preconditions
  104. * are not met.
  105. */
  106. bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
  107. {
  108. if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
  109. return false;
  110. if (static_branch_likely(&has_movntdqa)) {
  111. if (likely(len))
  112. __memcpy_ntdqa(dst, src, len >> 4);
  113. return true;
  114. }
  115. return false;
  116. }
  117. /**
  118. * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
  119. * @dst: destination pointer
  120. * @src: source pointer
  121. * @len: how many bytes to copy
  122. *
  123. * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
  124. * @src to @dst using * non-temporal instructions where available, but
  125. * accepts that its arguments may not be aligned, but are valid for the
  126. * potential 16-byte read past the end.
  127. */
  128. void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
  129. {
  130. unsigned long addr;
  131. CI_BUG_ON(!i915_has_memcpy_from_wc());
  132. addr = (unsigned long)src;
  133. if (!IS_ALIGNED(addr, 16)) {
  134. unsigned long x = min(ALIGN(addr, 16) - addr, len);
  135. memcpy(dst, src, x);
  136. len -= x;
  137. dst += x;
  138. src += x;
  139. }
  140. if (likely(len))
  141. __memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
  142. }
  143. void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
  144. {
  145. /*
  146. * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
  147. * emulation. So don't enable movntdqa in hypervisor guest.
  148. */
  149. if (static_cpu_has(X86_FEATURE_XMM4_1) &&
  150. !boot_cpu_has(X86_FEATURE_HYPERVISOR))
  151. static_branch_enable(&has_movntdqa);
  152. }