recov_loongarch_simd.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
// SPDX-License-Identifier: GPL-2.0-only
/*
 * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Originally based on recov_avx2.c and recov_ssse3.c:
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */
  12. #include <linux/raid/pq.h>
  13. #include "loongarch.h"
/*
 * Unlike with the syndrome calculation algorithms, there's no boot-time
 * selection of recovery algorithms by benchmarking, so we have to specify
 * the priorities and hope the future cores will all have decent vector
 * support (i.e. no LASX slower than LSX, or even scalar code).
 */
  20. #ifdef CONFIG_CPU_HAS_LSX
  21. static int raid6_has_lsx(void)
  22. {
  23. return cpu_has_lsx;
  24. }
  25. static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
  26. int failb, void **ptrs)
  27. {
  28. u8 *p, *q, *dp, *dq;
  29. const u8 *pbmul; /* P multiplier table for B data */
  30. const u8 *qmul; /* Q multiplier table (for both) */
  31. p = (u8 *)ptrs[disks - 2];
  32. q = (u8 *)ptrs[disks - 1];
  33. /*
  34. * Compute syndrome with zero for the missing data pages
  35. * Use the dead data pages as temporary storage for
  36. * delta p and delta q
  37. */
  38. dp = (u8 *)ptrs[faila];
  39. ptrs[faila] = raid6_get_zero_page();
  40. ptrs[disks - 2] = dp;
  41. dq = (u8 *)ptrs[failb];
  42. ptrs[failb] = raid6_get_zero_page();
  43. ptrs[disks - 1] = dq;
  44. raid6_call.gen_syndrome(disks, bytes, ptrs);
  45. /* Restore pointer table */
  46. ptrs[faila] = dp;
  47. ptrs[failb] = dq;
  48. ptrs[disks - 2] = p;
  49. ptrs[disks - 1] = q;
  50. /* Now, pick the proper data tables */
  51. pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
  52. qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
  53. kernel_fpu_begin();
  54. /*
  55. * vr20, vr21: qmul
  56. * vr22, vr23: pbmul
  57. */
  58. asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
  59. asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
  60. asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
  61. asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
  62. while (bytes) {
  63. /* vr4 - vr7: Q */
  64. asm volatile("vld $vr4, %0" : : "m" (q[0]));
  65. asm volatile("vld $vr5, %0" : : "m" (q[16]));
  66. asm volatile("vld $vr6, %0" : : "m" (q[32]));
  67. asm volatile("vld $vr7, %0" : : "m" (q[48]));
  68. /* vr4 - vr7: Q + Qxy */
  69. asm volatile("vld $vr8, %0" : : "m" (dq[0]));
  70. asm volatile("vld $vr9, %0" : : "m" (dq[16]));
  71. asm volatile("vld $vr10, %0" : : "m" (dq[32]));
  72. asm volatile("vld $vr11, %0" : : "m" (dq[48]));
  73. asm volatile("vxor.v $vr4, $vr4, $vr8");
  74. asm volatile("vxor.v $vr5, $vr5, $vr9");
  75. asm volatile("vxor.v $vr6, $vr6, $vr10");
  76. asm volatile("vxor.v $vr7, $vr7, $vr11");
  77. /* vr0 - vr3: P */
  78. asm volatile("vld $vr0, %0" : : "m" (p[0]));
  79. asm volatile("vld $vr1, %0" : : "m" (p[16]));
  80. asm volatile("vld $vr2, %0" : : "m" (p[32]));
  81. asm volatile("vld $vr3, %0" : : "m" (p[48]));
  82. /* vr0 - vr3: P + Pxy */
  83. asm volatile("vld $vr8, %0" : : "m" (dp[0]));
  84. asm volatile("vld $vr9, %0" : : "m" (dp[16]));
  85. asm volatile("vld $vr10, %0" : : "m" (dp[32]));
  86. asm volatile("vld $vr11, %0" : : "m" (dp[48]));
  87. asm volatile("vxor.v $vr0, $vr0, $vr8");
  88. asm volatile("vxor.v $vr1, $vr1, $vr9");
  89. asm volatile("vxor.v $vr2, $vr2, $vr10");
  90. asm volatile("vxor.v $vr3, $vr3, $vr11");
  91. /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
  92. asm volatile("vsrli.b $vr8, $vr4, 4");
  93. asm volatile("vsrli.b $vr9, $vr5, 4");
  94. asm volatile("vsrli.b $vr10, $vr6, 4");
  95. asm volatile("vsrli.b $vr11, $vr7, 4");
  96. /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
  97. asm volatile("vandi.b $vr4, $vr4, 0x0f");
  98. asm volatile("vandi.b $vr5, $vr5, 0x0f");
  99. asm volatile("vandi.b $vr6, $vr6, 0x0f");
  100. asm volatile("vandi.b $vr7, $vr7, 0x0f");
  101. /* lookup from qmul[0] */
  102. asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
  103. asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
  104. asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
  105. asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
  106. /* lookup from qmul[16] */
  107. asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
  108. asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
  109. asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
  110. asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
  111. /* vr16 - vr19: B(Q + Qxy) */
  112. asm volatile("vxor.v $vr16, $vr8, $vr4");
  113. asm volatile("vxor.v $vr17, $vr9, $vr5");
  114. asm volatile("vxor.v $vr18, $vr10, $vr6");
  115. asm volatile("vxor.v $vr19, $vr11, $vr7");
  116. /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
  117. asm volatile("vsrli.b $vr4, $vr0, 4");
  118. asm volatile("vsrli.b $vr5, $vr1, 4");
  119. asm volatile("vsrli.b $vr6, $vr2, 4");
  120. asm volatile("vsrli.b $vr7, $vr3, 4");
  121. /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
  122. asm volatile("vandi.b $vr12, $vr0, 0x0f");
  123. asm volatile("vandi.b $vr13, $vr1, 0x0f");
  124. asm volatile("vandi.b $vr14, $vr2, 0x0f");
  125. asm volatile("vandi.b $vr15, $vr3, 0x0f");
  126. /* lookup from pbmul[0] */
  127. asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
  128. asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
  129. asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
  130. asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
  131. /* lookup from pbmul[16] */
  132. asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
  133. asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
  134. asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
  135. asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
  136. /* vr4 - vr7: A(P + Pxy) */
  137. asm volatile("vxor.v $vr4, $vr4, $vr12");
  138. asm volatile("vxor.v $vr5, $vr5, $vr13");
  139. asm volatile("vxor.v $vr6, $vr6, $vr14");
  140. asm volatile("vxor.v $vr7, $vr7, $vr15");
  141. /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
  142. asm volatile("vxor.v $vr4, $vr4, $vr16");
  143. asm volatile("vxor.v $vr5, $vr5, $vr17");
  144. asm volatile("vxor.v $vr6, $vr6, $vr18");
  145. asm volatile("vxor.v $vr7, $vr7, $vr19");
  146. asm volatile("vst $vr4, %0" : "=m" (dq[0]));
  147. asm volatile("vst $vr5, %0" : "=m" (dq[16]));
  148. asm volatile("vst $vr6, %0" : "=m" (dq[32]));
  149. asm volatile("vst $vr7, %0" : "=m" (dq[48]));
  150. /* vr0 - vr3: P + Pxy + Dx = Dy */
  151. asm volatile("vxor.v $vr0, $vr0, $vr4");
  152. asm volatile("vxor.v $vr1, $vr1, $vr5");
  153. asm volatile("vxor.v $vr2, $vr2, $vr6");
  154. asm volatile("vxor.v $vr3, $vr3, $vr7");
  155. asm volatile("vst $vr0, %0" : "=m" (dp[0]));
  156. asm volatile("vst $vr1, %0" : "=m" (dp[16]));
  157. asm volatile("vst $vr2, %0" : "=m" (dp[32]));
  158. asm volatile("vst $vr3, %0" : "=m" (dp[48]));
  159. bytes -= 64;
  160. p += 64;
  161. q += 64;
  162. dp += 64;
  163. dq += 64;
  164. }
  165. kernel_fpu_end();
  166. }
  167. static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
  168. void **ptrs)
  169. {
  170. u8 *p, *q, *dq;
  171. const u8 *qmul; /* Q multiplier table */
  172. p = (u8 *)ptrs[disks - 2];
  173. q = (u8 *)ptrs[disks - 1];
  174. /*
  175. * Compute syndrome with zero for the missing data page
  176. * Use the dead data page as temporary storage for delta q
  177. */
  178. dq = (u8 *)ptrs[faila];
  179. ptrs[faila] = raid6_get_zero_page();
  180. ptrs[disks - 1] = dq;
  181. raid6_call.gen_syndrome(disks, bytes, ptrs);
  182. /* Restore pointer table */
  183. ptrs[faila] = dq;
  184. ptrs[disks - 1] = q;
  185. /* Now, pick the proper data tables */
  186. qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
  187. kernel_fpu_begin();
  188. /* vr22, vr23: qmul */
  189. asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
  190. asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
  191. while (bytes) {
  192. /* vr0 - vr3: P + Dx */
  193. asm volatile("vld $vr0, %0" : : "m" (p[0]));
  194. asm volatile("vld $vr1, %0" : : "m" (p[16]));
  195. asm volatile("vld $vr2, %0" : : "m" (p[32]));
  196. asm volatile("vld $vr3, %0" : : "m" (p[48]));
  197. /* vr4 - vr7: Qx */
  198. asm volatile("vld $vr4, %0" : : "m" (dq[0]));
  199. asm volatile("vld $vr5, %0" : : "m" (dq[16]));
  200. asm volatile("vld $vr6, %0" : : "m" (dq[32]));
  201. asm volatile("vld $vr7, %0" : : "m" (dq[48]));
  202. /* vr4 - vr7: Q + Qx */
  203. asm volatile("vld $vr8, %0" : : "m" (q[0]));
  204. asm volatile("vld $vr9, %0" : : "m" (q[16]));
  205. asm volatile("vld $vr10, %0" : : "m" (q[32]));
  206. asm volatile("vld $vr11, %0" : : "m" (q[48]));
  207. asm volatile("vxor.v $vr4, $vr4, $vr8");
  208. asm volatile("vxor.v $vr5, $vr5, $vr9");
  209. asm volatile("vxor.v $vr6, $vr6, $vr10");
  210. asm volatile("vxor.v $vr7, $vr7, $vr11");
  211. /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
  212. asm volatile("vsrli.b $vr8, $vr4, 4");
  213. asm volatile("vsrli.b $vr9, $vr5, 4");
  214. asm volatile("vsrli.b $vr10, $vr6, 4");
  215. asm volatile("vsrli.b $vr11, $vr7, 4");
  216. /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
  217. asm volatile("vandi.b $vr4, $vr4, 0x0f");
  218. asm volatile("vandi.b $vr5, $vr5, 0x0f");
  219. asm volatile("vandi.b $vr6, $vr6, 0x0f");
  220. asm volatile("vandi.b $vr7, $vr7, 0x0f");
  221. /* lookup from qmul[0] */
  222. asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
  223. asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
  224. asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
  225. asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
  226. /* lookup from qmul[16] */
  227. asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
  228. asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
  229. asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
  230. asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
  231. /* vr4 - vr7: qmul(Q + Qx) = Dx */
  232. asm volatile("vxor.v $vr4, $vr4, $vr8");
  233. asm volatile("vxor.v $vr5, $vr5, $vr9");
  234. asm volatile("vxor.v $vr6, $vr6, $vr10");
  235. asm volatile("vxor.v $vr7, $vr7, $vr11");
  236. asm volatile("vst $vr4, %0" : "=m" (dq[0]));
  237. asm volatile("vst $vr5, %0" : "=m" (dq[16]));
  238. asm volatile("vst $vr6, %0" : "=m" (dq[32]));
  239. asm volatile("vst $vr7, %0" : "=m" (dq[48]));
  240. /* vr0 - vr3: P + Dx + Dx = P */
  241. asm volatile("vxor.v $vr0, $vr0, $vr4");
  242. asm volatile("vxor.v $vr1, $vr1, $vr5");
  243. asm volatile("vxor.v $vr2, $vr2, $vr6");
  244. asm volatile("vxor.v $vr3, $vr3, $vr7");
  245. asm volatile("vst $vr0, %0" : "=m" (p[0]));
  246. asm volatile("vst $vr1, %0" : "=m" (p[16]));
  247. asm volatile("vst $vr2, %0" : "=m" (p[32]));
  248. asm volatile("vst $vr3, %0" : "=m" (p[48]));
  249. bytes -= 64;
  250. p += 64;
  251. q += 64;
  252. dq += 64;
  253. }
  254. kernel_fpu_end();
  255. }
  256. const struct raid6_recov_calls raid6_recov_lsx = {
  257. .data2 = raid6_2data_recov_lsx,
  258. .datap = raid6_datap_recov_lsx,
  259. .valid = raid6_has_lsx,
  260. .name = "lsx",
  261. .priority = 1,
  262. };
  263. #endif /* CONFIG_CPU_HAS_LSX */
  264. #ifdef CONFIG_CPU_HAS_LASX
  265. static int raid6_has_lasx(void)
  266. {
  267. return cpu_has_lasx;
  268. }
  269. static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
  270. int failb, void **ptrs)
  271. {
  272. u8 *p, *q, *dp, *dq;
  273. const u8 *pbmul; /* P multiplier table for B data */
  274. const u8 *qmul; /* Q multiplier table (for both) */
  275. p = (u8 *)ptrs[disks - 2];
  276. q = (u8 *)ptrs[disks - 1];
  277. /*
  278. * Compute syndrome with zero for the missing data pages
  279. * Use the dead data pages as temporary storage for
  280. * delta p and delta q
  281. */
  282. dp = (u8 *)ptrs[faila];
  283. ptrs[faila] = raid6_get_zero_page();
  284. ptrs[disks - 2] = dp;
  285. dq = (u8 *)ptrs[failb];
  286. ptrs[failb] = raid6_get_zero_page();
  287. ptrs[disks - 1] = dq;
  288. raid6_call.gen_syndrome(disks, bytes, ptrs);
  289. /* Restore pointer table */
  290. ptrs[faila] = dp;
  291. ptrs[failb] = dq;
  292. ptrs[disks - 2] = p;
  293. ptrs[disks - 1] = q;
  294. /* Now, pick the proper data tables */
  295. pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
  296. qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
  297. kernel_fpu_begin();
  298. /*
  299. * xr20, xr21: qmul
  300. * xr22, xr23: pbmul
  301. */
  302. asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
  303. asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
  304. asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
  305. asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
  306. asm volatile("xvreplve0.q $xr20, $xr20");
  307. asm volatile("xvreplve0.q $xr21, $xr21");
  308. asm volatile("xvreplve0.q $xr22, $xr22");
  309. asm volatile("xvreplve0.q $xr23, $xr23");
  310. while (bytes) {
  311. /* xr0, xr1: Q */
  312. asm volatile("xvld $xr0, %0" : : "m" (q[0]));
  313. asm volatile("xvld $xr1, %0" : : "m" (q[32]));
  314. /* xr0, xr1: Q + Qxy */
  315. asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
  316. asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
  317. asm volatile("xvxor.v $xr0, $xr0, $xr4");
  318. asm volatile("xvxor.v $xr1, $xr1, $xr5");
  319. /* xr2, xr3: P */
  320. asm volatile("xvld $xr2, %0" : : "m" (p[0]));
  321. asm volatile("xvld $xr3, %0" : : "m" (p[32]));
  322. /* xr2, xr3: P + Pxy */
  323. asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
  324. asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
  325. asm volatile("xvxor.v $xr2, $xr2, $xr4");
  326. asm volatile("xvxor.v $xr3, $xr3, $xr5");
  327. /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
  328. asm volatile("xvsrli.b $xr4, $xr0, 4");
  329. asm volatile("xvsrli.b $xr5, $xr1, 4");
  330. /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
  331. asm volatile("xvandi.b $xr0, $xr0, 0x0f");
  332. asm volatile("xvandi.b $xr1, $xr1, 0x0f");
  333. /* lookup from qmul[0] */
  334. asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
  335. asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
  336. /* lookup from qmul[16] */
  337. asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
  338. asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
  339. /* xr6, xr7: B(Q + Qxy) */
  340. asm volatile("xvxor.v $xr6, $xr4, $xr0");
  341. asm volatile("xvxor.v $xr7, $xr5, $xr1");
  342. /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
  343. asm volatile("xvsrli.b $xr4, $xr2, 4");
  344. asm volatile("xvsrli.b $xr5, $xr3, 4");
  345. /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
  346. asm volatile("xvandi.b $xr0, $xr2, 0x0f");
  347. asm volatile("xvandi.b $xr1, $xr3, 0x0f");
  348. /* lookup from pbmul[0] */
  349. asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
  350. asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
  351. /* lookup from pbmul[16] */
  352. asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
  353. asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
  354. /* xr0, xr1: A(P + Pxy) */
  355. asm volatile("xvxor.v $xr0, $xr0, $xr4");
  356. asm volatile("xvxor.v $xr1, $xr1, $xr5");
  357. /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
  358. asm volatile("xvxor.v $xr0, $xr0, $xr6");
  359. asm volatile("xvxor.v $xr1, $xr1, $xr7");
  360. /* xr2, xr3: P + Pxy + Dx = Dy */
  361. asm volatile("xvxor.v $xr2, $xr2, $xr0");
  362. asm volatile("xvxor.v $xr3, $xr3, $xr1");
  363. asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
  364. asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
  365. asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
  366. asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
  367. bytes -= 64;
  368. p += 64;
  369. q += 64;
  370. dp += 64;
  371. dq += 64;
  372. }
  373. kernel_fpu_end();
  374. }
  375. static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
  376. void **ptrs)
  377. {
  378. u8 *p, *q, *dq;
  379. const u8 *qmul; /* Q multiplier table */
  380. p = (u8 *)ptrs[disks - 2];
  381. q = (u8 *)ptrs[disks - 1];
  382. /*
  383. * Compute syndrome with zero for the missing data page
  384. * Use the dead data page as temporary storage for delta q
  385. */
  386. dq = (u8 *)ptrs[faila];
  387. ptrs[faila] = raid6_get_zero_page();
  388. ptrs[disks - 1] = dq;
  389. raid6_call.gen_syndrome(disks, bytes, ptrs);
  390. /* Restore pointer table */
  391. ptrs[faila] = dq;
  392. ptrs[disks - 1] = q;
  393. /* Now, pick the proper data tables */
  394. qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
  395. kernel_fpu_begin();
  396. /* xr22, xr23: qmul */
  397. asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
  398. asm volatile("xvreplve0.q $xr22, $xr22");
  399. asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
  400. asm volatile("xvreplve0.q $xr23, $xr23");
  401. while (bytes) {
  402. /* xr0, xr1: P + Dx */
  403. asm volatile("xvld $xr0, %0" : : "m" (p[0]));
  404. asm volatile("xvld $xr1, %0" : : "m" (p[32]));
  405. /* xr2, xr3: Qx */
  406. asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
  407. asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
  408. /* xr2, xr3: Q + Qx */
  409. asm volatile("xvld $xr4, %0" : : "m" (q[0]));
  410. asm volatile("xvld $xr5, %0" : : "m" (q[32]));
  411. asm volatile("xvxor.v $xr2, $xr2, $xr4");
  412. asm volatile("xvxor.v $xr3, $xr3, $xr5");
  413. /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
  414. asm volatile("xvsrli.b $xr4, $xr2, 4");
  415. asm volatile("xvsrli.b $xr5, $xr3, 4");
  416. /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
  417. asm volatile("xvandi.b $xr2, $xr2, 0x0f");
  418. asm volatile("xvandi.b $xr3, $xr3, 0x0f");
  419. /* lookup from qmul[0] */
  420. asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
  421. asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
  422. /* lookup from qmul[16] */
  423. asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
  424. asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
  425. /* xr2, xr3: qmul(Q + Qx) = Dx */
  426. asm volatile("xvxor.v $xr2, $xr2, $xr4");
  427. asm volatile("xvxor.v $xr3, $xr3, $xr5");
  428. /* xr0, xr1: P + Dx + Dx = P */
  429. asm volatile("xvxor.v $xr0, $xr0, $xr2");
  430. asm volatile("xvxor.v $xr1, $xr1, $xr3");
  431. asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
  432. asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
  433. asm volatile("xvst $xr0, %0" : "=m" (p[0]));
  434. asm volatile("xvst $xr1, %0" : "=m" (p[32]));
  435. bytes -= 64;
  436. p += 64;
  437. q += 64;
  438. dq += 64;
  439. }
  440. kernel_fpu_end();
  441. }
  442. const struct raid6_recov_calls raid6_recov_lasx = {
  443. .data2 = raid6_2data_recov_lasx,
  444. .datap = raid6_datap_recov_lasx,
  445. .valid = raid6_has_lasx,
  446. .name = "lasx",
  447. .priority = 2,
  448. };
  449. #endif /* CONFIG_CPU_HAS_LASX */