div64.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
  4. *
  5. * Based on former do_div() implementation from asm-parisc/div64.h:
  6. * Copyright (C) 1999 Hewlett-Packard Co
  7. * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
  8. *
  9. *
  10. * Generic C version of 64bit/32bit division and modulo, with
  11. * 64bit result and 32bit remainder.
  12. *
  13. * The fast case for (n>>32 == 0) is handled inline by do_div().
  14. *
  15. * Code generated for this function might be very inefficient
  16. * for some CPUs. __div64_32() can be overridden by linking arch-specific
  17. * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
  18. * or by defining a preprocessor macro in arch/include/asm/div64.h.
  19. */
  20. #include <linux/bitops.h>
  21. #include <linux/export.h>
  22. #include <linux/math.h>
  23. #include <linux/math64.h>
  24. #include <linux/minmax.h>
  25. #include <linux/log2.h>
  26. /* Not needed on 64bit architectures */
  27. #if BITS_PER_LONG == 32
  28. #ifndef __div64_32
  /*
   * __div64_32 - divide a 64bit value in place by a 32bit divisor
   * @n:    in: the dividend; out: overwritten with the 64bit quotient
   * @base: the 32bit divisor
   *
   * Plain shift-and-subtract long division.  Returns the remainder,
   * narrowed to 32 bits.  Declared weak so another definition of the
   * same symbol can take its place at link time.
   */
  uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
  {
      uint64_t remaining = *n;
      uint64_t quotient = 0;
      uint64_t scaled = base;     /* divisor, shifted left as needed */
      uint64_t bit = 1;           /* quotient bit that matches 'scaled' */
      uint32_t upper = remaining >> 32;

      /* Knock the upper word down first with a native 32bit divide */
      if (upper >= base) {
          uint32_t upper_quot = upper / base;

          quotient = (uint64_t)upper_quot << 32;
          remaining -= (uint64_t)(upper_quot * base) << 32;
      }

      /*
       * Scale the divisor up until it reaches the remainder, stopping
       * before its top bit gets set so the doubling cannot wrap.
       */
      while ((int64_t)scaled > 0 && scaled < remaining) {
          scaled <<= 1;
          bit <<= 1;
      }

      /* Walk back down, subtracting wherever the scaled divisor fits */
      for (; bit; bit >>= 1, scaled >>= 1) {
          if (remaining < scaled)
              continue;
          remaining -= scaled;
          quotient += bit;
      }

      *n = quotient;
      return remaining;
  }
  57. EXPORT_SYMBOL(__div64_32);
  58. #endif
  59. #ifndef div_s64_rem
  60. s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
  61. {
  62. u64 quotient;
  63. if (dividend < 0) {
  64. quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
  65. *remainder = -*remainder;
  66. if (divisor > 0)
  67. quotient = -quotient;
  68. } else {
  69. quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
  70. if (divisor < 0)
  71. quotient = -quotient;
  72. }
  73. return quotient;
  74. }
  75. EXPORT_SYMBOL(div_s64_rem);
  76. #endif
  77. /*
  78. * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
  79. * @dividend: 64bit dividend
  80. * @divisor: 64bit divisor
  81. * @remainder: 64bit remainder
  82. *
  83. * This implementation is comparable to the algorithm used by div64_u64.
  84. * But this operation, which includes math for calculating the remainder,
  85. * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
  86. * systems.
  87. */
  88. #ifndef div64_u64_rem
  89. u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
  90. {
  91. u32 high = divisor >> 32;
  92. u64 quot;
  93. if (high == 0) {
  94. u32 rem32;
  95. quot = div_u64_rem(dividend, divisor, &rem32);
  96. *remainder = rem32;
  97. } else {
  98. int n = fls(high);
  99. quot = div_u64(dividend >> n, divisor >> n);
  100. if (quot != 0)
  101. quot--;
  102. *remainder = dividend - quot * divisor;
  103. if (*remainder >= divisor) {
  104. quot++;
  105. *remainder -= divisor;
  106. }
  107. }
  108. return quot;
  109. }
  110. EXPORT_SYMBOL(div64_u64_rem);
  111. #endif
  112. /*
  113. * div64_u64 - unsigned 64bit divide with 64bit divisor
  114. * @dividend: 64bit dividend
  115. * @divisor: 64bit divisor
  116. *
  117. * This implementation is a modified version of the algorithm proposed
  118. * by the book 'Hacker's Delight'. The original source and full proof
  119. * can be found here and is available for use without restriction.
  120. *
  121. * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
  122. */
  123. #ifndef div64_u64
  124. u64 div64_u64(u64 dividend, u64 divisor)
  125. {
  126. u32 high = divisor >> 32;
  127. u64 quot;
  128. if (high == 0) {
  129. quot = div_u64(dividend, divisor);
  130. } else {
  131. int n = fls(high);
  132. quot = div_u64(dividend >> n, divisor >> n);
  133. if (quot != 0)
  134. quot--;
  135. if ((dividend - quot * divisor) >= divisor)
  136. quot++;
  137. }
  138. return quot;
  139. }
  140. EXPORT_SYMBOL(div64_u64);
  141. #endif
  142. #ifndef div64_s64
  143. s64 div64_s64(s64 dividend, s64 divisor)
  144. {
  145. s64 quot, t;
  146. quot = div64_u64(abs(dividend), abs(divisor));
  147. t = (dividend ^ divisor) >> 63;
  148. return (quot ^ t) - t;
  149. }
  150. EXPORT_SYMBOL(div64_s64);
  151. #endif
  152. #endif /* BITS_PER_LONG == 32 */
  153. /*
  154. * Iterative div/mod for use when dividend is not expected to be much
  155. * bigger than divisor.
  156. */
  157. #ifndef iter_div_u64_rem
  158. u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
  159. {
  160. return __iter_div_u64_rem(dividend, divisor, remainder);
  161. }
  162. EXPORT_SYMBOL(iter_div_u64_rem);
  163. #endif
  164. #if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)
  /* x * y + z, spelled as mul_u32_u32() followed by add_u64_u32() */
  #define mul_add(x, y, z) add_u64_u32(mul_u32_u32((x), (y)), (z))
  166. #if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)
  167. static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
  168. {
  169. /* native 64x64=128 bits multiplication */
  170. u128 prod = (u128)a * b + c;
  171. *p_lo = prod;
  172. return prod >> 64;
  173. }
  174. #else
  175. static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
  176. {
  177. /* perform a 64x64=128 bits multiplication in 32bit chunks */
  178. u64 x, y, z;
  179. /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
  180. x = mul_add(a, b, c);
  181. y = mul_add(a, b >> 32, c >> 32);
  182. y = add_u64_u32(y, x >> 32);
  183. z = mul_add(a >> 32, b >> 32, y >> 32);
  184. y = mul_add(a >> 32, b, y);
  185. *p_lo = (y << 32) + (u32)x;
  186. return add_u64_u32(z, y >> 32);
  187. }
  188. #endif
  /*
   * BITS_PER_ITER is the width of one quotient 'digit' in the long
   * division done by mul_u64_add_u64_div_u64().  Unless already defined
   * it is 32 when __LONG_WIDTH__ is at least 64 and 16 otherwise.
   */
  #if !defined(BITS_PER_ITER)
  #define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
  #endif

  #if BITS_PER_ITER != 32
  /* Any value other than 32 is pinned to a literal 16 */
  #undef BITS_PER_ITER
  #define BITS_PER_ITER 16

  /*
   * Compute a * b + c with a 64bit 'a' and a 32bit 'b'.
   * The low 64 bits are stored in *p_lo, the bits above them are returned.
   */
  static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
  {
      u64 low = mul_add(a, b, c);
      u64 mid = mul_add(a >> 32, b, c >> 32);

      mid = add_u64_u32(mid, low >> 32);
      *p_lo = mid << 32 | (u32)low;
      return mid >> 32;
  }
  #define add_u64_long(a, b) add_u64_u32(a, b)
  #else
  /* 32bit digits: the full 64x64 helper and a plain addition are used */
  #define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
  #define add_u64_long(a, b) ((a) + (b))
  #endif
  208. u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
  209. {
  210. unsigned long d_msig, q_digit;
  211. unsigned int reps, d_z_hi;
  212. u64 quotient, n_lo, n_hi;
  213. u32 overflow;
  214. n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);
  215. if (!n_hi)
  216. return div64_u64(n_lo, d);
  217. if (unlikely(n_hi >= d)) {
  218. /* trigger runtime exception if divisor is zero */
  219. if (d == 0) {
  220. unsigned long zero = 0;
  221. OPTIMIZER_HIDE_VAR(zero);
  222. return ~0UL/zero;
  223. }
  224. /* overflow: result is unrepresentable in a u64 */
  225. return ~0ULL;
  226. }
  227. /* Left align the divisor, shifting the dividend to match */
  228. d_z_hi = __builtin_clzll(d);
  229. if (d_z_hi) {
  230. d <<= d_z_hi;
  231. n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
  232. n_lo <<= d_z_hi;
  233. }
  234. reps = 64 / BITS_PER_ITER;
  235. /* Optimise loop count for small dividends */
  236. if (!(u32)(n_hi >> 32)) {
  237. reps -= 32 / BITS_PER_ITER;
  238. n_hi = n_hi << 32 | n_lo >> 32;
  239. n_lo <<= 32;
  240. }
  241. #if BITS_PER_ITER == 16
  242. if (!(u32)(n_hi >> 48)) {
  243. reps--;
  244. n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
  245. n_lo <<= 16;
  246. }
  247. #endif
  248. /* Invert the dividend so we can use add instead of subtract. */
  249. n_lo = ~n_lo;
  250. n_hi = ~n_hi;
  251. /*
  252. * Get the most significant BITS_PER_ITER bits of the divisor.
  253. * This is used to get a low 'guestimate' of the quotient digit.
  254. */
  255. d_msig = (d >> (64 - BITS_PER_ITER)) + 1;
  256. /*
  257. * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
  258. * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
  259. * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
  260. */
  261. quotient = 0;
  262. while (reps--) {
  263. q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
  264. /* Shift 'n' left to align with the product q_digit * d */
  265. overflow = n_hi >> (64 - BITS_PER_ITER);
  266. n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
  267. n_lo <<= BITS_PER_ITER;
  268. /* Add product to negated divisor */
  269. overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
  270. /* Adjust for the q_digit 'guestimate' being low */
  271. while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
  272. q_digit++;
  273. n_hi += d;
  274. overflow += n_hi < d;
  275. }
  276. quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
  277. }
  278. /*
  279. * The above only ensures the remainder doesn't overflow,
  280. * it can still be possible to add (aka subtract) another copy
  281. * of the divisor.
  282. */
  283. if ((n_hi + d) > n_hi)
  284. quotient++;
  285. return quotient;
  286. }
  287. #if !defined(test_mul_u64_add_u64_div_u64)
  288. EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
  289. #endif
  290. #endif