ecc.c 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710
  1. /*
  2. * Copyright (c) 2013, 2014 Kenneth MacKay. All rights reserved.
  3. * Copyright (c) 2019 Vitaly Chikunov <vt@altlinux.org>
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are
  7. * met:
  8. * * Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * * Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <crypto/ecc_curve.h>
  27. #include <linux/module.h>
  28. #include <linux/random.h>
  29. #include <linux/slab.h>
  30. #include <linux/swab.h>
  31. #include <linux/fips.h>
  32. #include <crypto/ecdh.h>
  33. #include <crypto/rng.h>
  34. #include <crypto/internal/ecc.h>
  35. #include <linux/unaligned.h>
  36. #include <linux/ratelimit.h>
  37. #include "ecc_curve_defs.h"
  38. typedef struct {
  39. u64 m_low;
  40. u64 m_high;
  41. } uint128_t;
  42. /* Returns curv25519 curve param */
  43. const struct ecc_curve *ecc_get_curve25519(void)
  44. {
  45. return &ecc_25519;
  46. }
  47. EXPORT_SYMBOL(ecc_get_curve25519);
  48. const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
  49. {
  50. switch (curve_id) {
  51. /* In FIPS mode only allow P256 and higher */
  52. case ECC_CURVE_NIST_P192:
  53. return fips_enabled ? NULL : &nist_p192;
  54. case ECC_CURVE_NIST_P256:
  55. return &nist_p256;
  56. case ECC_CURVE_NIST_P384:
  57. return &nist_p384;
  58. case ECC_CURVE_NIST_P521:
  59. return &nist_p521;
  60. default:
  61. return NULL;
  62. }
  63. }
  64. EXPORT_SYMBOL(ecc_get_curve);
  65. void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes,
  66. u64 *out, unsigned int ndigits)
  67. {
  68. int diff = ndigits - DIV_ROUND_UP_POW2(nbytes, sizeof(u64));
  69. unsigned int o = nbytes & 7;
  70. __be64 msd = 0;
  71. /* diff > 0: not enough input bytes: set most significant digits to 0 */
  72. if (diff > 0) {
  73. ndigits -= diff;
  74. memset(&out[ndigits], 0, diff * sizeof(u64));
  75. }
  76. if (o) {
  77. memcpy((u8 *)&msd + sizeof(msd) - o, in, o);
  78. out[--ndigits] = be64_to_cpu(msd);
  79. in += o;
  80. }
  81. ecc_swap_digits(in, out, ndigits);
  82. }
  83. EXPORT_SYMBOL(ecc_digits_from_bytes);
  84. struct ecc_point *ecc_alloc_point(unsigned int ndigits)
  85. {
  86. struct ecc_point *p;
  87. size_t ndigits_sz;
  88. if (!ndigits)
  89. return NULL;
  90. p = kmalloc_obj(*p);
  91. if (!p)
  92. return NULL;
  93. ndigits_sz = ndigits * sizeof(u64);
  94. p->x = kmalloc(ndigits_sz, GFP_KERNEL);
  95. if (!p->x)
  96. goto err_alloc_x;
  97. p->y = kmalloc(ndigits_sz, GFP_KERNEL);
  98. if (!p->y)
  99. goto err_alloc_y;
  100. p->ndigits = ndigits;
  101. return p;
  102. err_alloc_y:
  103. kfree(p->x);
  104. err_alloc_x:
  105. kfree(p);
  106. return NULL;
  107. }
  108. EXPORT_SYMBOL(ecc_alloc_point);
  109. void ecc_free_point(struct ecc_point *p)
  110. {
  111. if (!p)
  112. return;
  113. kfree_sensitive(p->x);
  114. kfree_sensitive(p->y);
  115. kfree_sensitive(p);
  116. }
  117. EXPORT_SYMBOL(ecc_free_point);
  118. static void vli_clear(u64 *vli, unsigned int ndigits)
  119. {
  120. int i;
  121. for (i = 0; i < ndigits; i++)
  122. vli[i] = 0;
  123. }
  124. /* Returns true if vli == 0, false otherwise. */
  125. bool vli_is_zero(const u64 *vli, unsigned int ndigits)
  126. {
  127. int i;
  128. for (i = 0; i < ndigits; i++) {
  129. if (vli[i])
  130. return false;
  131. }
  132. return true;
  133. }
  134. EXPORT_SYMBOL(vli_is_zero);
  135. /* Returns nonzero if bit of vli is set. */
  136. static u64 vli_test_bit(const u64 *vli, unsigned int bit)
  137. {
  138. return (vli[bit / 64] & ((u64)1 << (bit % 64)));
  139. }
  140. static bool vli_is_negative(const u64 *vli, unsigned int ndigits)
  141. {
  142. return vli_test_bit(vli, ndigits * 64 - 1);
  143. }
  144. /* Counts the number of 64-bit "digits" in vli. */
  145. static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
  146. {
  147. int i;
  148. /* Search from the end until we find a non-zero digit.
  149. * We do it in reverse because we expect that most digits will
  150. * be nonzero.
  151. */
  152. for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--);
  153. return (i + 1);
  154. }
  155. /* Counts the number of bits required for vli. */
  156. unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
  157. {
  158. unsigned int i, num_digits;
  159. u64 digit;
  160. num_digits = vli_num_digits(vli, ndigits);
  161. if (num_digits == 0)
  162. return 0;
  163. digit = vli[num_digits - 1];
  164. for (i = 0; digit; i++)
  165. digit >>= 1;
  166. return ((num_digits - 1) * 64 + i);
  167. }
  168. EXPORT_SYMBOL(vli_num_bits);
  169. /* Set dest from unaligned bit string src. */
  170. void vli_from_be64(u64 *dest, const void *src, unsigned int ndigits)
  171. {
  172. int i;
  173. const u64 *from = src;
  174. for (i = 0; i < ndigits; i++)
  175. dest[i] = get_unaligned_be64(&from[ndigits - 1 - i]);
  176. }
  177. EXPORT_SYMBOL(vli_from_be64);
  178. void vli_from_le64(u64 *dest, const void *src, unsigned int ndigits)
  179. {
  180. int i;
  181. const u64 *from = src;
  182. for (i = 0; i < ndigits; i++)
  183. dest[i] = get_unaligned_le64(&from[i]);
  184. }
  185. EXPORT_SYMBOL(vli_from_le64);
  186. /* Sets dest = src. */
  187. static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
  188. {
  189. int i;
  190. for (i = 0; i < ndigits; i++)
  191. dest[i] = src[i];
  192. }
  193. /* Returns sign of left - right. */
  194. int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
  195. {
  196. int i;
  197. for (i = ndigits - 1; i >= 0; i--) {
  198. if (left[i] > right[i])
  199. return 1;
  200. else if (left[i] < right[i])
  201. return -1;
  202. }
  203. return 0;
  204. }
  205. EXPORT_SYMBOL(vli_cmp);
  206. /* Computes result = in << c, returning carry. Can modify in place
  207. * (if result == in). 0 < shift < 64.
  208. */
  209. static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
  210. unsigned int ndigits)
  211. {
  212. u64 carry = 0;
  213. int i;
  214. for (i = 0; i < ndigits; i++) {
  215. u64 temp = in[i];
  216. result[i] = (temp << shift) | carry;
  217. carry = temp >> (64 - shift);
  218. }
  219. return carry;
  220. }
  221. /* Computes vli = vli >> 1. */
  222. static void vli_rshift1(u64 *vli, unsigned int ndigits)
  223. {
  224. u64 *end = vli;
  225. u64 carry = 0;
  226. vli += ndigits;
  227. while (vli-- > end) {
  228. u64 temp = *vli;
  229. *vli = (temp >> 1) | carry;
  230. carry = temp << 63;
  231. }
  232. }
  233. /* Computes result = left + right, returning carry. Can modify in place. */
  234. static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
  235. unsigned int ndigits)
  236. {
  237. u64 carry = 0;
  238. int i;
  239. for (i = 0; i < ndigits; i++) {
  240. u64 sum;
  241. sum = left[i] + right[i] + carry;
  242. if (sum != left[i])
  243. carry = (sum < left[i]);
  244. result[i] = sum;
  245. }
  246. return carry;
  247. }
  248. /* Computes result = left + right, returning carry. Can modify in place. */
  249. static u64 vli_uadd(u64 *result, const u64 *left, u64 right,
  250. unsigned int ndigits)
  251. {
  252. u64 carry = right;
  253. int i;
  254. for (i = 0; i < ndigits; i++) {
  255. u64 sum;
  256. sum = left[i] + carry;
  257. if (sum != left[i])
  258. carry = (sum < left[i]);
  259. else
  260. carry = !!carry;
  261. result[i] = sum;
  262. }
  263. return carry;
  264. }
  265. /* Computes result = left - right, returning borrow. Can modify in place. */
  266. u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
  267. unsigned int ndigits)
  268. {
  269. u64 borrow = 0;
  270. int i;
  271. for (i = 0; i < ndigits; i++) {
  272. u64 diff;
  273. diff = left[i] - right[i] - borrow;
  274. if (diff != left[i])
  275. borrow = (diff > left[i]);
  276. result[i] = diff;
  277. }
  278. return borrow;
  279. }
  280. EXPORT_SYMBOL(vli_sub);
  281. /* Computes result = left - right, returning borrow. Can modify in place. */
  282. static u64 vli_usub(u64 *result, const u64 *left, u64 right,
  283. unsigned int ndigits)
  284. {
  285. u64 borrow = right;
  286. int i;
  287. for (i = 0; i < ndigits; i++) {
  288. u64 diff;
  289. diff = left[i] - borrow;
  290. if (diff != left[i])
  291. borrow = (diff > left[i]);
  292. result[i] = diff;
  293. }
  294. return borrow;
  295. }
  296. static uint128_t mul_64_64(u64 left, u64 right)
  297. {
  298. uint128_t result;
  299. #if defined(CONFIG_ARCH_SUPPORTS_INT128)
  300. unsigned __int128 m = (unsigned __int128)left * right;
  301. result.m_low = m;
  302. result.m_high = m >> 64;
  303. #else
  304. u64 a0 = left & 0xffffffffull;
  305. u64 a1 = left >> 32;
  306. u64 b0 = right & 0xffffffffull;
  307. u64 b1 = right >> 32;
  308. u64 m0 = a0 * b0;
  309. u64 m1 = a0 * b1;
  310. u64 m2 = a1 * b0;
  311. u64 m3 = a1 * b1;
  312. m2 += (m0 >> 32);
  313. m2 += m1;
  314. /* Overflow */
  315. if (m2 < m1)
  316. m3 += 0x100000000ull;
  317. result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
  318. result.m_high = m3 + (m2 >> 32);
  319. #endif
  320. return result;
  321. }
  322. static uint128_t add_128_128(uint128_t a, uint128_t b)
  323. {
  324. uint128_t result;
  325. result.m_low = a.m_low + b.m_low;
  326. result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);
  327. return result;
  328. }
  329. static void vli_mult(u64 *result, const u64 *left, const u64 *right,
  330. unsigned int ndigits)
  331. {
  332. uint128_t r01 = { 0, 0 };
  333. u64 r2 = 0;
  334. unsigned int i, k;
  335. /* Compute each digit of result in sequence, maintaining the
  336. * carries.
  337. */
  338. for (k = 0; k < ndigits * 2 - 1; k++) {
  339. unsigned int min;
  340. if (k < ndigits)
  341. min = 0;
  342. else
  343. min = (k + 1) - ndigits;
  344. for (i = min; i <= k && i < ndigits; i++) {
  345. uint128_t product;
  346. product = mul_64_64(left[i], right[k - i]);
  347. r01 = add_128_128(r01, product);
  348. r2 += (r01.m_high < product.m_high);
  349. }
  350. result[k] = r01.m_low;
  351. r01.m_low = r01.m_high;
  352. r01.m_high = r2;
  353. r2 = 0;
  354. }
  355. result[ndigits * 2 - 1] = r01.m_low;
  356. }
  357. /* Compute product = left * right, for a small right value. */
  358. static void vli_umult(u64 *result, const u64 *left, u32 right,
  359. unsigned int ndigits)
  360. {
  361. uint128_t r01 = { 0 };
  362. unsigned int k;
  363. for (k = 0; k < ndigits; k++) {
  364. uint128_t product;
  365. product = mul_64_64(left[k], right);
  366. r01 = add_128_128(r01, product);
  367. /* no carry */
  368. result[k] = r01.m_low;
  369. r01.m_low = r01.m_high;
  370. r01.m_high = 0;
  371. }
  372. result[k] = r01.m_low;
  373. for (++k; k < ndigits * 2; k++)
  374. result[k] = 0;
  375. }
  376. static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
  377. {
  378. uint128_t r01 = { 0, 0 };
  379. u64 r2 = 0;
  380. int i, k;
  381. for (k = 0; k < ndigits * 2 - 1; k++) {
  382. unsigned int min;
  383. if (k < ndigits)
  384. min = 0;
  385. else
  386. min = (k + 1) - ndigits;
  387. for (i = min; i <= k && i <= k - i; i++) {
  388. uint128_t product;
  389. product = mul_64_64(left[i], left[k - i]);
  390. if (i < k - i) {
  391. r2 += product.m_high >> 63;
  392. product.m_high = (product.m_high << 1) |
  393. (product.m_low >> 63);
  394. product.m_low <<= 1;
  395. }
  396. r01 = add_128_128(r01, product);
  397. r2 += (r01.m_high < product.m_high);
  398. }
  399. result[k] = r01.m_low;
  400. r01.m_low = r01.m_high;
  401. r01.m_high = r2;
  402. r2 = 0;
  403. }
  404. result[ndigits * 2 - 1] = r01.m_low;
  405. }
  406. /* Computes result = (left + right) % mod.
  407. * Assumes that left < mod and right < mod, result != mod.
  408. */
  409. static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
  410. const u64 *mod, unsigned int ndigits)
  411. {
  412. u64 carry;
  413. carry = vli_add(result, left, right, ndigits);
  414. /* result > mod (result = mod + remainder), so subtract mod to
  415. * get remainder.
  416. */
  417. if (carry || vli_cmp(result, mod, ndigits) >= 0)
  418. vli_sub(result, result, mod, ndigits);
  419. }
  420. /* Computes result = (left - right) % mod.
  421. * Assumes that left < mod and right < mod, result != mod.
  422. */
  423. static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
  424. const u64 *mod, unsigned int ndigits)
  425. {
  426. u64 borrow = vli_sub(result, left, right, ndigits);
  427. /* In this case, p_result == -diff == (max int) - diff.
  428. * Since -x % d == d - x, we can get the correct result from
  429. * result + mod (with overflow).
  430. */
  431. if (borrow)
  432. vli_add(result, result, mod, ndigits);
  433. }
  434. /*
  435. * Computes result = product % mod
  436. * for special form moduli: p = 2^k-c, for small c (note the minus sign)
  437. *
  438. * References:
  439. * R. Crandall, C. Pomerance. Prime Numbers: A Computational Perspective.
  440. * 9 Fast Algorithms for Large-Integer Arithmetic. 9.2.3 Moduli of special form
  441. * Algorithm 9.2.13 (Fast mod operation for special-form moduli).
  442. */
  443. static void vli_mmod_special(u64 *result, const u64 *product,
  444. const u64 *mod, unsigned int ndigits)
  445. {
  446. u64 c = -mod[0];
  447. u64 t[ECC_MAX_DIGITS * 2];
  448. u64 r[ECC_MAX_DIGITS * 2];
  449. vli_set(r, product, ndigits * 2);
  450. while (!vli_is_zero(r + ndigits, ndigits)) {
  451. vli_umult(t, r + ndigits, c, ndigits);
  452. vli_clear(r + ndigits, ndigits);
  453. vli_add(r, r, t, ndigits * 2);
  454. }
  455. vli_set(t, mod, ndigits);
  456. vli_clear(t + ndigits, ndigits);
  457. while (vli_cmp(r, t, ndigits * 2) >= 0)
  458. vli_sub(r, r, t, ndigits * 2);
  459. vli_set(result, r, ndigits);
  460. }
  461. /*
  462. * Computes result = product % mod
  463. * for special form moduli: p = 2^{k-1}+c, for small c (note the plus sign)
  464. * where k-1 does not fit into qword boundary by -1 bit (such as 255).
  465. * References (loosely based on):
  466. * A. Menezes, P. van Oorschot, S. Vanstone. Handbook of Applied Cryptography.
  467. * 14.3.4 Reduction methods for moduli of special form. Algorithm 14.47.
  468. * URL: http://cacr.uwaterloo.ca/hac/about/chap14.pdf
  469. *
  470. * H. Cohen, G. Frey, R. Avanzi, C. Doche, T. Lange, K. Nguyen, F. Vercauteren.
  471. * Handbook of Elliptic and Hyperelliptic Curve Cryptography.
  472. * Algorithm 10.25 Fast reduction for special form moduli
  473. */
  474. static void vli_mmod_special2(u64 *result, const u64 *product,
  475. const u64 *mod, unsigned int ndigits)
  476. {
  477. u64 c2 = mod[0] * 2;
  478. u64 q[ECC_MAX_DIGITS];
  479. u64 r[ECC_MAX_DIGITS * 2];
  480. u64 m[ECC_MAX_DIGITS * 2]; /* expanded mod */
  481. int carry; /* last bit that doesn't fit into q */
  482. int i;
  483. vli_set(m, mod, ndigits);
  484. vli_clear(m + ndigits, ndigits);
  485. vli_set(r, product, ndigits);
  486. /* q and carry are top bits */
  487. vli_set(q, product + ndigits, ndigits);
  488. vli_clear(r + ndigits, ndigits);
  489. carry = vli_is_negative(r, ndigits);
  490. if (carry)
  491. r[ndigits - 1] &= (1ull << 63) - 1;
  492. for (i = 1; carry || !vli_is_zero(q, ndigits); i++) {
  493. u64 qc[ECC_MAX_DIGITS * 2];
  494. vli_umult(qc, q, c2, ndigits);
  495. if (carry)
  496. vli_uadd(qc, qc, mod[0], ndigits * 2);
  497. vli_set(q, qc + ndigits, ndigits);
  498. vli_clear(qc + ndigits, ndigits);
  499. carry = vli_is_negative(qc, ndigits);
  500. if (carry)
  501. qc[ndigits - 1] &= (1ull << 63) - 1;
  502. if (i & 1)
  503. vli_sub(r, r, qc, ndigits * 2);
  504. else
  505. vli_add(r, r, qc, ndigits * 2);
  506. }
  507. while (vli_is_negative(r, ndigits * 2))
  508. vli_add(r, r, m, ndigits * 2);
  509. while (vli_cmp(r, m, ndigits * 2) >= 0)
  510. vli_sub(r, r, m, ndigits * 2);
  511. vli_set(result, r, ndigits);
  512. }
  513. /*
  514. * Computes result = product % mod, where product is 2N words long.
  515. * Reference: Ken MacKay's micro-ecc.
  516. * Currently only designed to work for curve_p or curve_n.
  517. */
  518. static void vli_mmod_slow(u64 *result, u64 *product, const u64 *mod,
  519. unsigned int ndigits)
  520. {
  521. u64 mod_m[2 * ECC_MAX_DIGITS];
  522. u64 tmp[2 * ECC_MAX_DIGITS];
  523. u64 *v[2] = { tmp, product };
  524. u64 carry = 0;
  525. unsigned int i;
  526. /* Shift mod so its highest set bit is at the maximum position. */
  527. int shift = (ndigits * 2 * 64) - vli_num_bits(mod, ndigits);
  528. int word_shift = shift / 64;
  529. int bit_shift = shift % 64;
  530. vli_clear(mod_m, word_shift);
  531. if (bit_shift > 0) {
  532. for (i = 0; i < ndigits; ++i) {
  533. mod_m[word_shift + i] = (mod[i] << bit_shift) | carry;
  534. carry = mod[i] >> (64 - bit_shift);
  535. }
  536. } else
  537. vli_set(mod_m + word_shift, mod, ndigits);
  538. for (i = 1; shift >= 0; --shift) {
  539. u64 borrow = 0;
  540. unsigned int j;
  541. for (j = 0; j < ndigits * 2; ++j) {
  542. u64 diff = v[i][j] - mod_m[j] - borrow;
  543. if (diff != v[i][j])
  544. borrow = (diff > v[i][j]);
  545. v[1 - i][j] = diff;
  546. }
  547. i = !(i ^ borrow); /* Swap the index if there was no borrow */
  548. vli_rshift1(mod_m, ndigits);
  549. mod_m[ndigits - 1] |= mod_m[ndigits] << (64 - 1);
  550. vli_rshift1(mod_m + ndigits, ndigits);
  551. }
  552. vli_set(result, v[i], ndigits);
  553. }
  554. /* Computes result = product % mod using Barrett's reduction with precomputed
  555. * value mu appended to the mod after ndigits, mu = (2^{2w} / mod) and have
  556. * length ndigits + 1, where mu * (2^w - 1) should not overflow ndigits
  557. * boundary.
  558. *
  559. * Reference:
  560. * R. Brent, P. Zimmermann. Modern Computer Arithmetic. 2010.
  561. * 2.4.1 Barrett's algorithm. Algorithm 2.5.
  562. */
  563. static void vli_mmod_barrett(u64 *result, u64 *product, const u64 *mod,
  564. unsigned int ndigits)
  565. {
  566. u64 q[ECC_MAX_DIGITS * 2];
  567. u64 r[ECC_MAX_DIGITS * 2];
  568. const u64 *mu = mod + ndigits;
  569. vli_mult(q, product + ndigits, mu, ndigits);
  570. if (mu[ndigits])
  571. vli_add(q + ndigits, q + ndigits, product + ndigits, ndigits);
  572. vli_mult(r, mod, q + ndigits, ndigits);
  573. vli_sub(r, product, r, ndigits * 2);
  574. while (!vli_is_zero(r + ndigits, ndigits) ||
  575. vli_cmp(r, mod, ndigits) != -1) {
  576. u64 carry;
  577. carry = vli_sub(r, r, mod, ndigits);
  578. vli_usub(r + ndigits, r + ndigits, carry, ndigits);
  579. }
  580. vli_set(result, r, ndigits);
  581. }
  582. /* Computes p_result = p_product % curve_p.
  583. * See algorithm 5 and 6 from
  584. * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
  585. */
  586. static void vli_mmod_fast_192(u64 *result, const u64 *product,
  587. const u64 *curve_prime, u64 *tmp)
  588. {
  589. const unsigned int ndigits = ECC_CURVE_NIST_P192_DIGITS;
  590. int carry;
  591. vli_set(result, product, ndigits);
  592. vli_set(tmp, &product[3], ndigits);
  593. carry = vli_add(result, result, tmp, ndigits);
  594. tmp[0] = 0;
  595. tmp[1] = product[3];
  596. tmp[2] = product[4];
  597. carry += vli_add(result, result, tmp, ndigits);
  598. tmp[0] = tmp[1] = product[5];
  599. tmp[2] = 0;
  600. carry += vli_add(result, result, tmp, ndigits);
  601. while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
  602. carry -= vli_sub(result, result, curve_prime, ndigits);
  603. }
  604. /* Computes result = product % curve_prime
  605. * from http://www.nsa.gov/ia/_files/nist-routines.pdf
  606. */
  607. static void vli_mmod_fast_256(u64 *result, const u64 *product,
  608. const u64 *curve_prime, u64 *tmp)
  609. {
  610. int carry;
  611. const unsigned int ndigits = ECC_CURVE_NIST_P256_DIGITS;
  612. /* t */
  613. vli_set(result, product, ndigits);
  614. /* s1 */
  615. tmp[0] = 0;
  616. tmp[1] = product[5] & 0xffffffff00000000ull;
  617. tmp[2] = product[6];
  618. tmp[3] = product[7];
  619. carry = vli_lshift(tmp, tmp, 1, ndigits);
  620. carry += vli_add(result, result, tmp, ndigits);
  621. /* s2 */
  622. tmp[1] = product[6] << 32;
  623. tmp[2] = (product[6] >> 32) | (product[7] << 32);
  624. tmp[3] = product[7] >> 32;
  625. carry += vli_lshift(tmp, tmp, 1, ndigits);
  626. carry += vli_add(result, result, tmp, ndigits);
  627. /* s3 */
  628. tmp[0] = product[4];
  629. tmp[1] = product[5] & 0xffffffff;
  630. tmp[2] = 0;
  631. tmp[3] = product[7];
  632. carry += vli_add(result, result, tmp, ndigits);
  633. /* s4 */
  634. tmp[0] = (product[4] >> 32) | (product[5] << 32);
  635. tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
  636. tmp[2] = product[7];
  637. tmp[3] = (product[6] >> 32) | (product[4] << 32);
  638. carry += vli_add(result, result, tmp, ndigits);
  639. /* d1 */
  640. tmp[0] = (product[5] >> 32) | (product[6] << 32);
  641. tmp[1] = (product[6] >> 32);
  642. tmp[2] = 0;
  643. tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
  644. carry -= vli_sub(result, result, tmp, ndigits);
  645. /* d2 */
  646. tmp[0] = product[6];
  647. tmp[1] = product[7];
  648. tmp[2] = 0;
  649. tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
  650. carry -= vli_sub(result, result, tmp, ndigits);
  651. /* d3 */
  652. tmp[0] = (product[6] >> 32) | (product[7] << 32);
  653. tmp[1] = (product[7] >> 32) | (product[4] << 32);
  654. tmp[2] = (product[4] >> 32) | (product[5] << 32);
  655. tmp[3] = (product[6] << 32);
  656. carry -= vli_sub(result, result, tmp, ndigits);
  657. /* d4 */
  658. tmp[0] = product[7];
  659. tmp[1] = product[4] & 0xffffffff00000000ull;
  660. tmp[2] = product[5];
  661. tmp[3] = product[6] & 0xffffffff00000000ull;
  662. carry -= vli_sub(result, result, tmp, ndigits);
  663. if (carry < 0) {
  664. do {
  665. carry += vli_add(result, result, curve_prime, ndigits);
  666. } while (carry < 0);
  667. } else {
  668. while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
  669. carry -= vli_sub(result, result, curve_prime, ndigits);
  670. }
  671. }
  672. #define SL32OR32(x32, y32) (((u64)x32 << 32) | y32)
  673. #define AND64H(x64) (x64 & 0xffFFffFF00000000ull)
  674. #define AND64L(x64) (x64 & 0x00000000ffFFffFFull)
  675. /* Computes result = product % curve_prime
  676. * from "Mathematical routines for the NIST prime elliptic curves"
  677. */
  678. static void vli_mmod_fast_384(u64 *result, const u64 *product,
  679. const u64 *curve_prime, u64 *tmp)
  680. {
  681. int carry;
  682. const unsigned int ndigits = ECC_CURVE_NIST_P384_DIGITS;
  683. /* t */
  684. vli_set(result, product, ndigits);
  685. /* s1 */
  686. tmp[0] = 0; // 0 || 0
  687. tmp[1] = 0; // 0 || 0
  688. tmp[2] = SL32OR32(product[11], (product[10]>>32)); //a22||a21
  689. tmp[3] = product[11]>>32; // 0 ||a23
  690. tmp[4] = 0; // 0 || 0
  691. tmp[5] = 0; // 0 || 0
  692. carry = vli_lshift(tmp, tmp, 1, ndigits);
  693. carry += vli_add(result, result, tmp, ndigits);
  694. /* s2 */
  695. tmp[0] = product[6]; //a13||a12
  696. tmp[1] = product[7]; //a15||a14
  697. tmp[2] = product[8]; //a17||a16
  698. tmp[3] = product[9]; //a19||a18
  699. tmp[4] = product[10]; //a21||a20
  700. tmp[5] = product[11]; //a23||a22
  701. carry += vli_add(result, result, tmp, ndigits);
  702. /* s3 */
  703. tmp[0] = SL32OR32(product[11], (product[10]>>32)); //a22||a21
  704. tmp[1] = SL32OR32(product[6], (product[11]>>32)); //a12||a23
  705. tmp[2] = SL32OR32(product[7], (product[6])>>32); //a14||a13
  706. tmp[3] = SL32OR32(product[8], (product[7]>>32)); //a16||a15
  707. tmp[4] = SL32OR32(product[9], (product[8]>>32)); //a18||a17
  708. tmp[5] = SL32OR32(product[10], (product[9]>>32)); //a20||a19
  709. carry += vli_add(result, result, tmp, ndigits);
  710. /* s4 */
  711. tmp[0] = AND64H(product[11]); //a23|| 0
  712. tmp[1] = (product[10]<<32); //a20|| 0
  713. tmp[2] = product[6]; //a13||a12
  714. tmp[3] = product[7]; //a15||a14
  715. tmp[4] = product[8]; //a17||a16
  716. tmp[5] = product[9]; //a19||a18
  717. carry += vli_add(result, result, tmp, ndigits);
  718. /* s5 */
  719. tmp[0] = 0; // 0|| 0
  720. tmp[1] = 0; // 0|| 0
  721. tmp[2] = product[10]; //a21||a20
  722. tmp[3] = product[11]; //a23||a22
  723. tmp[4] = 0; // 0|| 0
  724. tmp[5] = 0; // 0|| 0
  725. carry += vli_add(result, result, tmp, ndigits);
  726. /* s6 */
  727. tmp[0] = AND64L(product[10]); // 0 ||a20
  728. tmp[1] = AND64H(product[10]); //a21|| 0
  729. tmp[2] = product[11]; //a23||a22
  730. tmp[3] = 0; // 0 || 0
  731. tmp[4] = 0; // 0 || 0
  732. tmp[5] = 0; // 0 || 0
  733. carry += vli_add(result, result, tmp, ndigits);
  734. /* d1 */
  735. tmp[0] = SL32OR32(product[6], (product[11]>>32)); //a12||a23
  736. tmp[1] = SL32OR32(product[7], (product[6]>>32)); //a14||a13
  737. tmp[2] = SL32OR32(product[8], (product[7]>>32)); //a16||a15
  738. tmp[3] = SL32OR32(product[9], (product[8]>>32)); //a18||a17
  739. tmp[4] = SL32OR32(product[10], (product[9]>>32)); //a20||a19
  740. tmp[5] = SL32OR32(product[11], (product[10]>>32)); //a22||a21
  741. carry -= vli_sub(result, result, tmp, ndigits);
  742. /* d2 */
  743. tmp[0] = (product[10]<<32); //a20|| 0
  744. tmp[1] = SL32OR32(product[11], (product[10]>>32)); //a22||a21
  745. tmp[2] = (product[11]>>32); // 0 ||a23
  746. tmp[3] = 0; // 0 || 0
  747. tmp[4] = 0; // 0 || 0
  748. tmp[5] = 0; // 0 || 0
  749. carry -= vli_sub(result, result, tmp, ndigits);
  750. /* d3 */
  751. tmp[0] = 0; // 0 || 0
  752. tmp[1] = AND64H(product[11]); //a23|| 0
  753. tmp[2] = product[11]>>32; // 0 ||a23
  754. tmp[3] = 0; // 0 || 0
  755. tmp[4] = 0; // 0 || 0
  756. tmp[5] = 0; // 0 || 0
  757. carry -= vli_sub(result, result, tmp, ndigits);
  758. if (carry < 0) {
  759. do {
  760. carry += vli_add(result, result, curve_prime, ndigits);
  761. } while (carry < 0);
  762. } else {
  763. while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
  764. carry -= vli_sub(result, result, curve_prime, ndigits);
  765. }
  766. }
  767. #undef SL32OR32
  768. #undef AND64H
  769. #undef AND64L
  770. /*
  771. * Computes result = product % curve_prime
  772. * from "Recommendations for Discrete Logarithm-Based Cryptography:
  773. * Elliptic Curve Domain Parameters" section G.1.4
  774. */
  775. static void vli_mmod_fast_521(u64 *result, const u64 *product,
  776. const u64 *curve_prime, u64 *tmp)
  777. {
  778. const unsigned int ndigits = ECC_CURVE_NIST_P521_DIGITS;
  779. size_t i;
  780. /* Initialize result with lowest 521 bits from product */
  781. vli_set(result, product, ndigits);
  782. result[8] &= 0x1ff;
  783. for (i = 0; i < ndigits; i++)
  784. tmp[i] = (product[8 + i] >> 9) | (product[9 + i] << 55);
  785. tmp[8] &= 0x1ff;
  786. vli_mod_add(result, result, tmp, curve_prime, ndigits);
  787. }
  788. /* Computes result = product % curve_prime for different curve_primes.
  789. *
  790. * Note that curve_primes are distinguished just by heuristic check and
  791. * not by complete conformance check.
  792. */
  793. static bool vli_mmod_fast(u64 *result, u64 *product,
  794. const struct ecc_curve *curve)
  795. {
  796. u64 tmp[2 * ECC_MAX_DIGITS];
  797. const u64 *curve_prime = curve->p;
  798. const unsigned int ndigits = curve->g.ndigits;
  799. /* All NIST curves have name prefix 'nist_' */
  800. if (strncmp(curve->name, "nist_", 5) != 0) {
  801. /* Try to handle Pseudo-Marsenne primes. */
  802. if (curve_prime[ndigits - 1] == -1ull) {
  803. vli_mmod_special(result, product, curve_prime,
  804. ndigits);
  805. return true;
  806. } else if (curve_prime[ndigits - 1] == 1ull << 63 &&
  807. curve_prime[ndigits - 2] == 0) {
  808. vli_mmod_special2(result, product, curve_prime,
  809. ndigits);
  810. return true;
  811. }
  812. vli_mmod_barrett(result, product, curve_prime, ndigits);
  813. return true;
  814. }
  815. switch (ndigits) {
  816. case ECC_CURVE_NIST_P192_DIGITS:
  817. vli_mmod_fast_192(result, product, curve_prime, tmp);
  818. break;
  819. case ECC_CURVE_NIST_P256_DIGITS:
  820. vli_mmod_fast_256(result, product, curve_prime, tmp);
  821. break;
  822. case ECC_CURVE_NIST_P384_DIGITS:
  823. vli_mmod_fast_384(result, product, curve_prime, tmp);
  824. break;
  825. case ECC_CURVE_NIST_P521_DIGITS:
  826. vli_mmod_fast_521(result, product, curve_prime, tmp);
  827. break;
  828. default:
  829. pr_err_ratelimited("ecc: unsupported digits size!\n");
  830. return false;
  831. }
  832. return true;
  833. }
  834. /* Computes result = (left * right) % mod.
  835. * Assumes that mod is big enough curve order.
  836. */
  837. void vli_mod_mult_slow(u64 *result, const u64 *left, const u64 *right,
  838. const u64 *mod, unsigned int ndigits)
  839. {
  840. u64 product[ECC_MAX_DIGITS * 2];
  841. vli_mult(product, left, right, ndigits);
  842. vli_mmod_slow(result, product, mod, ndigits);
  843. }
  844. EXPORT_SYMBOL(vli_mod_mult_slow);
  845. /* Computes result = (left * right) % curve_prime. */
  846. static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
  847. const struct ecc_curve *curve)
  848. {
  849. u64 product[2 * ECC_MAX_DIGITS];
  850. vli_mult(product, left, right, curve->g.ndigits);
  851. vli_mmod_fast(result, product, curve);
  852. }
  853. /* Computes result = left^2 % curve_prime. */
  854. static void vli_mod_square_fast(u64 *result, const u64 *left,
  855. const struct ecc_curve *curve)
  856. {
  857. u64 product[2 * ECC_MAX_DIGITS];
  858. vli_square(product, left, curve->g.ndigits);
  859. vli_mmod_fast(result, product, curve);
  860. }
  861. #define EVEN(vli) (!(vli[0] & 1))
  862. /* Computes result = (1 / p_input) % mod. All VLIs are the same size.
  863. * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
  864. * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
  865. */
  866. void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
  867. unsigned int ndigits)
  868. {
  869. u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
  870. u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
  871. u64 carry;
  872. int cmp_result;
  873. if (vli_is_zero(input, ndigits)) {
  874. vli_clear(result, ndigits);
  875. return;
  876. }
  877. vli_set(a, input, ndigits);
  878. vli_set(b, mod, ndigits);
  879. vli_clear(u, ndigits);
  880. u[0] = 1;
  881. vli_clear(v, ndigits);
  882. while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) {
  883. carry = 0;
  884. if (EVEN(a)) {
  885. vli_rshift1(a, ndigits);
  886. if (!EVEN(u))
  887. carry = vli_add(u, u, mod, ndigits);
  888. vli_rshift1(u, ndigits);
  889. if (carry)
  890. u[ndigits - 1] |= 0x8000000000000000ull;
  891. } else if (EVEN(b)) {
  892. vli_rshift1(b, ndigits);
  893. if (!EVEN(v))
  894. carry = vli_add(v, v, mod, ndigits);
  895. vli_rshift1(v, ndigits);
  896. if (carry)
  897. v[ndigits - 1] |= 0x8000000000000000ull;
  898. } else if (cmp_result > 0) {
  899. vli_sub(a, a, b, ndigits);
  900. vli_rshift1(a, ndigits);
  901. if (vli_cmp(u, v, ndigits) < 0)
  902. vli_add(u, u, mod, ndigits);
  903. vli_sub(u, u, v, ndigits);
  904. if (!EVEN(u))
  905. carry = vli_add(u, u, mod, ndigits);
  906. vli_rshift1(u, ndigits);
  907. if (carry)
  908. u[ndigits - 1] |= 0x8000000000000000ull;
  909. } else {
  910. vli_sub(b, b, a, ndigits);
  911. vli_rshift1(b, ndigits);
  912. if (vli_cmp(v, u, ndigits) < 0)
  913. vli_add(v, v, mod, ndigits);
  914. vli_sub(v, v, u, ndigits);
  915. if (!EVEN(v))
  916. carry = vli_add(v, v, mod, ndigits);
  917. vli_rshift1(v, ndigits);
  918. if (carry)
  919. v[ndigits - 1] |= 0x8000000000000000ull;
  920. }
  921. }
  922. vli_set(result, u, ndigits);
  923. }
  924. EXPORT_SYMBOL(vli_mod_inv);
  925. /* ------ Point operations ------ */
  926. /* Returns true if p_point is the point at infinity, false otherwise. */
  927. bool ecc_point_is_zero(const struct ecc_point *point)
  928. {
  929. return (vli_is_zero(point->x, point->ndigits) &&
  930. vli_is_zero(point->y, point->ndigits));
  931. }
  932. EXPORT_SYMBOL(ecc_point_is_zero);
  933. /* Point multiplication algorithm using Montgomery's ladder with co-Z
  934. * coordinates. From https://eprint.iacr.org/2011/338.pdf
  935. */
  936. /* Double in place */
  937. static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
  938. const struct ecc_curve *curve)
  939. {
  940. /* t1 = x, t2 = y, t3 = z */
  941. u64 t4[ECC_MAX_DIGITS];
  942. u64 t5[ECC_MAX_DIGITS];
  943. const u64 *curve_prime = curve->p;
  944. const unsigned int ndigits = curve->g.ndigits;
  945. if (vli_is_zero(z1, ndigits))
  946. return;
  947. /* t4 = y1^2 */
  948. vli_mod_square_fast(t4, y1, curve);
  949. /* t5 = x1*y1^2 = A */
  950. vli_mod_mult_fast(t5, x1, t4, curve);
  951. /* t4 = y1^4 */
  952. vli_mod_square_fast(t4, t4, curve);
  953. /* t2 = y1*z1 = z3 */
  954. vli_mod_mult_fast(y1, y1, z1, curve);
  955. /* t3 = z1^2 */
  956. vli_mod_square_fast(z1, z1, curve);
  957. /* t1 = x1 + z1^2 */
  958. vli_mod_add(x1, x1, z1, curve_prime, ndigits);
  959. /* t3 = 2*z1^2 */
  960. vli_mod_add(z1, z1, z1, curve_prime, ndigits);
  961. /* t3 = x1 - z1^2 */
  962. vli_mod_sub(z1, x1, z1, curve_prime, ndigits);
  963. /* t1 = x1^2 - z1^4 */
  964. vli_mod_mult_fast(x1, x1, z1, curve);
  965. /* t3 = 2*(x1^2 - z1^4) */
  966. vli_mod_add(z1, x1, x1, curve_prime, ndigits);
  967. /* t1 = 3*(x1^2 - z1^4) */
  968. vli_mod_add(x1, x1, z1, curve_prime, ndigits);
  969. if (vli_test_bit(x1, 0)) {
  970. u64 carry = vli_add(x1, x1, curve_prime, ndigits);
  971. vli_rshift1(x1, ndigits);
  972. x1[ndigits - 1] |= carry << 63;
  973. } else {
  974. vli_rshift1(x1, ndigits);
  975. }
  976. /* t1 = 3/2*(x1^2 - z1^4) = B */
  977. /* t3 = B^2 */
  978. vli_mod_square_fast(z1, x1, curve);
  979. /* t3 = B^2 - A */
  980. vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
  981. /* t3 = B^2 - 2A = x3 */
  982. vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
  983. /* t5 = A - x3 */
  984. vli_mod_sub(t5, t5, z1, curve_prime, ndigits);
  985. /* t1 = B * (A - x3) */
  986. vli_mod_mult_fast(x1, x1, t5, curve);
  987. /* t4 = B * (A - x3) - y1^4 = y3 */
  988. vli_mod_sub(t4, x1, t4, curve_prime, ndigits);
  989. vli_set(x1, z1, ndigits);
  990. vli_set(z1, y1, ndigits);
  991. vli_set(y1, t4, ndigits);
  992. }
  993. /* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
  994. static void apply_z(u64 *x1, u64 *y1, u64 *z, const struct ecc_curve *curve)
  995. {
  996. u64 t1[ECC_MAX_DIGITS];
  997. vli_mod_square_fast(t1, z, curve); /* z^2 */
  998. vli_mod_mult_fast(x1, x1, t1, curve); /* x1 * z^2 */
  999. vli_mod_mult_fast(t1, t1, z, curve); /* z^3 */
  1000. vli_mod_mult_fast(y1, y1, t1, curve); /* y1 * z^3 */
  1001. }
  1002. /* P = (x1, y1) => 2P, (x2, y2) => P' */
  1003. static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
  1004. u64 *p_initial_z, const struct ecc_curve *curve)
  1005. {
  1006. u64 z[ECC_MAX_DIGITS];
  1007. const unsigned int ndigits = curve->g.ndigits;
  1008. vli_set(x2, x1, ndigits);
  1009. vli_set(y2, y1, ndigits);
  1010. vli_clear(z, ndigits);
  1011. z[0] = 1;
  1012. if (p_initial_z)
  1013. vli_set(z, p_initial_z, ndigits);
  1014. apply_z(x1, y1, z, curve);
  1015. ecc_point_double_jacobian(x1, y1, z, curve);
  1016. apply_z(x2, y2, z, curve);
  1017. }
  1018. /* Input P = (x1, y1, Z), Q = (x2, y2, Z)
  1019. * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
  1020. * or P => P', Q => P + Q
  1021. */
  1022. static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
  1023. const struct ecc_curve *curve)
  1024. {
  1025. /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
  1026. u64 t5[ECC_MAX_DIGITS];
  1027. const u64 *curve_prime = curve->p;
  1028. const unsigned int ndigits = curve->g.ndigits;
  1029. /* t5 = x2 - x1 */
  1030. vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
  1031. /* t5 = (x2 - x1)^2 = A */
  1032. vli_mod_square_fast(t5, t5, curve);
  1033. /* t1 = x1*A = B */
  1034. vli_mod_mult_fast(x1, x1, t5, curve);
  1035. /* t3 = x2*A = C */
  1036. vli_mod_mult_fast(x2, x2, t5, curve);
  1037. /* t4 = y2 - y1 */
  1038. vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
  1039. /* t5 = (y2 - y1)^2 = D */
  1040. vli_mod_square_fast(t5, y2, curve);
  1041. /* t5 = D - B */
  1042. vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
  1043. /* t5 = D - B - C = x3 */
  1044. vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
  1045. /* t3 = C - B */
  1046. vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
  1047. /* t2 = y1*(C - B) */
  1048. vli_mod_mult_fast(y1, y1, x2, curve);
  1049. /* t3 = B - x3 */
  1050. vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
  1051. /* t4 = (y2 - y1)*(B - x3) */
  1052. vli_mod_mult_fast(y2, y2, x2, curve);
  1053. /* t4 = y3 */
  1054. vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
  1055. vli_set(x2, t5, ndigits);
  1056. }
  1057. /* Input P = (x1, y1, Z), Q = (x2, y2, Z)
  1058. * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
  1059. * or P => P - Q, Q => P + Q
  1060. */
  1061. static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
  1062. const struct ecc_curve *curve)
  1063. {
  1064. /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
  1065. u64 t5[ECC_MAX_DIGITS];
  1066. u64 t6[ECC_MAX_DIGITS];
  1067. u64 t7[ECC_MAX_DIGITS];
  1068. const u64 *curve_prime = curve->p;
  1069. const unsigned int ndigits = curve->g.ndigits;
  1070. /* t5 = x2 - x1 */
  1071. vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
  1072. /* t5 = (x2 - x1)^2 = A */
  1073. vli_mod_square_fast(t5, t5, curve);
  1074. /* t1 = x1*A = B */
  1075. vli_mod_mult_fast(x1, x1, t5, curve);
  1076. /* t3 = x2*A = C */
  1077. vli_mod_mult_fast(x2, x2, t5, curve);
  1078. /* t4 = y2 + y1 */
  1079. vli_mod_add(t5, y2, y1, curve_prime, ndigits);
  1080. /* t4 = y2 - y1 */
  1081. vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
  1082. /* t6 = C - B */
  1083. vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
  1084. /* t2 = y1 * (C - B) */
  1085. vli_mod_mult_fast(y1, y1, t6, curve);
  1086. /* t6 = B + C */
  1087. vli_mod_add(t6, x1, x2, curve_prime, ndigits);
  1088. /* t3 = (y2 - y1)^2 */
  1089. vli_mod_square_fast(x2, y2, curve);
  1090. /* t3 = x3 */
  1091. vli_mod_sub(x2, x2, t6, curve_prime, ndigits);
  1092. /* t7 = B - x3 */
  1093. vli_mod_sub(t7, x1, x2, curve_prime, ndigits);
  1094. /* t4 = (y2 - y1)*(B - x3) */
  1095. vli_mod_mult_fast(y2, y2, t7, curve);
  1096. /* t4 = y3 */
  1097. vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
  1098. /* t7 = (y2 + y1)^2 = F */
  1099. vli_mod_square_fast(t7, t5, curve);
  1100. /* t7 = x3' */
  1101. vli_mod_sub(t7, t7, t6, curve_prime, ndigits);
  1102. /* t6 = x3' - B */
  1103. vli_mod_sub(t6, t7, x1, curve_prime, ndigits);
  1104. /* t6 = (y2 + y1)*(x3' - B) */
  1105. vli_mod_mult_fast(t6, t6, t5, curve);
  1106. /* t2 = y3' */
  1107. vli_mod_sub(y1, t6, y1, curve_prime, ndigits);
  1108. vli_set(x1, t7, ndigits);
  1109. }
  1110. static void ecc_point_mult(struct ecc_point *result,
  1111. const struct ecc_point *point, const u64 *scalar,
  1112. u64 *initial_z, const struct ecc_curve *curve,
  1113. unsigned int ndigits)
  1114. {
  1115. /* R0 and R1 */
  1116. u64 rx[2][ECC_MAX_DIGITS];
  1117. u64 ry[2][ECC_MAX_DIGITS];
  1118. u64 z[ECC_MAX_DIGITS];
  1119. u64 sk[2][ECC_MAX_DIGITS];
  1120. u64 *curve_prime = curve->p;
  1121. int i, nb;
  1122. int num_bits;
  1123. int carry;
  1124. carry = vli_add(sk[0], scalar, curve->n, ndigits);
  1125. vli_add(sk[1], sk[0], curve->n, ndigits);
  1126. scalar = sk[!carry];
  1127. if (curve->nbits == 521) /* NIST P521 */
  1128. num_bits = curve->nbits + 2;
  1129. else
  1130. num_bits = sizeof(u64) * ndigits * 8 + 1;
  1131. vli_set(rx[1], point->x, ndigits);
  1132. vli_set(ry[1], point->y, ndigits);
  1133. xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve);
  1134. for (i = num_bits - 2; i > 0; i--) {
  1135. nb = !vli_test_bit(scalar, i);
  1136. xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);
  1137. xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);
  1138. }
  1139. nb = !vli_test_bit(scalar, 0);
  1140. xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);
  1141. /* Find final 1/Z value. */
  1142. /* X1 - X0 */
  1143. vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits);
  1144. /* Yb * (X1 - X0) */
  1145. vli_mod_mult_fast(z, z, ry[1 - nb], curve);
  1146. /* xP * Yb * (X1 - X0) */
  1147. vli_mod_mult_fast(z, z, point->x, curve);
  1148. /* 1 / (xP * Yb * (X1 - X0)) */
  1149. vli_mod_inv(z, z, curve_prime, point->ndigits);
  1150. /* yP / (xP * Yb * (X1 - X0)) */
  1151. vli_mod_mult_fast(z, z, point->y, curve);
  1152. /* Xb * yP / (xP * Yb * (X1 - X0)) */
  1153. vli_mod_mult_fast(z, z, rx[1 - nb], curve);
  1154. /* End 1/Z calculation */
  1155. xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);
  1156. apply_z(rx[0], ry[0], z, curve);
  1157. vli_set(result->x, rx[0], ndigits);
  1158. vli_set(result->y, ry[0], ndigits);
  1159. }
  1160. /* Computes R = P + Q mod p */
  1161. static void ecc_point_add(const struct ecc_point *result,
  1162. const struct ecc_point *p, const struct ecc_point *q,
  1163. const struct ecc_curve *curve)
  1164. {
  1165. u64 z[ECC_MAX_DIGITS];
  1166. u64 px[ECC_MAX_DIGITS];
  1167. u64 py[ECC_MAX_DIGITS];
  1168. unsigned int ndigits = curve->g.ndigits;
  1169. vli_set(result->x, q->x, ndigits);
  1170. vli_set(result->y, q->y, ndigits);
  1171. vli_mod_sub(z, result->x, p->x, curve->p, ndigits);
  1172. vli_set(px, p->x, ndigits);
  1173. vli_set(py, p->y, ndigits);
  1174. xycz_add(px, py, result->x, result->y, curve);
  1175. vli_mod_inv(z, z, curve->p, ndigits);
  1176. apply_z(result->x, result->y, z, curve);
  1177. }
  1178. /* Computes R = u1P + u2Q mod p using Shamir's trick.
  1179. * Based on: Kenneth MacKay's micro-ecc (2014).
  1180. */
  1181. void ecc_point_mult_shamir(const struct ecc_point *result,
  1182. const u64 *u1, const struct ecc_point *p,
  1183. const u64 *u2, const struct ecc_point *q,
  1184. const struct ecc_curve *curve)
  1185. {
  1186. u64 z[ECC_MAX_DIGITS];
  1187. u64 sump[2][ECC_MAX_DIGITS];
  1188. u64 *rx = result->x;
  1189. u64 *ry = result->y;
  1190. unsigned int ndigits = curve->g.ndigits;
  1191. unsigned int num_bits;
  1192. struct ecc_point sum = ECC_POINT_INIT(sump[0], sump[1], ndigits);
  1193. const struct ecc_point *points[4];
  1194. const struct ecc_point *point;
  1195. unsigned int idx;
  1196. int i;
  1197. ecc_point_add(&sum, p, q, curve);
  1198. points[0] = NULL;
  1199. points[1] = p;
  1200. points[2] = q;
  1201. points[3] = &sum;
  1202. num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits));
  1203. i = num_bits - 1;
  1204. idx = !!vli_test_bit(u1, i);
  1205. idx |= (!!vli_test_bit(u2, i)) << 1;
  1206. point = points[idx];
  1207. vli_set(rx, point->x, ndigits);
  1208. vli_set(ry, point->y, ndigits);
  1209. vli_clear(z + 1, ndigits - 1);
  1210. z[0] = 1;
  1211. for (--i; i >= 0; i--) {
  1212. ecc_point_double_jacobian(rx, ry, z, curve);
  1213. idx = !!vli_test_bit(u1, i);
  1214. idx |= (!!vli_test_bit(u2, i)) << 1;
  1215. point = points[idx];
  1216. if (point) {
  1217. u64 tx[ECC_MAX_DIGITS];
  1218. u64 ty[ECC_MAX_DIGITS];
  1219. u64 tz[ECC_MAX_DIGITS];
  1220. vli_set(tx, point->x, ndigits);
  1221. vli_set(ty, point->y, ndigits);
  1222. apply_z(tx, ty, z, curve);
  1223. vli_mod_sub(tz, rx, tx, curve->p, ndigits);
  1224. xycz_add(tx, ty, rx, ry, curve);
  1225. vli_mod_mult_fast(z, z, tz, curve);
  1226. }
  1227. }
  1228. vli_mod_inv(z, z, curve->p, ndigits);
  1229. apply_z(rx, ry, z, curve);
  1230. }
  1231. EXPORT_SYMBOL(ecc_point_mult_shamir);
  1232. /*
  1233. * This function performs checks equivalent to Appendix A.4.2 of FIPS 186-5.
  1234. * Whereas A.4.2 results in an integer in the interval [1, n-1], this function
  1235. * ensures that the integer is in the range of [2, n-3]. We are slightly
  1236. * stricter because of the currently used scalar multiplication algorithm.
  1237. */
  1238. static int __ecc_is_key_valid(const struct ecc_curve *curve,
  1239. const u64 *private_key, unsigned int ndigits)
  1240. {
  1241. u64 one[ECC_MAX_DIGITS] = { 1, };
  1242. u64 res[ECC_MAX_DIGITS];
  1243. if (!private_key)
  1244. return -EINVAL;
  1245. if (curve->g.ndigits != ndigits)
  1246. return -EINVAL;
  1247. /* Make sure the private key is in the range [2, n-3]. */
  1248. if (vli_cmp(one, private_key, ndigits) != -1)
  1249. return -EINVAL;
  1250. vli_sub(res, curve->n, one, ndigits);
  1251. vli_sub(res, res, one, ndigits);
  1252. if (vli_cmp(res, private_key, ndigits) != 1)
  1253. return -EINVAL;
  1254. return 0;
  1255. }
  1256. int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
  1257. const u64 *private_key, unsigned int private_key_len)
  1258. {
  1259. int nbytes;
  1260. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1261. nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
  1262. if (private_key_len != nbytes)
  1263. return -EINVAL;
  1264. return __ecc_is_key_valid(curve, private_key, ndigits);
  1265. }
  1266. EXPORT_SYMBOL(ecc_is_key_valid);
  1267. /*
  1268. * ECC private keys are generated using the method of rejection sampling,
  1269. * equivalent to that described in FIPS 186-5, Appendix A.2.2.
  1270. *
  1271. * This method generates a private key uniformly distributed in the range
  1272. * [2, n-3].
  1273. */
  1274. int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits,
  1275. u64 *private_key)
  1276. {
  1277. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1278. unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
  1279. unsigned int nbits = vli_num_bits(curve->n, ndigits);
  1280. int err;
  1281. /*
  1282. * Step 1 & 2: check that N is included in Table 1 of FIPS 186-5,
  1283. * section 6.1.1.
  1284. */
  1285. if (nbits < 224)
  1286. return -EINVAL;
  1287. /*
  1288. * FIPS 186-5 recommends that the private key should be obtained from a
  1289. * RBG with a security strength equal to or greater than the security
  1290. * strength associated with N.
  1291. *
  1292. * The maximum security strength identified by NIST SP800-57pt1r4 for
  1293. * ECC is 256 (N >= 512).
  1294. *
  1295. * This condition is met by the default RNG because it selects a favored
  1296. * DRBG with a security strength of 256.
  1297. */
  1298. if (crypto_get_default_rng())
  1299. return -EFAULT;
  1300. /* Step 3: obtain N returned_bits from the DRBG. */
  1301. err = crypto_rng_get_bytes(crypto_default_rng,
  1302. (u8 *)private_key, nbytes);
  1303. crypto_put_default_rng();
  1304. if (err)
  1305. return err;
  1306. /* Step 4: make sure the private key is in the valid range. */
  1307. if (__ecc_is_key_valid(curve, private_key, ndigits))
  1308. return -EINVAL;
  1309. return 0;
  1310. }
  1311. EXPORT_SYMBOL(ecc_gen_privkey);
  1312. int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
  1313. const u64 *private_key, u64 *public_key)
  1314. {
  1315. int ret = 0;
  1316. struct ecc_point *pk;
  1317. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1318. if (!private_key) {
  1319. ret = -EINVAL;
  1320. goto out;
  1321. }
  1322. pk = ecc_alloc_point(ndigits);
  1323. if (!pk) {
  1324. ret = -ENOMEM;
  1325. goto out;
  1326. }
  1327. ecc_point_mult(pk, &curve->g, private_key, NULL, curve, ndigits);
  1328. /* SP800-56A rev 3 5.6.2.1.3 key check */
  1329. if (ecc_is_pubkey_valid_full(curve, pk)) {
  1330. ret = -EAGAIN;
  1331. goto err_free_point;
  1332. }
  1333. ecc_swap_digits(pk->x, public_key, ndigits);
  1334. ecc_swap_digits(pk->y, &public_key[ndigits], ndigits);
  1335. err_free_point:
  1336. ecc_free_point(pk);
  1337. out:
  1338. return ret;
  1339. }
  1340. EXPORT_SYMBOL(ecc_make_pub_key);
  1341. /* SP800-56A section 5.6.2.3.4 partial verification: ephemeral keys only */
  1342. int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
  1343. struct ecc_point *pk)
  1344. {
  1345. u64 yy[ECC_MAX_DIGITS], xxx[ECC_MAX_DIGITS], w[ECC_MAX_DIGITS];
  1346. if (WARN_ON(pk->ndigits != curve->g.ndigits))
  1347. return -EINVAL;
  1348. /* Check 1: Verify key is not the zero point. */
  1349. if (ecc_point_is_zero(pk))
  1350. return -EINVAL;
  1351. /* Check 2: Verify key is in the range [1, p-1]. */
  1352. if (vli_cmp(curve->p, pk->x, pk->ndigits) != 1)
  1353. return -EINVAL;
  1354. if (vli_cmp(curve->p, pk->y, pk->ndigits) != 1)
  1355. return -EINVAL;
  1356. /* Check 3: Verify that y^2 == (x^3 + a·x + b) mod p */
  1357. vli_mod_square_fast(yy, pk->y, curve); /* y^2 */
  1358. vli_mod_square_fast(xxx, pk->x, curve); /* x^2 */
  1359. vli_mod_mult_fast(xxx, xxx, pk->x, curve); /* x^3 */
  1360. vli_mod_mult_fast(w, curve->a, pk->x, curve); /* a·x */
  1361. vli_mod_add(w, w, curve->b, curve->p, pk->ndigits); /* a·x + b */
  1362. vli_mod_add(w, w, xxx, curve->p, pk->ndigits); /* x^3 + a·x + b */
  1363. if (vli_cmp(yy, w, pk->ndigits) != 0) /* Equation */
  1364. return -EINVAL;
  1365. return 0;
  1366. }
  1367. EXPORT_SYMBOL(ecc_is_pubkey_valid_partial);
  1368. /* SP800-56A section 5.6.2.3.3 full verification */
  1369. int ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
  1370. struct ecc_point *pk)
  1371. {
  1372. struct ecc_point *nQ;
  1373. /* Checks 1 through 3 */
  1374. int ret = ecc_is_pubkey_valid_partial(curve, pk);
  1375. if (ret)
  1376. return ret;
  1377. /* Check 4: Verify that nQ is the zero point. */
  1378. nQ = ecc_alloc_point(pk->ndigits);
  1379. if (!nQ)
  1380. return -ENOMEM;
  1381. ecc_point_mult(nQ, pk, curve->n, NULL, curve, pk->ndigits);
  1382. if (!ecc_point_is_zero(nQ))
  1383. ret = -EINVAL;
  1384. ecc_free_point(nQ);
  1385. return ret;
  1386. }
  1387. EXPORT_SYMBOL(ecc_is_pubkey_valid_full);
  1388. int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
  1389. const u64 *private_key, const u64 *public_key,
  1390. u64 *secret)
  1391. {
  1392. int ret = 0;
  1393. struct ecc_point *product, *pk;
  1394. u64 rand_z[ECC_MAX_DIGITS];
  1395. unsigned int nbytes;
  1396. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1397. if (!private_key || !public_key || ndigits > ARRAY_SIZE(rand_z)) {
  1398. ret = -EINVAL;
  1399. goto out;
  1400. }
  1401. nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
  1402. get_random_bytes(rand_z, nbytes);
  1403. pk = ecc_alloc_point(ndigits);
  1404. if (!pk) {
  1405. ret = -ENOMEM;
  1406. goto out;
  1407. }
  1408. ecc_swap_digits(public_key, pk->x, ndigits);
  1409. ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
  1410. ret = ecc_is_pubkey_valid_partial(curve, pk);
  1411. if (ret)
  1412. goto err_alloc_product;
  1413. product = ecc_alloc_point(ndigits);
  1414. if (!product) {
  1415. ret = -ENOMEM;
  1416. goto err_alloc_product;
  1417. }
  1418. ecc_point_mult(product, pk, private_key, rand_z, curve, ndigits);
  1419. if (ecc_point_is_zero(product)) {
  1420. ret = -EFAULT;
  1421. goto err_validity;
  1422. }
  1423. ecc_swap_digits(product->x, secret, ndigits);
  1424. err_validity:
  1425. memzero_explicit(rand_z, sizeof(rand_z));
  1426. ecc_free_point(product);
  1427. err_alloc_product:
  1428. ecc_free_point(pk);
  1429. out:
  1430. return ret;
  1431. }
  1432. EXPORT_SYMBOL(crypto_ecdh_shared_secret);
  1433. MODULE_DESCRIPTION("core elliptic curve module");
  1434. MODULE_LICENSE("Dual BSD/GPL");