dat.h 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * KVM guest address space mapping code
  4. *
  5. * Copyright IBM Corp. 2024, 2025
  6. * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
  7. */
  8. #ifndef __KVM_S390_DAT_H
  9. #define __KVM_S390_DAT_H
  10. #include <linux/radix-tree.h>
  11. #include <linux/refcount.h>
  12. #include <linux/io.h>
  13. #include <linux/kvm_types.h>
  14. #include <linux/pgalloc.h>
  15. #include <asm/tlbflush.h>
  16. #include <asm/dat-bits.h>
  17. /*
  18. * Base address and length must be sent at the start of each block, therefore
  19. * it's cheaper to send some clean data, as long as it's less than the size of
  20. * two longs.
  21. */
  22. #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
  23. /* For consistency */
  24. #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
  25. #define _ASCE(x) ((union asce) { .val = (x), })
  26. #define NULL_ASCE _ASCE(0)
  27. enum {
  28. _DAT_TOKEN_NONE = 0,
  29. _DAT_TOKEN_PIC,
  30. };
  31. #define _CRSTE_TOK(l, t, p) ((union crste) { \
  32. .tok.i = 1, \
  33. .tok.tt = (l), \
  34. .tok.type = (t), \
  35. .tok.par = (p) \
  36. })
  37. #define _CRSTE_PIC(l, p) _CRSTE_TOK(l, _DAT_TOKEN_PIC, p)
  38. #define _CRSTE_HOLE(l) _CRSTE_PIC(l, PGM_ADDRESSING)
  39. #define _CRSTE_EMPTY(l) _CRSTE_TOK(l, _DAT_TOKEN_NONE, 0)
  40. #define _PMD_EMPTY _CRSTE_EMPTY(TABLE_TYPE_SEGMENT)
  41. #define _PTE_TOK(t, p) ((union pte) { .tok.i = 1, .tok.type = (t), .tok.par = (p) })
  42. #define _PTE_EMPTY _PTE_TOK(_DAT_TOKEN_NONE, 0)
  43. /* This fake table type is used for page table walks (both for normal page tables and vSIE) */
  44. #define TABLE_TYPE_PAGE_TABLE -1
  45. enum dat_walk_flags {
  46. DAT_WALK_USES_SKEYS = 0x40,
  47. DAT_WALK_CONTINUE = 0x20,
  48. DAT_WALK_IGN_HOLES = 0x10,
  49. DAT_WALK_SPLIT = 0x08,
  50. DAT_WALK_ALLOC = 0x04,
  51. DAT_WALK_ANY = 0x02,
  52. DAT_WALK_LEAF = 0x01,
  53. DAT_WALK_DEFAULT = 0
  54. };
  55. #define DAT_WALK_SPLIT_ALLOC (DAT_WALK_SPLIT | DAT_WALK_ALLOC)
  56. #define DAT_WALK_ALLOC_CONTINUE (DAT_WALK_CONTINUE | DAT_WALK_ALLOC)
  57. #define DAT_WALK_LEAF_ALLOC (DAT_WALK_LEAF | DAT_WALK_ALLOC)
  58. union pte {
  59. unsigned long val;
  60. union page_table_entry h;
  61. struct {
  62. unsigned long :56; /* Hardware bits */
  63. unsigned long u : 1; /* Page unused */
  64. unsigned long s : 1; /* Special */
  65. unsigned long w : 1; /* Writable */
  66. unsigned long r : 1; /* Readable */
  67. unsigned long d : 1; /* Dirty */
  68. unsigned long y : 1; /* Young */
  69. unsigned long sd: 1; /* Soft dirty */
  70. unsigned long pr: 1; /* Present */
  71. } s;
  72. struct {
  73. unsigned char hwbytes[7];
  74. unsigned char swbyte;
  75. };
  76. union {
  77. struct {
  78. unsigned long type :16; /* Token type */
  79. unsigned long par :16; /* Token parameter */
  80. unsigned long :20;
  81. unsigned long : 1; /* Must be 0 */
  82. unsigned long i : 1; /* Must be 1 */
  83. unsigned long : 2;
  84. unsigned long : 7;
  85. unsigned long pr : 1; /* Must be 0 */
  86. };
  87. struct {
  88. unsigned long token:32; /* Token and parameter */
  89. unsigned long :32;
  90. };
  91. } tok;
  92. };
  93. /* Soft dirty, needed as macro for atomic operations on ptes */
  94. #define _PAGE_SD 0x002
  95. /* Needed as macro to perform atomic operations */
  96. #define PGSTE_PCL_BIT 0x0080000000000000UL /* PCL lock, HW bit */
  97. #define PGSTE_CMMA_D_BIT 0x0000000000008000UL /* CMMA dirty soft-bit */
  98. enum pgste_gps_usage {
  99. PGSTE_GPS_USAGE_STABLE = 0,
  100. PGSTE_GPS_USAGE_UNUSED,
  101. PGSTE_GPS_USAGE_POT_VOLATILE,
  102. PGSTE_GPS_USAGE_VOLATILE,
  103. };
  104. union pgste {
  105. unsigned long val;
  106. struct {
  107. unsigned long acc : 4;
  108. unsigned long fp : 1;
  109. unsigned long : 3;
  110. unsigned long pcl : 1;
  111. unsigned long hr : 1;
  112. unsigned long hc : 1;
  113. unsigned long : 2;
  114. unsigned long gr : 1;
  115. unsigned long gc : 1;
  116. unsigned long : 1;
  117. unsigned long :16; /* val16 */
  118. unsigned long zero : 1;
  119. unsigned long nodat : 1;
  120. unsigned long : 4;
  121. unsigned long usage : 2;
  122. unsigned long : 8;
  123. unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */
  124. unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
  125. unsigned long vsie_notif : 1; /* Referenced in a shadow table */
  126. unsigned long : 5;
  127. unsigned long : 8;
  128. };
  129. struct {
  130. unsigned short hwbytes0;
  131. unsigned short val16; /* Used to store chunked values, see dat_{s,g}et_ptval() */
  132. unsigned short hwbytes4;
  133. unsigned char flags; /* Maps to the software bits */
  134. unsigned char hwbyte7;
  135. } __packed;
  136. };
  137. union pmd {
  138. unsigned long val;
  139. union segment_table_entry h;
  140. struct {
  141. struct {
  142. unsigned long :44; /* HW */
  143. unsigned long : 3; /* Unused */
  144. unsigned long : 1; /* HW */
  145. unsigned long s : 1; /* Special */
  146. unsigned long w : 1; /* Writable soft-bit */
  147. unsigned long r : 1; /* Readable soft-bit */
  148. unsigned long d : 1; /* Dirty */
  149. unsigned long y : 1; /* Young */
  150. unsigned long : 3; /* HW */
  151. unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
  152. unsigned long vsie_notif : 1; /* Referenced in a shadow table */
  153. unsigned long : 4; /* HW */
  154. unsigned long sd : 1; /* Soft-Dirty */
  155. unsigned long pr : 1; /* Present */
  156. } fc1;
  157. } s;
  158. };
  159. union pud {
  160. unsigned long val;
  161. union region3_table_entry h;
  162. struct {
  163. struct {
  164. unsigned long :33; /* HW */
  165. unsigned long :14; /* Unused */
  166. unsigned long : 1; /* HW */
  167. unsigned long s : 1; /* Special */
  168. unsigned long w : 1; /* Writable soft-bit */
  169. unsigned long r : 1; /* Readable soft-bit */
  170. unsigned long d : 1; /* Dirty */
  171. unsigned long y : 1; /* Young */
  172. unsigned long : 3; /* HW */
  173. unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
  174. unsigned long vsie_notif : 1; /* Referenced in a shadow table */
  175. unsigned long : 4; /* HW */
  176. unsigned long sd : 1; /* Soft-Dirty */
  177. unsigned long pr : 1; /* Present */
  178. } fc1;
  179. } s;
  180. };
  181. union p4d {
  182. unsigned long val;
  183. union region2_table_entry h;
  184. };
  185. union pgd {
  186. unsigned long val;
  187. union region1_table_entry h;
  188. };
  189. union crste {
  190. unsigned long val;
  191. union {
  192. struct {
  193. unsigned long :52;
  194. unsigned long : 1;
  195. unsigned long fc: 1;
  196. unsigned long p : 1;
  197. unsigned long : 1;
  198. unsigned long : 2;
  199. unsigned long i : 1;
  200. unsigned long : 1;
  201. unsigned long tt: 2;
  202. unsigned long : 2;
  203. };
  204. struct {
  205. unsigned long to:52;
  206. unsigned long : 1;
  207. unsigned long fc: 1;
  208. unsigned long p : 1;
  209. unsigned long : 1;
  210. unsigned long tf: 2;
  211. unsigned long i : 1;
  212. unsigned long : 1;
  213. unsigned long tt: 2;
  214. unsigned long tl: 2;
  215. } fc0;
  216. struct {
  217. unsigned long :47;
  218. unsigned long av : 1; /* ACCF-Validity Control */
  219. unsigned long acc: 4; /* Access-Control Bits */
  220. unsigned long f : 1; /* Fetch-Protection Bit */
  221. unsigned long fc : 1; /* Format-Control */
  222. unsigned long p : 1; /* DAT-Protection Bit */
  223. unsigned long iep: 1; /* Instruction-Execution-Protection */
  224. unsigned long : 2;
  225. unsigned long i : 1; /* Segment-Invalid Bit */
  226. unsigned long cs : 1; /* Common-Segment Bit */
  227. unsigned long tt : 2; /* Table-Type Bits */
  228. unsigned long : 2;
  229. } fc1;
  230. } h;
  231. struct {
  232. struct {
  233. unsigned long :47;
  234. unsigned long : 1; /* HW (should be 0) */
  235. unsigned long s : 1; /* Special */
  236. unsigned long w : 1; /* Writable */
  237. unsigned long r : 1; /* Readable */
  238. unsigned long d : 1; /* Dirty */
  239. unsigned long y : 1; /* Young */
  240. unsigned long : 3; /* HW */
  241. unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
  242. unsigned long vsie_notif : 1; /* Referenced in a shadow table */
  243. unsigned long : 4; /* HW */
  244. unsigned long sd : 1; /* Soft-Dirty */
  245. unsigned long pr : 1; /* Present */
  246. } fc1;
  247. } s;
  248. union {
  249. struct {
  250. unsigned long type :16; /* Token type */
  251. unsigned long par :16; /* Token parameter */
  252. unsigned long :26;
  253. unsigned long i : 1; /* Must be 1 */
  254. unsigned long : 1;
  255. unsigned long tt : 2;
  256. unsigned long : 1;
  257. unsigned long pr : 1; /* Must be 0 */
  258. };
  259. struct {
  260. unsigned long token:32; /* Token and parameter */
  261. unsigned long :32;
  262. };
  263. } tok;
  264. union pmd pmd;
  265. union pud pud;
  266. union p4d p4d;
  267. union pgd pgd;
  268. };
  269. union skey {
  270. unsigned char skey;
  271. struct {
  272. unsigned char acc :4;
  273. unsigned char fp :1;
  274. unsigned char r :1;
  275. unsigned char c :1;
  276. unsigned char zero:1;
  277. };
  278. };
  279. static_assert(sizeof(union pgste) == sizeof(unsigned long));
  280. static_assert(sizeof(union pte) == sizeof(unsigned long));
  281. static_assert(sizeof(union pmd) == sizeof(unsigned long));
  282. static_assert(sizeof(union pud) == sizeof(unsigned long));
  283. static_assert(sizeof(union p4d) == sizeof(unsigned long));
  284. static_assert(sizeof(union pgd) == sizeof(unsigned long));
  285. static_assert(sizeof(union crste) == sizeof(unsigned long));
  286. static_assert(sizeof(union skey) == sizeof(char));
  287. struct segment_table {
  288. union pmd pmds[_CRST_ENTRIES];
  289. };
  290. struct region3_table {
  291. union pud puds[_CRST_ENTRIES];
  292. };
  293. struct region2_table {
  294. union p4d p4ds[_CRST_ENTRIES];
  295. };
  296. struct region1_table {
  297. union pgd pgds[_CRST_ENTRIES];
  298. };
  299. struct crst_table {
  300. union {
  301. union crste crstes[_CRST_ENTRIES];
  302. struct segment_table segment;
  303. struct region3_table region3;
  304. struct region2_table region2;
  305. struct region1_table region1;
  306. };
  307. };
  308. struct page_table {
  309. union pte ptes[_PAGE_ENTRIES];
  310. union pgste pgstes[_PAGE_ENTRIES];
  311. };
  312. static_assert(sizeof(struct crst_table) == _CRST_TABLE_SIZE);
  313. static_assert(sizeof(struct page_table) == PAGE_SIZE);
  314. struct dat_walk;
  315. typedef long (*dat_walk_op)(union crste *crste, gfn_t gfn, gfn_t next, struct dat_walk *w);
  316. struct dat_walk_ops {
  317. union {
  318. dat_walk_op crste_ops[4];
  319. struct {
  320. dat_walk_op pmd_entry;
  321. dat_walk_op pud_entry;
  322. dat_walk_op p4d_entry;
  323. dat_walk_op pgd_entry;
  324. };
  325. };
  326. long (*pte_entry)(union pte *pte, gfn_t gfn, gfn_t next, struct dat_walk *w);
  327. };
  328. struct dat_walk {
  329. const struct dat_walk_ops *ops;
  330. union crste *last;
  331. union pte *last_pte;
  332. union asce asce;
  333. gfn_t start;
  334. gfn_t end;
  335. int flags;
  336. void *priv;
  337. };
  338. struct ptval_param {
  339. unsigned char offset : 6;
  340. unsigned char len : 2;
  341. };
  342. /**
  343. * _pte() - Useful constructor for union pte
  344. * @pfn: the pfn this pte should point to.
  345. * @writable: whether the pte should be writable.
  346. * @dirty: whether the pte should be dirty.
  347. * @special: whether the pte should be marked as special
  348. *
  349. * The pte is also marked as young and present. If the pte is marked as dirty,
  350. * it gets marked as soft-dirty too. If the pte is not dirty, the hardware
  351. * protect bit is set (independently of the write softbit); this way proper
  352. * dirty tracking can be performed.
  353. *
  354. * Return: a union pte value.
  355. */
  356. static inline union pte _pte(kvm_pfn_t pfn, bool writable, bool dirty, bool special)
  357. {
  358. union pte res = { .val = PFN_PHYS(pfn) };
  359. res.h.p = !dirty;
  360. res.s.y = 1;
  361. res.s.pr = 1;
  362. res.s.w = writable;
  363. res.s.d = dirty;
  364. res.s.sd = dirty;
  365. res.s.s = special;
  366. return res;
  367. }
  368. static inline union crste _crste_fc0(kvm_pfn_t pfn, int tt)
  369. {
  370. union crste res = { .val = PFN_PHYS(pfn) };
  371. res.h.tt = tt;
  372. res.h.fc0.tl = _REGION_ENTRY_LENGTH;
  373. res.h.fc0.tf = 0;
  374. return res;
  375. }
  376. /**
  377. * _crste() - Useful constructor for union crste with FC=1
  378. * @pfn: the pfn this pte should point to.
  379. * @tt: the table type
  380. * @writable: whether the pte should be writable.
  381. * @dirty: whether the pte should be dirty.
  382. *
  383. * The crste is also marked as young and present. If the crste is marked as
  384. * dirty, it gets marked as soft-dirty too. If the crste is not dirty, the
  385. * hardware protect bit is set (independently of the write softbit); this way
  386. * proper dirty tracking can be performed.
  387. *
  388. * Return: a union crste value.
  389. */
  390. static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool writable, bool dirty)
  391. {
  392. union crste res = { .val = PFN_PHYS(pfn) & _SEGMENT_MASK };
  393. res.h.tt = tt;
  394. res.h.p = !dirty;
  395. res.h.fc = 1;
  396. res.s.fc1.y = 1;
  397. res.s.fc1.pr = 1;
  398. res.s.fc1.w = writable;
  399. res.s.fc1.d = dirty;
  400. res.s.fc1.sd = dirty;
  401. return res;
  402. }
  403. union essa_state {
  404. unsigned char val;
  405. struct {
  406. unsigned char : 2;
  407. unsigned char nodat : 1;
  408. unsigned char exception : 1;
  409. unsigned char usage : 2;
  410. unsigned char content : 2;
  411. };
  412. };
  413. /**
  414. * struct vsie_rmap - reverse mapping for shadow page table entries
  415. * @next: pointer to next rmap in the list
  416. * @r_gfn: virtual rmap address in the shadow guest address space
  417. */
  418. struct vsie_rmap {
  419. struct vsie_rmap *next;
  420. union {
  421. unsigned long val;
  422. struct {
  423. long level: 8;
  424. unsigned long : 4;
  425. unsigned long r_gfn:52;
  426. };
  427. };
  428. };
  429. static_assert(sizeof(struct vsie_rmap) == 2 * sizeof(long));
  430. #define KVM_S390_MMU_CACHE_N_CRSTS 6
  431. #define KVM_S390_MMU_CACHE_N_PTS 2
  432. #define KVM_S390_MMU_CACHE_N_RMAPS 16
  433. struct kvm_s390_mmu_cache {
  434. void *crsts[KVM_S390_MMU_CACHE_N_CRSTS];
  435. void *pts[KVM_S390_MMU_CACHE_N_PTS];
  436. void *rmaps[KVM_S390_MMU_CACHE_N_RMAPS];
  437. short int n_crsts;
  438. short int n_pts;
  439. short int n_rmaps;
  440. };
  441. struct guest_fault {
  442. gfn_t gfn; /* Guest frame */
  443. kvm_pfn_t pfn; /* Host PFN */
  444. struct page *page; /* Host page */
  445. union pte *ptep; /* Used to resolve the fault, or NULL */
  446. union crste *crstep; /* Used to resolve the fault, or NULL */
  447. bool writable; /* Mapping is writable */
  448. bool write_attempt; /* Write access attempted */
  449. bool attempt_pfault; /* Attempt a pfault first */
  450. bool valid; /* This entry contains valid data */
  451. void (*callback)(struct guest_fault *f);
  452. void *priv;
  453. };
  454. /*
  455. * 0 1 2 3 4 5 6 7
  456. * +-------+-------+-------+-------+-------+-------+-------+-------+
  457. * 0 | | PGT_ADDR |
  458. * 8 | VMADDR | |
  459. * 16 | |
  460. * 24 | |
  461. */
  462. #define MKPTVAL(o, l) ((struct ptval_param) { .offset = (o), .len = ((l) + 1) / 2 - 1})
  463. #define PTVAL_PGT_ADDR MKPTVAL(4, 8)
  464. #define PTVAL_VMADDR MKPTVAL(8, 6)
  465. union pgste __must_check __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new,
  466. gfn_t gfn, union asce asce, bool uses_skeys);
  467. bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
  468. union asce asce);
  469. void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce);
  470. long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
  471. const struct dat_walk_ops *ops, int flags, void *priv);
  472. int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
  473. int walk_level, union crste **last, union pte **ptepp);
  474. void dat_free_level(struct crst_table *table, bool owns_ptes);
  475. struct crst_table *dat_alloc_crst_sleepable(unsigned long init);
  476. int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype);
  477. int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey);
  478. int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
  479. union skey skey, bool nq);
  480. int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gfn_t gfn,
  481. union skey skey, union skey *oldkey, bool nq, bool mr, bool mc);
  482. int dat_reset_reference_bit(union asce asce, gfn_t gfn);
  483. long dat_reset_skeys(union asce asce, gfn_t start);
  484. unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param);
  485. void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val);
  486. int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
  487. u16 type, u16 param);
  488. int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
  489. bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
  490. int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
  491. long dat_reset_cmma(union asce asce, gfn_t start_gfn);
  492. int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values);
  493. int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem);
  494. int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
  495. unsigned long count, unsigned long mask, const uint8_t *bits);
  496. int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc);
  497. #define GFP_KVM_S390_MMU_CACHE (GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN)
  498. static inline struct page_table *kvm_s390_mmu_cache_alloc_pt(struct kvm_s390_mmu_cache *mc)
  499. {
  500. if (mc->n_pts)
  501. return mc->pts[--mc->n_pts];
  502. return (void *)__get_free_page(GFP_KVM_S390_MMU_CACHE);
  503. }
  504. static inline struct crst_table *kvm_s390_mmu_cache_alloc_crst(struct kvm_s390_mmu_cache *mc)
  505. {
  506. if (mc->n_crsts)
  507. return mc->crsts[--mc->n_crsts];
  508. return (void *)__get_free_pages(GFP_KVM_S390_MMU_CACHE | __GFP_COMP, CRST_ALLOC_ORDER);
  509. }
  510. static inline struct vsie_rmap *kvm_s390_mmu_cache_alloc_rmap(struct kvm_s390_mmu_cache *mc)
  511. {
  512. if (mc->n_rmaps)
  513. return mc->rmaps[--mc->n_rmaps];
  514. return kzalloc_obj(struct vsie_rmap, GFP_KVM_S390_MMU_CACHE);
  515. }
  516. static inline struct crst_table *crste_table_start(union crste *crstep)
  517. {
  518. return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE);
  519. }
  520. static inline struct page_table *pte_table_start(union pte *ptep)
  521. {
  522. return (struct page_table *)ALIGN_DOWN((unsigned long)ptep, _PAGE_TABLE_SIZE);
  523. }
  524. static inline bool crdte_crste(union crste *crstep, union crste old, union crste new, gfn_t gfn,
  525. union asce asce)
  526. {
  527. unsigned long dtt = 0x10 | new.h.tt << 2;
  528. void *table = crste_table_start(crstep);
  529. return crdte(old.val, new.val, table, dtt, gfn_to_gpa(gfn), asce.val);
  530. }
  531. /**
  532. * idte_crste() - invalidate a crste entry using idte
  533. * @crstep: pointer to the crste to be invalidated
  534. * @gfn: a gfn mapped by the crste
  535. * @opt: options for the idte instruction
  536. * @asce: the asce
  537. * @local: whether the operation is cpu-local
  538. */
  539. static __always_inline void idte_crste(union crste *crstep, gfn_t gfn, unsigned long opt,
  540. union asce asce, int local)
  541. {
  542. unsigned long table_origin = __pa(crste_table_start(crstep));
  543. unsigned long gaddr = gfn_to_gpa(gfn) & HPAGE_MASK;
  544. if (__builtin_constant_p(opt) && opt == 0) {
  545. /* flush without guest asce */
  546. asm volatile("idte %[table_origin],0,%[gaddr],%[local]"
  547. : "+m" (*crstep)
  548. : [table_origin] "a" (table_origin), [gaddr] "a" (gaddr),
  549. [local] "i" (local)
  550. : "cc");
  551. } else {
  552. /* flush with guest asce */
  553. asm volatile("idte %[table_origin],%[asce],%[gaddr_opt],%[local]"
  554. : "+m" (*crstep)
  555. : [table_origin] "a" (table_origin), [gaddr_opt] "a" (gaddr | opt),
  556. [asce] "a" (asce.val), [local] "i" (local)
  557. : "cc");
  558. }
  559. }
  560. static inline void dat_init_pgstes(struct page_table *pt, unsigned long val)
  561. {
  562. memset64((void *)pt->pgstes, val, PTRS_PER_PTE);
  563. }
  564. static inline void dat_init_page_table(struct page_table *pt, unsigned long ptes,
  565. unsigned long pgstes)
  566. {
  567. memset64((void *)pt->ptes, ptes, PTRS_PER_PTE);
  568. dat_init_pgstes(pt, pgstes);
  569. }
  570. static inline gfn_t asce_end(union asce asce)
  571. {
  572. return 1ULL << ((asce.dt + 1) * 11 + _SEGMENT_SHIFT - PAGE_SHIFT);
  573. }
  574. #define _CRSTE(x) ((union crste) { .val = _Generic((x), \
  575. union pgd : (x).val, \
  576. union p4d : (x).val, \
  577. union pud : (x).val, \
  578. union pmd : (x).val, \
  579. union crste : (x).val)})
  580. #define _CRSTEP(x) ((union crste *)_Generic((*(x)), \
  581. union pgd : (x), \
  582. union p4d : (x), \
  583. union pud : (x), \
  584. union pmd : (x), \
  585. union crste : (x)))
  586. #define _CRSTP(x) ((struct crst_table *)_Generic((*(x)), \
  587. struct crst_table : (x), \
  588. struct segment_table : (x), \
  589. struct region3_table : (x), \
  590. struct region2_table : (x), \
  591. struct region1_table : (x)))
  592. static inline bool asce_contains_gfn(union asce asce, gfn_t gfn)
  593. {
  594. return gfn < asce_end(asce);
  595. }
  596. static inline bool is_pmd(union crste crste)
  597. {
  598. return crste.h.tt == TABLE_TYPE_SEGMENT;
  599. }
  600. static inline bool is_pud(union crste crste)
  601. {
  602. return crste.h.tt == TABLE_TYPE_REGION3;
  603. }
  604. static inline bool is_p4d(union crste crste)
  605. {
  606. return crste.h.tt == TABLE_TYPE_REGION2;
  607. }
  608. static inline bool is_pgd(union crste crste)
  609. {
  610. return crste.h.tt == TABLE_TYPE_REGION1;
  611. }
  612. static inline phys_addr_t pmd_origin_large(union pmd pmd)
  613. {
  614. return pmd.val & _SEGMENT_ENTRY_ORIGIN_LARGE;
  615. }
  616. static inline phys_addr_t pud_origin_large(union pud pud)
  617. {
  618. return pud.val & _REGION3_ENTRY_ORIGIN_LARGE;
  619. }
  620. /**
  621. * crste_origin_large() - Return the large frame origin of a large crste
  622. * @crste: The crste whose origin is to be returned. Should be either a
  623. * region-3 table entry or a segment table entry, in both cases with
  624. * FC set to 1 (large pages).
  625. *
  626. * Return: The origin of the large frame pointed to by @crste, or -1 if the
  627. * crste was not large (wrong table type, or FC==0)
  628. */
  629. static inline phys_addr_t crste_origin_large(union crste crste)
  630. {
  631. if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
  632. return -1;
  633. if (is_pmd(crste))
  634. return pmd_origin_large(crste.pmd);
  635. return pud_origin_large(crste.pud);
  636. }
  637. #define crste_origin(x) (_Generic((x), \
  638. union pmd : (x).val & _SEGMENT_ENTRY_ORIGIN, \
  639. union pud : (x).val & _REGION_ENTRY_ORIGIN, \
  640. union p4d : (x).val & _REGION_ENTRY_ORIGIN, \
  641. union pgd : (x).val & _REGION_ENTRY_ORIGIN))
  642. static inline unsigned long pte_origin(union pte pte)
  643. {
  644. return pte.val & PAGE_MASK;
  645. }
  646. static inline bool pmd_prefix(union pmd pmd)
  647. {
  648. return pmd.h.fc && pmd.s.fc1.prefix_notif;
  649. }
  650. static inline bool pud_prefix(union pud pud)
  651. {
  652. return pud.h.fc && pud.s.fc1.prefix_notif;
  653. }
  654. static inline bool crste_leaf(union crste crste)
  655. {
  656. return (crste.h.tt <= TABLE_TYPE_REGION3) && crste.h.fc;
  657. }
  658. static inline bool crste_prefix(union crste crste)
  659. {
  660. return crste_leaf(crste) && crste.s.fc1.prefix_notif;
  661. }
  662. static inline bool crste_dirty(union crste crste)
  663. {
  664. return crste_leaf(crste) && crste.s.fc1.d;
  665. }
  666. static inline union pgste *pgste_of(union pte *pte)
  667. {
  668. return (union pgste *)(pte + _PAGE_ENTRIES);
  669. }
  670. static inline bool pte_hole(union pte pte)
  671. {
  672. return pte.h.i && !pte.tok.pr && pte.tok.type != _DAT_TOKEN_NONE;
  673. }
  674. static inline bool _crste_hole(union crste crste)
  675. {
  676. return crste.h.i && !crste.tok.pr && crste.tok.type != _DAT_TOKEN_NONE;
  677. }
  678. #define crste_hole(x) _crste_hole(_CRSTE(x))
  679. static inline bool _crste_none(union crste crste)
  680. {
  681. return crste.h.i && !crste.tok.pr && crste.tok.type == _DAT_TOKEN_NONE;
  682. }
  683. #define crste_none(x) _crste_none(_CRSTE(x))
  684. static inline phys_addr_t large_pud_to_phys(union pud pud, gfn_t gfn)
  685. {
  686. return pud_origin_large(pud) | (gfn_to_gpa(gfn) & ~_REGION3_MASK);
  687. }
  688. static inline phys_addr_t large_pmd_to_phys(union pmd pmd, gfn_t gfn)
  689. {
  690. return pmd_origin_large(pmd) | (gfn_to_gpa(gfn) & ~_SEGMENT_MASK);
  691. }
  692. static inline phys_addr_t large_crste_to_phys(union crste crste, gfn_t gfn)
  693. {
  694. if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
  695. return -1;
  696. if (is_pmd(crste))
  697. return large_pmd_to_phys(crste.pmd, gfn);
  698. return large_pud_to_phys(crste.pud, gfn);
  699. }
  700. static inline bool cspg_crste(union crste *crstep, union crste old, union crste new)
  701. {
  702. return cspg(&crstep->val, old.val, new.val);
  703. }
  704. static inline struct page_table *dereference_pmd(union pmd pmd)
  705. {
  706. return phys_to_virt(crste_origin(pmd));
  707. }
  708. static inline struct segment_table *dereference_pud(union pud pud)
  709. {
  710. return phys_to_virt(crste_origin(pud));
  711. }
  712. static inline struct region3_table *dereference_p4d(union p4d p4d)
  713. {
  714. return phys_to_virt(crste_origin(p4d));
  715. }
  716. static inline struct region2_table *dereference_pgd(union pgd pgd)
  717. {
  718. return phys_to_virt(crste_origin(pgd));
  719. }
  720. static inline struct crst_table *_dereference_crste(union crste crste)
  721. {
  722. if (unlikely(is_pmd(crste)))
  723. return NULL;
  724. return phys_to_virt(crste_origin(crste.pud));
  725. }
  726. #define dereference_crste(x) (_Generic((x), \
  727. union pud : _dereference_crste(_CRSTE(x)), \
  728. union p4d : _dereference_crste(_CRSTE(x)), \
  729. union pgd : _dereference_crste(_CRSTE(x)), \
  730. union crste : _dereference_crste(_CRSTE(x))))
  731. static inline struct crst_table *dereference_asce(union asce asce)
  732. {
  733. return phys_to_virt(asce.val & _ASCE_ORIGIN);
  734. }
  735. static inline void asce_flush_tlb(union asce asce)
  736. {
  737. __tlb_flush_idte(asce.val);
  738. }
  739. static inline bool pgste_get_trylock(union pte *ptep, union pgste *res)
  740. {
  741. union pgste *pgstep = pgste_of(ptep);
  742. union pgste old_pgste;
  743. if (READ_ONCE(pgstep->val) & PGSTE_PCL_BIT)
  744. return false;
  745. old_pgste.val = __atomic64_or_barrier(PGSTE_PCL_BIT, &pgstep->val);
  746. if (old_pgste.pcl)
  747. return false;
  748. old_pgste.pcl = 1;
  749. *res = old_pgste;
  750. return true;
  751. }
  752. static inline union pgste pgste_get_lock(union pte *ptep)
  753. {
  754. union pgste res;
  755. while (!pgste_get_trylock(ptep, &res))
  756. cpu_relax();
  757. return res;
  758. }
  759. static inline void pgste_set_unlock(union pte *ptep, union pgste pgste)
  760. {
  761. pgste.pcl = 0;
  762. barrier();
  763. WRITE_ONCE(*pgste_of(ptep), pgste);
  764. }
  765. static inline void dat_ptep_xchg(union pte *ptep, union pte new, gfn_t gfn, union asce asce,
  766. bool has_skeys)
  767. {
  768. union pgste pgste;
  769. pgste = pgste_get_lock(ptep);
  770. pgste = __dat_ptep_xchg(ptep, pgste, new, gfn, asce, has_skeys);
  771. pgste_set_unlock(ptep, pgste);
  772. }
  773. static inline void dat_ptep_clear(union pte *ptep, gfn_t gfn, union asce asce, bool has_skeys)
  774. {
  775. dat_ptep_xchg(ptep, _PTE_EMPTY, gfn, asce, has_skeys);
  776. }
  777. static inline void dat_free_pt(struct page_table *pt)
  778. {
  779. free_page((unsigned long)pt);
  780. }
  781. static inline void _dat_free_crst(struct crst_table *table)
  782. {
  783. free_pages((unsigned long)table, CRST_ALLOC_ORDER);
  784. }
  785. #define dat_free_crst(x) _dat_free_crst(_CRSTP(x))
  786. static inline void kvm_s390_free_mmu_cache(struct kvm_s390_mmu_cache *mc)
  787. {
  788. if (!mc)
  789. return;
  790. while (mc->n_pts)
  791. dat_free_pt(mc->pts[--mc->n_pts]);
  792. while (mc->n_crsts)
  793. _dat_free_crst(mc->crsts[--mc->n_crsts]);
  794. while (mc->n_rmaps)
  795. kfree(mc->rmaps[--mc->n_rmaps]);
  796. kfree(mc);
  797. }
  798. DEFINE_FREE(kvm_s390_mmu_cache, struct kvm_s390_mmu_cache *, if (_T) kvm_s390_free_mmu_cache(_T))
  799. static inline struct kvm_s390_mmu_cache *kvm_s390_new_mmu_cache(void)
  800. {
  801. struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
  802. mc = kzalloc_obj(*mc, GFP_KERNEL_ACCOUNT);
  803. if (mc && !kvm_s390_mmu_cache_topup(mc))
  804. return_ptr(mc);
  805. return NULL;
  806. }
  807. static inline bool dat_pmdp_xchg_atomic(union pmd *pmdp, union pmd old, union pmd new,
  808. gfn_t gfn, union asce asce)
  809. {
  810. return dat_crstep_xchg_atomic(_CRSTEP(pmdp), _CRSTE(old), _CRSTE(new), gfn, asce);
  811. }
  812. static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pud new,
  813. gfn_t gfn, union asce asce)
  814. {
  815. return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
  816. }
  817. static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
  818. {
  819. union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
  820. do {
  821. oldcrste = READ_ONCE(*crstep);
  822. } while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
  823. return oldcrste;
  824. }
  825. static inline int get_level(union crste *crstep, union pte *ptep)
  826. {
  827. return ptep ? TABLE_TYPE_PAGE_TABLE : crstep->h.tt;
  828. }
  829. static inline int dat_delete_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
  830. unsigned long npages)
  831. {
  832. return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_PIC, PGM_ADDRESSING);
  833. }
  834. static inline int dat_create_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
  835. unsigned long npages)
  836. {
  837. return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_NONE, 0);
  838. }
  839. static inline bool crste_is_ucas(union crste crste)
  840. {
  841. return is_pmd(crste) && crste.h.i && crste.h.fc0.tl == 1 && crste.h.fc == 0;
  842. }
  843. #endif /* __KVM_S390_DAT_H */