gaccess.c 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * guest access functions
  4. *
  5. * Copyright IBM Corp. 2014
  6. *
  7. */
  8. #include <linux/vmalloc.h>
  9. #include <linux/mm_types.h>
  10. #include <linux/err.h>
  11. #include <linux/pgtable.h>
  12. #include <linux/bitfield.h>
  13. #include <linux/kvm_host.h>
  14. #include <linux/kvm_types.h>
  15. #include <asm/diag.h>
  16. #include <asm/access-regs.h>
  17. #include <asm/fault.h>
  18. #include <asm/dat-bits.h>
  19. #include "kvm-s390.h"
  20. #include "dat.h"
  21. #include "gmap.h"
  22. #include "gaccess.h"
  23. #include "faultin.h"
  /*
   * NOTE(review): not referenced in the visible part of this file. Judging by
   * the name it is a flag value related to gmap shadow tables -- confirm
   * against its users before relying on this description.
   */
  #define GMAP_SHADOW_FAKE_TABLE 1ULL
  /*
   * A single 64-bit DAT table entry, viewable either as a raw value or as the
   * entry type of any translation level (region first/second/third, segment,
   * page).
   */
  union dat_table_entry {
      unsigned long val;                  /* raw entry value */
      union region1_table_entry pgd;      /* region-first-table entry view */
      union region2_table_entry p4d;      /* region-second-table entry view */
      union region3_table_entry pud;      /* region-third-table entry view */
      union segment_table_entry pmd;      /* segment-table entry view */
      union page_table_entry pte;         /* page-table entry view */
  };
  /* Number of slots in struct pgtwalk's raw_entries array. */
  #define WALK_N_ENTRIES 7
  /*
   * Lowest level index used with get_entries(); get_entries() biases the
   * array pointer by -LEVEL_MEM so that index LEVEL_MEM maps to raw_entries[0].
   */
  #define LEVEL_MEM -2
  /*
   * State of a page table walk.
   * NOTE(review): the exact meaning of last_addr, level and p is not visible
   * in this part of the file -- confirm against the code that fills them in.
   */
  struct pgtwalk {
      struct guest_fault raw_entries[WALK_N_ENTRIES]; /* access via get_entries() */
      gpa_t last_addr;
      int level;
      bool p;
  };
  41. static inline struct guest_fault *get_entries(struct pgtwalk *w)
  42. {
  43. return w->raw_entries - LEVEL_MEM;
  44. }
  /*
   * raddress union which will contain the result (real or absolute address)
   * after a page table walk. The rfaa, sfaa and pfra members are used to
   * simply assign them the value of a region, segment or page table entry.
   *
   * All members overlay the same unsigned long; guest_translate_gva() seeds
   * addr with the virtual address and then overwrites only the frame-address
   * part taken from the final table entry.
   */
  union raddress {
      unsigned long addr;
      unsigned long rfaa : 33; /* Region-Frame Absolute Address */
      unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
      unsigned long pfra : 52; /* Page-Frame Real Address */
  };
  /*
   * Contents of an access register as interpreted by ar_translation():
   * a 32-bit value split into the fields below.
   */
  union alet {
      u32 val;
      struct {
          u32 reserved : 7;   /* must be zero, else PGM_ALET_SPECIFICATION */
          u32 p : 1;          /* selects which control register supplies the ALD address */
          u32 alesn : 8;      /* compared against the ALE's alesn */
          u32 alen : 16;      /* index of the access-list entry */
      };
  };
  /*
   * 32-bit designation of the access list, read from guest real storage by
   * ar_translation().
   */
  union ald {
      u32 val;
      struct {
          u32 : 1;
          u32 alo : 24;   /* multiplied by 128 to form the access-list address */
          u32 all : 7;    /* limit checked against alen / 8 */
      };
  };
  /*
   * Access-list entry as read from guest real storage by ar_translation().
   * The bit-field layout mirrors the in-memory format, so member order and
   * widths must not be changed.
   */
  struct ale {
      unsigned long i : 1; /* ALEN-Invalid Bit */
      unsigned long : 5;
      unsigned long fo : 1; /* Fetch-Only Bit */
      unsigned long p : 1; /* Private Bit */
      unsigned long alesn : 8; /* Access-List-Entry Sequence Number */
      unsigned long aleax : 16; /* Access-List-Entry Authorization Index */
      unsigned long : 32;
      unsigned long : 1;
      unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
      unsigned long : 6;
      unsigned long astesn : 32; /* ASTE Sequence Number */
  };
  /*
   * Leading part of an ASN-second-table entry as read from guest real storage
   * by ar_translation(). The bit-field layout mirrors the in-memory format, so
   * member order and widths must not be changed.
   */
  struct aste {
      unsigned long i : 1; /* ASX-Invalid Bit */
      unsigned long ato : 29; /* Authority-Table Origin */
      unsigned long : 1;
      unsigned long b : 1; /* Base-Space Bit */
      unsigned long ax : 16; /* Authorization Index */
      unsigned long atl : 12; /* Authority-Table Length */
      unsigned long : 2;
      unsigned long ca : 1; /* Controlled-ASN Bit */
      unsigned long ra : 1; /* Reusable-ASN Bit */
      unsigned long asce : 64; /* Address-Space-Control Element */
      unsigned long ald : 32;
      unsigned long astesn : 32; /* compared against the ALE's astesn */
      /* .. more fields there */
  };
  /*
   * Operand-access controls passed to the MVCOS instruction in mvcos_key().
   * mvcos_key() fills oac1 with the destination key and oac2 with the source
   * key and loads val into register 0.
   */
  union oac {
      unsigned int val;
      struct {
          struct {
              unsigned short key : 4;     /* access key for this operand */
              unsigned short : 4;
              unsigned short as : 2;
              unsigned short : 4;
              unsigned short k : 1;       /* set by mvcos_key() when key is non-zero */
              unsigned short a : 1;
          } oac1;
          struct {
              unsigned short key : 4;     /* access key for this operand */
              unsigned short : 4;
              unsigned short as : 2;
              unsigned short : 4;
              unsigned short k : 1;       /* set by mvcos_key() when key is non-zero */
              unsigned short a : 1;
          } oac2;
      };
  };
  122. int ipte_lock_held(struct kvm *kvm)
  123. {
  124. if (sclp.has_siif)
  125. return kvm->arch.sca->ipte_control.kh != 0;
  126. return kvm->arch.ipte_lock_count != 0;
  127. }
  128. static void ipte_lock_simple(struct kvm *kvm)
  129. {
  130. union ipte_control old, new, *ic;
  131. mutex_lock(&kvm->arch.ipte_mutex);
  132. kvm->arch.ipte_lock_count++;
  133. if (kvm->arch.ipte_lock_count > 1)
  134. goto out;
  135. retry:
  136. ic = &kvm->arch.sca->ipte_control;
  137. old = READ_ONCE(*ic);
  138. do {
  139. if (old.k) {
  140. cond_resched();
  141. goto retry;
  142. }
  143. new = old;
  144. new.k = 1;
  145. } while (!try_cmpxchg(&ic->val, &old.val, new.val));
  146. out:
  147. mutex_unlock(&kvm->arch.ipte_mutex);
  148. }
  149. static void ipte_unlock_simple(struct kvm *kvm)
  150. {
  151. union ipte_control old, new, *ic;
  152. mutex_lock(&kvm->arch.ipte_mutex);
  153. kvm->arch.ipte_lock_count--;
  154. if (kvm->arch.ipte_lock_count)
  155. goto out;
  156. ic = &kvm->arch.sca->ipte_control;
  157. old = READ_ONCE(*ic);
  158. do {
  159. new = old;
  160. new.k = 0;
  161. } while (!try_cmpxchg(&ic->val, &old.val, new.val));
  162. wake_up(&kvm->arch.ipte_wq);
  163. out:
  164. mutex_unlock(&kvm->arch.ipte_mutex);
  165. }
  166. static void ipte_lock_siif(struct kvm *kvm)
  167. {
  168. union ipte_control old, new, *ic;
  169. retry:
  170. ic = &kvm->arch.sca->ipte_control;
  171. old = READ_ONCE(*ic);
  172. do {
  173. if (old.kg) {
  174. cond_resched();
  175. goto retry;
  176. }
  177. new = old;
  178. new.k = 1;
  179. new.kh++;
  180. } while (!try_cmpxchg(&ic->val, &old.val, new.val));
  181. }
  182. static void ipte_unlock_siif(struct kvm *kvm)
  183. {
  184. union ipte_control old, new, *ic;
  185. ic = &kvm->arch.sca->ipte_control;
  186. old = READ_ONCE(*ic);
  187. do {
  188. new = old;
  189. new.kh--;
  190. if (!new.kh)
  191. new.k = 0;
  192. } while (!try_cmpxchg(&ic->val, &old.val, new.val));
  193. if (!new.kh)
  194. wake_up(&kvm->arch.ipte_wq);
  195. }
  196. void ipte_lock(struct kvm *kvm)
  197. {
  198. if (sclp.has_siif)
  199. ipte_lock_siif(kvm);
  200. else
  201. ipte_lock_simple(kvm);
  202. }
  203. void ipte_unlock(struct kvm *kvm)
  204. {
  205. if (sclp.has_siif)
  206. ipte_unlock_siif(kvm);
  207. else
  208. ipte_unlock_simple(kvm);
  209. }
  210. static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
  211. enum gacc_mode mode)
  212. {
  213. union alet alet;
  214. struct ale ale;
  215. struct aste aste;
  216. unsigned long ald_addr, authority_table_addr;
  217. union ald ald;
  218. int eax, rc;
  219. u8 authority_table;
  220. if (ar >= NUM_ACRS)
  221. return -EINVAL;
  222. if (vcpu->arch.acrs_loaded)
  223. save_access_regs(vcpu->run->s.regs.acrs);
  224. alet.val = vcpu->run->s.regs.acrs[ar];
  225. if (ar == 0 || alet.val == 0) {
  226. asce->val = vcpu->arch.sie_block->gcr[1];
  227. return 0;
  228. } else if (alet.val == 1) {
  229. asce->val = vcpu->arch.sie_block->gcr[7];
  230. return 0;
  231. }
  232. if (alet.reserved)
  233. return PGM_ALET_SPECIFICATION;
  234. if (alet.p)
  235. ald_addr = vcpu->arch.sie_block->gcr[5];
  236. else
  237. ald_addr = vcpu->arch.sie_block->gcr[2];
  238. ald_addr &= 0x7fffffc0;
  239. rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
  240. if (rc)
  241. return rc;
  242. if (alet.alen / 8 > ald.all)
  243. return PGM_ALEN_TRANSLATION;
  244. if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
  245. return PGM_ADDRESSING;
  246. rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
  247. sizeof(struct ale));
  248. if (rc)
  249. return rc;
  250. if (ale.i == 1)
  251. return PGM_ALEN_TRANSLATION;
  252. if (ale.alesn != alet.alesn)
  253. return PGM_ALE_SEQUENCE;
  254. rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
  255. if (rc)
  256. return rc;
  257. if (aste.i)
  258. return PGM_ASTE_VALIDITY;
  259. if (aste.astesn != ale.astesn)
  260. return PGM_ASTE_SEQUENCE;
  261. if (ale.p == 1) {
  262. eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
  263. if (ale.aleax != eax) {
  264. if (eax / 16 > aste.atl)
  265. return PGM_EXTENDED_AUTHORITY;
  266. authority_table_addr = aste.ato * 4 + eax / 4;
  267. rc = read_guest_real(vcpu, authority_table_addr,
  268. &authority_table,
  269. sizeof(u8));
  270. if (rc)
  271. return rc;
  272. if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
  273. return PGM_EXTENDED_AUTHORITY;
  274. }
  275. }
  276. if (ale.fo == 1 && mode == GACC_STORE)
  277. return PGM_PROTECTION;
  278. asce->val = aste.asce;
  279. return 0;
  280. }
  /*
   * Cause of a protection exception. trans_exc_ending() uses it to decide
   * which of the TEID bits b56, b60 and b61 to set.
   */
  enum prot_type {
      PROT_TYPE_LA = 0,       /* low-address protection: sets b56 */
      PROT_TYPE_KEYC = 1,     /* key-controlled protection: sets b60 */
      PROT_TYPE_ALC = 2,      /* access-list controlled: sets b60 and b61 */
      PROT_TYPE_DAT = 3,      /* DAT protection: sets b61 */
      PROT_TYPE_IEP = 4,      /* instruction-execution protection: sets b56 and b61 */
      /* Dummy value for passing an initialized value when code != PGM_PROTECTION */
      PROT_TYPE_DUMMY,
  };
  290. static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
  291. enum gacc_mode mode, enum prot_type prot, bool terminate)
  292. {
  293. struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
  294. union teid *teid;
  295. memset(pgm, 0, sizeof(*pgm));
  296. pgm->code = code;
  297. teid = (union teid *)&pgm->trans_exc_code;
  298. switch (code) {
  299. case PGM_PROTECTION:
  300. switch (prot) {
  301. case PROT_TYPE_DUMMY:
  302. /* We should never get here, acts like termination */
  303. WARN_ON_ONCE(1);
  304. break;
  305. case PROT_TYPE_IEP:
  306. teid->b61 = 1;
  307. fallthrough;
  308. case PROT_TYPE_LA:
  309. teid->b56 = 1;
  310. break;
  311. case PROT_TYPE_KEYC:
  312. teid->b60 = 1;
  313. break;
  314. case PROT_TYPE_ALC:
  315. teid->b60 = 1;
  316. fallthrough;
  317. case PROT_TYPE_DAT:
  318. teid->b61 = 1;
  319. break;
  320. }
  321. if (terminate) {
  322. teid->b56 = 0;
  323. teid->b60 = 0;
  324. teid->b61 = 0;
  325. }
  326. fallthrough;
  327. case PGM_ASCE_TYPE:
  328. case PGM_PAGE_TRANSLATION:
  329. case PGM_REGION_FIRST_TRANS:
  330. case PGM_REGION_SECOND_TRANS:
  331. case PGM_REGION_THIRD_TRANS:
  332. case PGM_SEGMENT_TRANSLATION:
  333. /*
  334. * op_access_id only applies to MOVE_PAGE -> set bit 61
  335. * exc_access_id has to be set to 0 for some instructions. Both
  336. * cases have to be handled by the caller.
  337. */
  338. teid->addr = gva >> PAGE_SHIFT;
  339. teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
  340. teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
  341. fallthrough;
  342. case PGM_ALEN_TRANSLATION:
  343. case PGM_ALE_SEQUENCE:
  344. case PGM_ASTE_VALIDITY:
  345. case PGM_ASTE_SEQUENCE:
  346. case PGM_EXTENDED_AUTHORITY:
  347. /*
  348. * We can always store exc_access_id, as it is
  349. * undefined for non-ar cases. It is undefined for
  350. * most DAT protection exceptions.
  351. */
  352. pgm->exc_access_id = ar;
  353. break;
  354. }
  355. return code;
  356. }
  357. static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
  358. enum gacc_mode mode, enum prot_type prot)
  359. {
  360. return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
  361. }
  362. static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
  363. unsigned long ga, u8 ar, enum gacc_mode mode)
  364. {
  365. int rc;
  366. struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
  367. if (!psw.dat) {
  368. asce->val = 0;
  369. asce->r = 1;
  370. return 0;
  371. }
  372. if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
  373. psw.as = PSW_BITS_AS_PRIMARY;
  374. switch (psw.as) {
  375. case PSW_BITS_AS_PRIMARY:
  376. asce->val = vcpu->arch.sie_block->gcr[1];
  377. return 0;
  378. case PSW_BITS_AS_SECONDARY:
  379. asce->val = vcpu->arch.sie_block->gcr[7];
  380. return 0;
  381. case PSW_BITS_AS_HOME:
  382. asce->val = vcpu->arch.sie_block->gcr[13];
  383. return 0;
  384. case PSW_BITS_AS_ACCREG:
  385. rc = ar_translation(vcpu, asce, ar, mode);
  386. if (rc > 0)
  387. return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
  388. return rc;
  389. }
  390. return 0;
  391. }
  /*
   * Read one table entry (sizeof(*val) bytes) from guest address @gpa into
   * @val. Returns the result of kvm_read_guest().
   */
  static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  {
      const unsigned long entry_size = sizeof(*val);

      return kvm_read_guest(kvm, gpa, val, entry_size);
  }
  396. /**
  397. * guest_translate_gva() - translate a guest virtual into a guest absolute address
  398. * @vcpu: virtual cpu
  399. * @gva: guest virtual address
  400. * @gpa: points to where guest physical (absolute) address should be stored
  401. * @asce: effective asce
  402. * @mode: indicates the access mode to be used
  403. * @prot: returns the type for protection exceptions
  404. *
  405. * Translate a guest virtual address into a guest absolute address by means
  406. * of dynamic address translation as specified by the architecture.
  407. * If the resulting absolute address is not available in the configuration
  408. * an addressing exception is indicated and @gpa will not be changed.
  409. *
  410. * Returns: - zero on success; @gpa contains the resulting absolute address
  411. * - a negative value if guest access failed due to e.g. broken
  412. * guest mapping
  413. * - a positive value if an access exception happened. In this case
  414. * the returned value is the program interruption code as defined
  415. * by the architecture
  416. */
  417. static unsigned long guest_translate_gva(struct kvm_vcpu *vcpu, unsigned long gva,
  418. unsigned long *gpa, const union asce asce,
  419. enum gacc_mode mode, enum prot_type *prot)
  420. {
  421. union vaddress vaddr = {.addr = gva};
  422. union raddress raddr = {.addr = gva};
  423. union page_table_entry pte;
  424. int dat_protection = 0;
  425. int iep_protection = 0;
  426. union ctlreg0 ctlreg0;
  427. unsigned long ptr;
  428. int edat1, edat2, iep;
  429. ctlreg0.val = vcpu->arch.sie_block->gcr[0];
  430. edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
  431. edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
  432. iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
  433. if (asce.r)
  434. goto real_address;
  435. ptr = asce.rsto * PAGE_SIZE;
  436. switch (asce.dt) {
  437. case ASCE_TYPE_REGION1:
  438. if (vaddr.rfx01 > asce.tl)
  439. return PGM_REGION_FIRST_TRANS;
  440. ptr += vaddr.rfx * 8;
  441. break;
  442. case ASCE_TYPE_REGION2:
  443. if (vaddr.rfx)
  444. return PGM_ASCE_TYPE;
  445. if (vaddr.rsx01 > asce.tl)
  446. return PGM_REGION_SECOND_TRANS;
  447. ptr += vaddr.rsx * 8;
  448. break;
  449. case ASCE_TYPE_REGION3:
  450. if (vaddr.rfx || vaddr.rsx)
  451. return PGM_ASCE_TYPE;
  452. if (vaddr.rtx01 > asce.tl)
  453. return PGM_REGION_THIRD_TRANS;
  454. ptr += vaddr.rtx * 8;
  455. break;
  456. case ASCE_TYPE_SEGMENT:
  457. if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
  458. return PGM_ASCE_TYPE;
  459. if (vaddr.sx01 > asce.tl)
  460. return PGM_SEGMENT_TRANSLATION;
  461. ptr += vaddr.sx * 8;
  462. break;
  463. }
  464. switch (asce.dt) {
  465. case ASCE_TYPE_REGION1: {
  466. union region1_table_entry rfte;
  467. if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
  468. return PGM_ADDRESSING;
  469. if (deref_table(vcpu->kvm, ptr, &rfte.val))
  470. return -EFAULT;
  471. if (rfte.i)
  472. return PGM_REGION_FIRST_TRANS;
  473. if (rfte.tt != TABLE_TYPE_REGION1)
  474. return PGM_TRANSLATION_SPEC;
  475. if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
  476. return PGM_REGION_SECOND_TRANS;
  477. if (edat1)
  478. dat_protection |= rfte.p;
  479. ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
  480. }
  481. fallthrough;
  482. case ASCE_TYPE_REGION2: {
  483. union region2_table_entry rste;
  484. if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
  485. return PGM_ADDRESSING;
  486. if (deref_table(vcpu->kvm, ptr, &rste.val))
  487. return -EFAULT;
  488. if (rste.i)
  489. return PGM_REGION_SECOND_TRANS;
  490. if (rste.tt != TABLE_TYPE_REGION2)
  491. return PGM_TRANSLATION_SPEC;
  492. if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
  493. return PGM_REGION_THIRD_TRANS;
  494. if (edat1)
  495. dat_protection |= rste.p;
  496. ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
  497. }
  498. fallthrough;
  499. case ASCE_TYPE_REGION3: {
  500. union region3_table_entry rtte;
  501. if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
  502. return PGM_ADDRESSING;
  503. if (deref_table(vcpu->kvm, ptr, &rtte.val))
  504. return -EFAULT;
  505. if (rtte.i)
  506. return PGM_REGION_THIRD_TRANS;
  507. if (rtte.tt != TABLE_TYPE_REGION3)
  508. return PGM_TRANSLATION_SPEC;
  509. if (rtte.cr && asce.p && edat2)
  510. return PGM_TRANSLATION_SPEC;
  511. if (rtte.fc && edat2) {
  512. dat_protection |= rtte.fc1.p;
  513. iep_protection = rtte.fc1.iep;
  514. raddr.rfaa = rtte.fc1.rfaa;
  515. goto absolute_address;
  516. }
  517. if (vaddr.sx01 < rtte.fc0.tf)
  518. return PGM_SEGMENT_TRANSLATION;
  519. if (vaddr.sx01 > rtte.fc0.tl)
  520. return PGM_SEGMENT_TRANSLATION;
  521. if (edat1)
  522. dat_protection |= rtte.fc0.p;
  523. ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
  524. }
  525. fallthrough;
  526. case ASCE_TYPE_SEGMENT: {
  527. union segment_table_entry ste;
  528. if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
  529. return PGM_ADDRESSING;
  530. if (deref_table(vcpu->kvm, ptr, &ste.val))
  531. return -EFAULT;
  532. if (ste.i)
  533. return PGM_SEGMENT_TRANSLATION;
  534. if (ste.tt != TABLE_TYPE_SEGMENT)
  535. return PGM_TRANSLATION_SPEC;
  536. if (ste.cs && asce.p)
  537. return PGM_TRANSLATION_SPEC;
  538. if (ste.fc && edat1) {
  539. dat_protection |= ste.fc1.p;
  540. iep_protection = ste.fc1.iep;
  541. raddr.sfaa = ste.fc1.sfaa;
  542. goto absolute_address;
  543. }
  544. dat_protection |= ste.fc0.p;
  545. ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
  546. }
  547. }
  548. if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
  549. return PGM_ADDRESSING;
  550. if (deref_table(vcpu->kvm, ptr, &pte.val))
  551. return -EFAULT;
  552. if (pte.i)
  553. return PGM_PAGE_TRANSLATION;
  554. if (pte.z)
  555. return PGM_TRANSLATION_SPEC;
  556. dat_protection |= pte.p;
  557. iep_protection = pte.iep;
  558. raddr.pfra = pte.pfra;
  559. real_address:
  560. raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
  561. absolute_address:
  562. if (mode == GACC_STORE && dat_protection) {
  563. *prot = PROT_TYPE_DAT;
  564. return PGM_PROTECTION;
  565. }
  566. if (mode == GACC_IFETCH && iep_protection && iep) {
  567. *prot = PROT_TYPE_IEP;
  568. return PGM_PROTECTION;
  569. }
  570. if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
  571. return PGM_ADDRESSING;
  572. *gpa = raddr.addr;
  573. return 0;
  574. }
  /*
   * Return non-zero if @ga lies in one of the ranges 0..511 or 4096..4607,
   * i.e. if no address bit outside 0x11ff is set.
   */
  static inline int is_low_address(unsigned long ga)
  {
      const unsigned long low_range_bits = 0x11fful;

      return !(ga & ~low_range_bits);
  }
  580. static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
  581. const union asce asce)
  582. {
  583. union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
  584. psw_t *psw = &vcpu->arch.sie_block->gpsw;
  585. if (!ctlreg0.lap)
  586. return 0;
  587. if (psw_bits(*psw).dat && asce.p)
  588. return 0;
  589. return 1;
  590. }
  591. static int vm_check_access_key_gpa(struct kvm *kvm, u8 access_key,
  592. enum gacc_mode mode, gpa_t gpa)
  593. {
  594. union skey storage_key;
  595. int r;
  596. scoped_guard(read_lock, &kvm->mmu_lock)
  597. r = dat_get_storage_key(kvm->arch.gmap->asce, gpa_to_gfn(gpa), &storage_key);
  598. if (r)
  599. return r;
  600. if (access_key == 0 || storage_key.acc == access_key)
  601. return 0;
  602. if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !storage_key.fp)
  603. return 0;
  604. return PGM_PROTECTION;
  605. }
  606. static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
  607. union asce asce)
  608. {
  609. psw_t *psw = &vcpu->arch.sie_block->gpsw;
  610. unsigned long override;
  611. if (mode == GACC_FETCH || mode == GACC_IFETCH) {
  612. /* check if fetch protection override enabled */
  613. override = vcpu->arch.sie_block->gcr[0];
  614. override &= CR0_FETCH_PROTECTION_OVERRIDE;
  615. /* not applicable if subject to DAT && private space */
  616. override = override && !(psw_bits(*psw).dat && asce.p);
  617. return override;
  618. }
  619. return false;
  620. }
  /*
   * Fetch-protection override covers an access only if the whole range
   * [@ga, @ga + @len) lies within the first 2048 bytes.
   */
  static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
  {
      if (ga >= 2048)
          return false;

      return ga + len <= 2048;
  }
  625. static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
  626. {
  627. /* check if storage protection override enabled */
  628. return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
  629. }
  630. static bool storage_prot_override_applies(u8 access_control)
  631. {
  632. /* matches special storage protection override key (9) -> allow */
  633. return access_control == PAGE_SPO_ACC;
  634. }
  635. static int vcpu_check_access_key_gpa(struct kvm_vcpu *vcpu, u8 access_key,
  636. enum gacc_mode mode, union asce asce, gpa_t gpa,
  637. unsigned long ga, unsigned int len)
  638. {
  639. union skey storage_key;
  640. int r;
  641. /* access key 0 matches any storage key -> allow */
  642. if (access_key == 0)
  643. return 0;
  644. /*
  645. * caller needs to ensure that gfn is accessible, so we can
  646. * assume that this cannot fail
  647. */
  648. scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
  649. r = dat_get_storage_key(vcpu->arch.gmap->asce, gpa_to_gfn(gpa), &storage_key);
  650. if (r)
  651. return r;
  652. /* access key matches storage key -> allow */
  653. if (storage_key.acc == access_key)
  654. return 0;
  655. if (mode == GACC_FETCH || mode == GACC_IFETCH) {
  656. /* it is a fetch and fetch protection is off -> allow */
  657. if (!storage_key.fp)
  658. return 0;
  659. if (fetch_prot_override_applicable(vcpu, mode, asce) &&
  660. fetch_prot_override_applies(ga, len))
  661. return 0;
  662. }
  663. if (storage_prot_override_applicable(vcpu) &&
  664. storage_prot_override_applies(storage_key.acc))
  665. return 0;
  666. return PGM_PROTECTION;
  667. }
  668. /**
  669. * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
  670. * covering a logical range
  671. * @vcpu: virtual cpu
  672. * @ga: guest address, start of range
  673. * @ar: access register
  674. * @gpas: output argument, may be NULL
  675. * @len: length of range in bytes
  676. * @asce: address-space-control element to use for translation
  677. * @mode: access mode
  678. * @access_key: access key to mach the range's storage keys against
  679. *
  680. * Translate a logical range to a series of guest absolute addresses,
  681. * such that the concatenation of page fragments starting at each gpa make up
  682. * the whole range.
  683. * The translation is performed as if done by the cpu for the given @asce, @ar,
  684. * @mode and state of the @vcpu.
  685. * If the translation causes an exception, its program interruption code is
  686. * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
  687. * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
  688. * a correct exception into the guest.
  689. * The resulting gpas are stored into @gpas, unless it is NULL.
  690. *
  691. * Note: All fragments except the first one start at the beginning of a page.
  692. * When deriving the boundaries of a fragment from a gpa, all but the last
  693. * fragment end at the end of the page.
  694. *
  695. * Return:
  696. * * 0 - success
  697. * * <0 - translation could not be performed, for example if guest
  698. * memory could not be accessed
  699. * * >0 - an access exception occurred. In this case the returned value
  700. * is the program interruption code and the contents of pgm may
  701. * be used to inject an exception into the guest.
  702. */
  703. static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
  704. unsigned long *gpas, unsigned long len,
  705. const union asce asce, enum gacc_mode mode,
  706. u8 access_key)
  707. {
  708. psw_t *psw = &vcpu->arch.sie_block->gpsw;
  709. unsigned int offset = offset_in_page(ga);
  710. unsigned int fragment_len;
  711. int lap_enabled, rc = 0;
  712. enum prot_type prot;
  713. unsigned long gpa;
  714. lap_enabled = low_address_protection_enabled(vcpu, asce);
  715. while (min(PAGE_SIZE - offset, len) > 0) {
  716. fragment_len = min(PAGE_SIZE - offset, len);
  717. ga = kvm_s390_logical_to_effective(vcpu, ga);
  718. if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
  719. return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
  720. PROT_TYPE_LA);
  721. if (psw_bits(*psw).dat) {
  722. rc = guest_translate_gva(vcpu, ga, &gpa, asce, mode, &prot);
  723. if (rc < 0)
  724. return rc;
  725. } else {
  726. gpa = kvm_s390_real_to_abs(vcpu, ga);
  727. if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
  728. rc = PGM_ADDRESSING;
  729. prot = PROT_TYPE_DUMMY;
  730. }
  731. }
  732. if (rc)
  733. return trans_exc(vcpu, rc, ga, ar, mode, prot);
  734. rc = vcpu_check_access_key_gpa(vcpu, access_key, mode, asce, gpa, ga, fragment_len);
  735. if (rc)
  736. return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
  737. if (gpas)
  738. *gpas++ = gpa;
  739. offset = 0;
  740. ga += fragment_len;
  741. len -= fragment_len;
  742. }
  743. return 0;
  744. }
  745. static int access_guest_page_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
  746. void *data, unsigned int len)
  747. {
  748. const unsigned int offset = offset_in_page(gpa);
  749. const gfn_t gfn = gpa_to_gfn(gpa);
  750. int rc;
  751. if (!gfn_to_memslot(kvm, gfn))
  752. return PGM_ADDRESSING;
  753. if (mode == GACC_STORE)
  754. rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
  755. else
  756. rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
  757. return rc;
  758. }
  /*
   * Copy @size bytes from @from to @to with the MVCOS instruction, applying
   * @dst_key to the destination operand and @src_key to the source operand.
   * A key of 0 leaves the respective "k" control bit clear.
   *
   * Returns 0 if the instruction following MVCOS was reached normally, or
   * PGM_PROTECTION if the exception table redirected execution past it.
   * NOTE(review): any exception at label 0 or 1 is reported as
   * PGM_PROTECTION, whatever its actual cause -- confirm this is intended.
   */
  static int mvcos_key(void *to, const void *from, unsigned long size, u8 dst_key, u8 src_key)
  {
      union oac spec = {
          .oac1.key = dst_key,
          .oac1.k = !!dst_key,
          .oac2.key = src_key,
          .oac2.k = !!src_key,
      };
      /* Assume failure; only the "lhi" after a completed MVCOS clears it. */
      int exception = PGM_PROTECTION;
      /* r0 carries the operand-access controls; label 2 is the fixup target. */
      asm_inline volatile(
          " lr %%r0,%[spec]\n"
          "0: mvcos %[to],%[from],%[size]\n"
          "1: lhi %[exc],0\n"
          "2:\n"
          EX_TABLE(0b, 2b)
          EX_TABLE(1b, 2b)
          : [size] "+d" (size), [to] "=Q" (*(char *)to), [exc] "+d" (exception)
          : [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
          : "memory", "cc", "0");
      return exception;
  }
  /*
   * Arguments and result for _access_guest_page_with_key_gpa(), handed to it
   * through the priv pointer of struct guest_fault.
   */
  struct acc_page_key_context {
      void *data;             /* host buffer to copy from (store) or to (fetch) */
      int exception;          /* out: result of mvcos_key() */
      unsigned short offset;  /* offset of the access within the guest page */
      unsigned short len;     /* number of bytes to copy */
      bool store;             /* true: write to the guest, false: read from it */
      u8 access_key;          /* key applied to the guest-side operand */
  };
  788. static void _access_guest_page_with_key_gpa(struct guest_fault *f)
  789. {
  790. struct acc_page_key_context *context = f->priv;
  791. void *ptr;
  792. int r;
  793. ptr = __va(PFN_PHYS(f->pfn) | context->offset);
  794. if (context->store)
  795. r = mvcos_key(ptr, context->data, context->len, context->access_key, 0);
  796. else
  797. r = mvcos_key(context->data, ptr, context->len, 0, context->access_key);
  798. context->exception = r;
  799. }
  800. static int access_guest_page_with_key_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
  801. void *data, unsigned int len, u8 acc)
  802. {
  803. struct acc_page_key_context context = {
  804. .offset = offset_in_page(gpa),
  805. .len = len,
  806. .data = data,
  807. .access_key = acc,
  808. .store = mode == GACC_STORE,
  809. };
  810. struct guest_fault fault = {
  811. .gfn = gpa_to_gfn(gpa),
  812. .priv = &context,
  813. .write_attempt = mode == GACC_STORE,
  814. .callback = _access_guest_page_with_key_gpa,
  815. };
  816. int rc;
  817. if (KVM_BUG_ON((len + context.offset) > PAGE_SIZE, kvm))
  818. return -EINVAL;
  819. rc = kvm_s390_faultin_gfn(NULL, kvm, &fault);
  820. if (rc)
  821. return rc;
  822. return context.exception;
  823. }
  824. int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
  825. unsigned long len, enum gacc_mode mode, u8 access_key)
  826. {
  827. int offset = offset_in_page(gpa);
  828. int fragment_len;
  829. int rc;
  830. while (min(PAGE_SIZE - offset, len) > 0) {
  831. fragment_len = min(PAGE_SIZE - offset, len);
  832. rc = access_guest_page_with_key_gpa(kvm, mode, gpa, data, fragment_len, access_key);
  833. if (rc)
  834. return rc;
  835. offset = 0;
  836. len -= fragment_len;
  837. data += fragment_len;
  838. gpa += fragment_len;
  839. }
  840. return 0;
  841. }
  842. int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
  843. void *data, unsigned long len, enum gacc_mode mode,
  844. u8 access_key)
  845. {
  846. psw_t *psw = &vcpu->arch.sie_block->gpsw;
  847. unsigned long nr_pages, idx;
  848. unsigned long gpa_array[2];
  849. unsigned int fragment_len;
  850. unsigned long *gpas;
  851. enum prot_type prot;
  852. int need_ipte_lock;
  853. union asce asce;
  854. bool try_storage_prot_override;
  855. bool try_fetch_prot_override;
  856. int rc;
  857. if (!len)
  858. return 0;
  859. ga = kvm_s390_logical_to_effective(vcpu, ga);
  860. rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
  861. if (rc)
  862. return rc;
  863. nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
  864. gpas = gpa_array;
  865. if (nr_pages > ARRAY_SIZE(gpa_array))
  866. gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
  867. if (!gpas)
  868. return -ENOMEM;
  869. try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
  870. try_storage_prot_override = storage_prot_override_applicable(vcpu);
  871. need_ipte_lock = psw_bits(*psw).dat && !asce.r;
  872. if (need_ipte_lock)
  873. ipte_lock(vcpu->kvm);
  874. /*
  875. * Since we do the access further down ultimately via a move instruction
  876. * that does key checking and returns an error in case of a protection
  877. * violation, we don't need to do the check during address translation.
  878. * Skip it by passing access key 0, which matches any storage key,
  879. * obviating the need for any further checks. As a result the check is
  880. * handled entirely in hardware on access, we only need to take care to
  881. * forego key protection checking if fetch protection override applies or
  882. * retry with the special key 9 in case of storage protection override.
  883. */
  884. rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
  885. if (rc)
  886. goto out_unlock;
  887. for (idx = 0; idx < nr_pages; idx++) {
  888. fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
  889. if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
  890. rc = access_guest_page_gpa(vcpu->kvm, mode, gpas[idx], data, fragment_len);
  891. } else {
  892. rc = access_guest_page_with_key_gpa(vcpu->kvm, mode, gpas[idx],
  893. data, fragment_len, access_key);
  894. }
  895. if (rc == PGM_PROTECTION && try_storage_prot_override)
  896. rc = access_guest_page_with_key_gpa(vcpu->kvm, mode, gpas[idx],
  897. data, fragment_len, PAGE_SPO_ACC);
  898. if (rc)
  899. break;
  900. len -= fragment_len;
  901. data += fragment_len;
  902. ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
  903. }
  904. if (rc > 0) {
  905. bool terminate = (mode == GACC_STORE) && (idx > 0);
  906. if (rc == PGM_PROTECTION)
  907. prot = PROT_TYPE_KEYC;
  908. else
  909. prot = PROT_TYPE_DUMMY;
  910. rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
  911. }
  912. out_unlock:
  913. if (need_ipte_lock)
  914. ipte_unlock(vcpu->kvm);
  915. if (nr_pages > ARRAY_SIZE(gpa_array))
  916. vfree(gpas);
  917. return rc;
  918. }
  919. int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  920. void *data, unsigned long len, enum gacc_mode mode)
  921. {
  922. unsigned int fragment_len;
  923. unsigned long gpa;
  924. int rc = 0;
  925. while (len && !rc) {
  926. gpa = kvm_s390_real_to_abs(vcpu, gra);
  927. fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
  928. rc = access_guest_page_gpa(vcpu->kvm, mode, gpa, data, fragment_len);
  929. len -= fragment_len;
  930. gra += fragment_len;
  931. data += fragment_len;
  932. }
  933. if (rc > 0)
  934. vcpu->arch.pgm.code = rc;
  935. return rc;
  936. }
  937. /**
  938. * __cmpxchg_with_key() - Perform cmpxchg, honoring storage keys.
  939. * @ptr: Address of value to compare to *@old and exchange with
  940. * @new. Must be aligned to @size.
  941. * @old: Old value. Compared to the content pointed to by @ptr in order to
  942. * determine if the exchange occurs. The old value read from *@ptr is
  943. * written here.
  944. * @new: New value to place at *@ptr.
  945. * @size: Size of the operation in bytes, may only be a power of two up to 16.
  946. * @access_key: Access key to use for checking storage key protection.
  947. *
  948. * Perform a cmpxchg on guest memory, honoring storage key protection.
  949. * @access_key alone determines how key checking is performed, neither
  950. * storage-protection-override nor fetch-protection-override apply.
  951. * In case of an exception *@uval is set to zero.
  952. *
  953. * Return:
  954. * * %0: cmpxchg executed successfully
  955. * * %1: cmpxchg executed unsuccessfully
  956. * * %PGM_PROTECTION: an exception happened when trying to access *@ptr
  957. * * %-EAGAIN: maxed out number of retries (byte and short only)
  958. * * %-EINVAL: invalid value for @size
  959. */
  960. static int __cmpxchg_with_key(union kvm_s390_quad *ptr, union kvm_s390_quad *old,
  961. union kvm_s390_quad new, int size, u8 access_key)
  962. {
  963. union kvm_s390_quad tmp = { .sixteen = 0 };
  964. int rc;
  965. /*
  966. * The cmpxchg_key macro depends on the type of "old", so we need
  967. * a case for each valid length and get some code duplication as long
  968. * as we don't introduce a new macro.
  969. */
  970. switch (size) {
  971. case 1:
  972. rc = __cmpxchg_key1(&ptr->one, &tmp.one, old->one, new.one, access_key);
  973. break;
  974. case 2:
  975. rc = __cmpxchg_key2(&ptr->two, &tmp.two, old->two, new.two, access_key);
  976. break;
  977. case 4:
  978. rc = __cmpxchg_key4(&ptr->four, &tmp.four, old->four, new.four, access_key);
  979. break;
  980. case 8:
  981. rc = __cmpxchg_key8(&ptr->eight, &tmp.eight, old->eight, new.eight, access_key);
  982. break;
  983. case 16:
  984. rc = __cmpxchg_key16(&ptr->sixteen, &tmp.sixteen, old->sixteen, new.sixteen,
  985. access_key);
  986. break;
  987. default:
  988. return -EINVAL;
  989. }
  990. if (!rc && memcmp(&tmp, old, size))
  991. rc = 1;
  992. *old = tmp;
  993. /*
  994. * Assume that the fault is caused by protection, either key protection
  995. * or user page write protection.
  996. */
  997. if (rc == -EFAULT)
  998. rc = PGM_PROTECTION;
  999. return rc;
  1000. }
  1001. struct cmpxchg_key_context {
  1002. union kvm_s390_quad new;
  1003. union kvm_s390_quad *old;
  1004. int exception;
  1005. unsigned short offset;
  1006. u8 access_key;
  1007. u8 len;
  1008. };
  1009. static void _cmpxchg_guest_abs_with_key(struct guest_fault *f)
  1010. {
  1011. struct cmpxchg_key_context *context = f->priv;
  1012. context->exception = __cmpxchg_with_key(__va(PFN_PHYS(f->pfn) | context->offset),
  1013. context->old, context->new, context->len,
  1014. context->access_key);
  1015. }
  1016. /**
  1017. * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
  1018. * @kvm: Virtual machine instance.
  1019. * @gpa: Absolute guest address of the location to be changed.
  1020. * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
  1021. * non power of two will result in failure.
  1022. * @old: Pointer to old value. If the location at @gpa contains this value,
  1023. * the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
  1024. * *@old contains the value at @gpa before the attempt to
  1025. * exchange the value.
  1026. * @new: The value to place at @gpa.
  1027. * @acc: The access key to use for the guest access.
  1028. * @success: output value indicating if an exchange occurred.
  1029. *
  1030. * Atomically exchange the value at @gpa by @new, if it contains *@old.
  1031. * Honors storage keys.
  1032. *
  1033. * Return: * 0: successful exchange
  1034. * * >0: a program interruption code indicating the reason cmpxchg could
  1035. * not be attempted
  1036. * * -EINVAL: address misaligned or len not power of two
  1037. * * -EAGAIN: transient failure (len 1 or 2)
  1038. * * -EOPNOTSUPP: read-only memslot (should never occur)
  1039. */
  1040. int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, union kvm_s390_quad *old,
  1041. union kvm_s390_quad new, u8 acc, bool *success)
  1042. {
  1043. struct cmpxchg_key_context context = {
  1044. .old = old,
  1045. .new = new,
  1046. .offset = offset_in_page(gpa),
  1047. .len = len,
  1048. .access_key = acc,
  1049. };
  1050. struct guest_fault fault = {
  1051. .gfn = gpa_to_gfn(gpa),
  1052. .priv = &context,
  1053. .write_attempt = true,
  1054. .callback = _cmpxchg_guest_abs_with_key,
  1055. };
  1056. int rc;
  1057. lockdep_assert_held(&kvm->srcu);
  1058. if (len > 16 || !IS_ALIGNED(gpa, len))
  1059. return -EINVAL;
  1060. rc = kvm_s390_faultin_gfn(NULL, kvm, &fault);
  1061. if (rc)
  1062. return rc;
  1063. *success = !context.exception;
  1064. if (context.exception == 1)
  1065. return 0;
  1066. return context.exception;
  1067. }
  1068. /**
  1069. * guest_translate_address_with_key - translate guest logical into guest absolute address
  1070. * @vcpu: virtual cpu
  1071. * @gva: Guest virtual address
  1072. * @ar: Access register
  1073. * @gpa: Guest physical address
  1074. * @mode: Translation access mode
  1075. * @access_key: access key to mach the storage key with
  1076. *
  1077. * Parameter semantics are the same as the ones from guest_translate.
  1078. * The memory contents at the guest address are not changed.
  1079. *
  1080. * Note: The IPTE lock is not taken during this function, so the caller
  1081. * has to take care of this.
  1082. */
  1083. int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
  1084. unsigned long *gpa, enum gacc_mode mode,
  1085. u8 access_key)
  1086. {
  1087. union asce asce;
  1088. int rc;
  1089. gva = kvm_s390_logical_to_effective(vcpu, gva);
  1090. rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
  1091. if (rc)
  1092. return rc;
  1093. return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
  1094. access_key);
  1095. }
  1096. /**
  1097. * check_gva_range - test a range of guest virtual addresses for accessibility
  1098. * @vcpu: virtual cpu
  1099. * @gva: Guest virtual address
  1100. * @ar: Access register
  1101. * @length: Length of test range
  1102. * @mode: Translation access mode
  1103. * @access_key: access key to mach the storage keys with
  1104. */
  1105. int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
  1106. unsigned long length, enum gacc_mode mode, u8 access_key)
  1107. {
  1108. union asce asce;
  1109. int rc = 0;
  1110. rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
  1111. if (rc)
  1112. return rc;
  1113. ipte_lock(vcpu->kvm);
  1114. rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
  1115. access_key);
  1116. ipte_unlock(vcpu->kvm);
  1117. return rc;
  1118. }
  1119. /**
  1120. * check_gpa_range - test a range of guest physical addresses for accessibility
  1121. * @kvm: virtual machine instance
  1122. * @gpa: guest physical address
  1123. * @length: length of test range
  1124. * @mode: access mode to test, relevant for storage keys
  1125. * @access_key: access key to mach the storage keys with
  1126. */
  1127. int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
  1128. enum gacc_mode mode, u8 access_key)
  1129. {
  1130. unsigned int fragment_len;
  1131. int rc = 0;
  1132. while (length && !rc) {
  1133. fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
  1134. rc = vm_check_access_key_gpa(kvm, access_key, mode, gpa);
  1135. length -= fragment_len;
  1136. gpa += fragment_len;
  1137. }
  1138. return rc;
  1139. }
  1140. /**
  1141. * kvm_s390_check_low_addr_prot_real - check for low-address protection
  1142. * @vcpu: virtual cpu
  1143. * @gra: Guest real address
  1144. *
  1145. * Checks whether an address is subject to low-address protection and set
  1146. * up vcpu->arch.pgm accordingly if necessary.
  1147. *
  1148. * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
  1149. */
  1150. int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
  1151. {
  1152. union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
  1153. if (!ctlreg0.lap || !is_low_address(gra))
  1154. return 0;
  1155. return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
  1156. }
  1157. /**
  1158. * walk_guest_tables() - Walk the guest page table and pin the dat tables.
  1159. * @sg: Pointer to the shadow guest address space structure.
  1160. * @saddr: Faulting address in the shadow gmap.
  1161. * @w: Will be filled with information on the pinned pages.
  1162. * @wr: Wndicates a write access if true.
  1163. *
  1164. * Return:
  1165. * * %0 in case of success,
  1166. * * a PIC code > 0 in case the address translation fails
  1167. * * an error code < 0 if other errors happen in the host
  1168. */
  1169. static int walk_guest_tables(struct gmap *sg, unsigned long saddr, struct pgtwalk *w, bool wr)
  1170. {
  1171. struct gmap *parent = sg->parent;
  1172. struct guest_fault *entries;
  1173. union dat_table_entry table;
  1174. union vaddress vaddr;
  1175. unsigned long ptr;
  1176. struct kvm *kvm;
  1177. union asce asce;
  1178. int rc;
  1179. if (!parent)
  1180. return -EAGAIN;
  1181. kvm = parent->kvm;
  1182. WARN_ON(!kvm);
  1183. asce = sg->guest_asce;
  1184. entries = get_entries(w);
  1185. w->level = LEVEL_MEM;
  1186. w->last_addr = saddr;
  1187. if (asce.r)
  1188. return kvm_s390_get_guest_page(kvm, entries + LEVEL_MEM, gpa_to_gfn(saddr), false);
  1189. vaddr.addr = saddr;
  1190. ptr = asce.rsto * PAGE_SIZE;
  1191. if (!asce_contains_gfn(asce, gpa_to_gfn(saddr)))
  1192. return PGM_ASCE_TYPE;
  1193. switch (asce.dt) {
  1194. case ASCE_TYPE_REGION1:
  1195. if (vaddr.rfx01 > asce.tl)
  1196. return PGM_REGION_FIRST_TRANS;
  1197. break;
  1198. case ASCE_TYPE_REGION2:
  1199. if (vaddr.rsx01 > asce.tl)
  1200. return PGM_REGION_SECOND_TRANS;
  1201. break;
  1202. case ASCE_TYPE_REGION3:
  1203. if (vaddr.rtx01 > asce.tl)
  1204. return PGM_REGION_THIRD_TRANS;
  1205. break;
  1206. case ASCE_TYPE_SEGMENT:
  1207. if (vaddr.sx01 > asce.tl)
  1208. return PGM_SEGMENT_TRANSLATION;
  1209. break;
  1210. }
  1211. w->level = asce.dt;
  1212. switch (asce.dt) {
  1213. case ASCE_TYPE_REGION1:
  1214. w->last_addr = ptr + vaddr.rfx * 8;
  1215. rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
  1216. w->last_addr, &table.val);
  1217. if (rc)
  1218. return rc;
  1219. if (table.pgd.i)
  1220. return PGM_REGION_FIRST_TRANS;
  1221. if (table.pgd.tt != TABLE_TYPE_REGION1)
  1222. return PGM_TRANSLATION_SPEC;
  1223. if (vaddr.rsx01 < table.pgd.tf || vaddr.rsx01 > table.pgd.tl)
  1224. return PGM_REGION_SECOND_TRANS;
  1225. if (sg->edat_level >= 1)
  1226. w->p |= table.pgd.p;
  1227. ptr = table.pgd.rto * PAGE_SIZE;
  1228. w->level--;
  1229. fallthrough;
  1230. case ASCE_TYPE_REGION2:
  1231. w->last_addr = ptr + vaddr.rsx * 8;
  1232. rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
  1233. w->last_addr, &table.val);
  1234. if (rc)
  1235. return rc;
  1236. if (table.p4d.i)
  1237. return PGM_REGION_SECOND_TRANS;
  1238. if (table.p4d.tt != TABLE_TYPE_REGION2)
  1239. return PGM_TRANSLATION_SPEC;
  1240. if (vaddr.rtx01 < table.p4d.tf || vaddr.rtx01 > table.p4d.tl)
  1241. return PGM_REGION_THIRD_TRANS;
  1242. if (sg->edat_level >= 1)
  1243. w->p |= table.p4d.p;
  1244. ptr = table.p4d.rto * PAGE_SIZE;
  1245. w->level--;
  1246. fallthrough;
  1247. case ASCE_TYPE_REGION3:
  1248. w->last_addr = ptr + vaddr.rtx * 8;
  1249. rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
  1250. w->last_addr, &table.val);
  1251. if (rc)
  1252. return rc;
  1253. if (table.pud.i)
  1254. return PGM_REGION_THIRD_TRANS;
  1255. if (table.pud.tt != TABLE_TYPE_REGION3)
  1256. return PGM_TRANSLATION_SPEC;
  1257. if (table.pud.cr && asce.p && sg->edat_level >= 2)
  1258. return PGM_TRANSLATION_SPEC;
  1259. if (sg->edat_level >= 1)
  1260. w->p |= table.pud.p;
  1261. if (table.pud.fc && sg->edat_level >= 2) {
  1262. table.val = u64_replace_bits(table.val, saddr, ~_REGION3_MASK);
  1263. goto edat_applies;
  1264. }
  1265. if (vaddr.sx01 < table.pud.fc0.tf || vaddr.sx01 > table.pud.fc0.tl)
  1266. return PGM_SEGMENT_TRANSLATION;
  1267. ptr = table.pud.fc0.sto * PAGE_SIZE;
  1268. w->level--;
  1269. fallthrough;
  1270. case ASCE_TYPE_SEGMENT:
  1271. w->last_addr = ptr + vaddr.sx * 8;
  1272. rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
  1273. w->last_addr, &table.val);
  1274. if (rc)
  1275. return rc;
  1276. if (table.pmd.i)
  1277. return PGM_SEGMENT_TRANSLATION;
  1278. if (table.pmd.tt != TABLE_TYPE_SEGMENT)
  1279. return PGM_TRANSLATION_SPEC;
  1280. if (table.pmd.cs && asce.p)
  1281. return PGM_TRANSLATION_SPEC;
  1282. w->p |= table.pmd.p;
  1283. if (table.pmd.fc && sg->edat_level >= 1) {
  1284. table.val = u64_replace_bits(table.val, saddr, ~_SEGMENT_MASK);
  1285. goto edat_applies;
  1286. }
  1287. ptr = table.pmd.fc0.pto * (PAGE_SIZE / 2);
  1288. w->level--;
  1289. }
  1290. w->last_addr = ptr + vaddr.px * 8;
  1291. rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
  1292. w->last_addr, &table.val);
  1293. if (rc)
  1294. return rc;
  1295. if (table.pte.i)
  1296. return PGM_PAGE_TRANSLATION;
  1297. if (table.pte.z)
  1298. return PGM_TRANSLATION_SPEC;
  1299. w->p |= table.pte.p;
  1300. edat_applies:
  1301. if (wr && w->p)
  1302. return PGM_PROTECTION;
  1303. return kvm_s390_get_guest_page(kvm, entries + LEVEL_MEM, table.pte.pfra, wr);
  1304. }
  1305. static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union pte *ptep,
  1306. struct guest_fault *f, bool p)
  1307. {
  1308. union pgste pgste;
  1309. union pte newpte;
  1310. int rc;
  1311. lockdep_assert_held(&sg->kvm->mmu_lock);
  1312. lockdep_assert_held(&sg->parent->children_lock);
  1313. scoped_guard(spinlock, &sg->host_to_rmap_lock)
  1314. rc = gmap_insert_rmap(sg, f->gfn, gpa_to_gfn(raddr), TABLE_TYPE_PAGE_TABLE);
  1315. if (rc)
  1316. return rc;
  1317. if (!pgste_get_trylock(ptep_h, &pgste))
  1318. return -EAGAIN;
  1319. newpte = _pte(f->pfn, f->writable, !p, ptep_h->s.s);
  1320. newpte.s.d |= ptep_h->s.d;
  1321. newpte.s.sd |= ptep_h->s.sd;
  1322. newpte.h.p &= ptep_h->h.p;
  1323. if (!newpte.h.p && !f->writable) {
  1324. rc = -EOPNOTSUPP;
  1325. } else {
  1326. pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
  1327. pgste.vsie_notif = 1;
  1328. }
  1329. pgste_set_unlock(ptep_h, pgste);
  1330. if (rc)
  1331. return rc;
  1332. if (sg->invalidated)
  1333. return -EAGAIN;
  1334. newpte = _pte(f->pfn, 0, !p, 0);
  1335. if (!pgste_get_trylock(ptep, &pgste))
  1336. return -EAGAIN;
  1337. pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg));
  1338. pgste_set_unlock(ptep, pgste);
  1339. return 0;
  1340. }
  1341. static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
  1342. struct guest_fault *f, bool p)
  1343. {
  1344. union crste newcrste, oldcrste;
  1345. gfn_t gfn;
  1346. int rc;
  1347. lockdep_assert_held(&sg->kvm->mmu_lock);
  1348. lockdep_assert_held(&sg->parent->children_lock);
  1349. gfn = f->gfn & gpa_to_gfn(is_pmd(*table) ? _SEGMENT_MASK : _REGION3_MASK);
  1350. scoped_guard(spinlock, &sg->host_to_rmap_lock)
  1351. rc = gmap_insert_rmap(sg, gfn, gpa_to_gfn(raddr), host->h.tt);
  1352. if (rc)
  1353. return rc;
  1354. do {
  1355. /* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */
  1356. if (sg->invalidated)
  1357. return -EAGAIN;
  1358. oldcrste = READ_ONCE(*host);
  1359. newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
  1360. newcrste.s.fc1.d |= oldcrste.s.fc1.d;
  1361. newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
  1362. newcrste.h.p &= oldcrste.h.p;
  1363. newcrste.s.fc1.vsie_notif = 1;
  1364. newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
  1365. newcrste.s.fc1.s = oldcrste.s.fc1.s;
  1366. if (!newcrste.h.p && !f->writable)
  1367. return -EOPNOTSUPP;
  1368. } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
  1369. if (sg->invalidated)
  1370. return -EAGAIN;
  1371. newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
  1372. gfn = gpa_to_gfn(raddr);
  1373. while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
  1374. ;
  1375. return 0;
  1376. }
  1377. static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
  1378. unsigned long saddr, struct pgtwalk *w)
  1379. {
  1380. struct guest_fault *entries;
  1381. int flags, i, hl, gl, l, rc;
  1382. union crste *table, *host;
  1383. union pte *ptep, *ptep_h;
  1384. lockdep_assert_held(&sg->kvm->mmu_lock);
  1385. lockdep_assert_held(&sg->parent->children_lock);
  1386. entries = get_entries(w);
  1387. ptep_h = NULL;
  1388. ptep = NULL;
  1389. rc = dat_entry_walk(NULL, gpa_to_gfn(saddr), sg->asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE,
  1390. &table, &ptep);
  1391. if (rc)
  1392. return rc;
  1393. /* A race occurred. The shadow mapping is already valid, nothing to do */
  1394. if ((ptep && !ptep->h.i && ptep->h.p == w->p) ||
  1395. (!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
  1396. return 0;
  1397. gl = get_level(table, ptep);
  1398. /* In case of a real address space */
  1399. if (w->level <= LEVEL_MEM) {
  1400. l = TABLE_TYPE_PAGE_TABLE;
  1401. hl = TABLE_TYPE_REGION1;
  1402. goto real_address_space;
  1403. }
  1404. /*
  1405. * Skip levels that are already protected. For each level, protect
  1406. * only the page containing the entry, not the whole table.
  1407. */
  1408. for (i = gl ; i >= w->level; i--) {
  1409. rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr),
  1410. entries[i].pfn, i + 1, entries[i].writable);
  1411. if (rc)
  1412. return rc;
  1413. if (sg->invalidated)
  1414. return -EAGAIN;
  1415. }
  1416. rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
  1417. TABLE_TYPE_PAGE_TABLE, &host, &ptep_h);
  1418. if (rc)
  1419. return rc;
  1420. hl = get_level(host, ptep_h);
  1421. /* Get the smallest granularity */
  1422. l = min3(gl, hl, w->level);
  1423. real_address_space:
  1424. flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
  1425. /* If necessary, create the shadow mapping */
  1426. if (l < gl) {
  1427. rc = dat_entry_walk(mc, gpa_to_gfn(saddr), sg->asce, flags, l, &table, &ptep);
  1428. if (rc)
  1429. return rc;
  1430. }
  1431. if (l < hl) {
  1432. rc = dat_entry_walk(mc, entries[LEVEL_MEM].gfn, sg->parent->asce,
  1433. flags, l, &host, &ptep_h);
  1434. if (rc)
  1435. return rc;
  1436. }
  1437. if (KVM_BUG_ON(l > TABLE_TYPE_REGION3, sg->kvm))
  1438. return -EFAULT;
  1439. if (l == TABLE_TYPE_PAGE_TABLE)
  1440. return _do_shadow_pte(sg, saddr, ptep_h, ptep, entries + LEVEL_MEM, w->p);
  1441. return _do_shadow_crste(sg, saddr, host, table, entries + LEVEL_MEM, w->p);
  1442. }
  1443. static inline int _gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
  1444. unsigned long seq, struct pgtwalk *walk)
  1445. {
  1446. struct gmap *parent;
  1447. int rc;
  1448. if (kvm_s390_array_needs_retry_unsafe(vcpu->kvm, seq, walk->raw_entries))
  1449. return -EAGAIN;
  1450. again:
  1451. rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
  1452. if (rc)
  1453. return rc;
  1454. scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
  1455. if (kvm_s390_array_needs_retry_safe(vcpu->kvm, seq, walk->raw_entries))
  1456. return -EAGAIN;
  1457. parent = READ_ONCE(sg->parent);
  1458. if (!parent)
  1459. return -EAGAIN;
  1460. scoped_guard(spinlock, &parent->children_lock) {
  1461. if (READ_ONCE(sg->parent) != parent)
  1462. return -EAGAIN;
  1463. sg->invalidated = false;
  1464. rc = _gaccess_do_shadow(vcpu->arch.mc, sg, saddr, walk);
  1465. }
  1466. if (rc == -ENOMEM)
  1467. goto again;
  1468. if (!rc)
  1469. kvm_s390_release_faultin_array(vcpu->kvm, walk->raw_entries, false);
  1470. }
  1471. return rc;
  1472. }
  1473. /**
  1474. * __gaccess_shadow_fault() - Handle fault on a shadow page table.
  1475. * @vcpu: Virtual cpu that triggered the action.
  1476. * @sg: The shadow guest address space structure.
  1477. * @saddr: Faulting address in the shadow gmap.
  1478. * @datptr: Will contain the address of the faulting DAT table entry, or of
  1479. * the valid leaf, plus some flags.
  1480. * @wr: Whether this is a write access.
  1481. *
  1482. * Return:
  1483. * * %0 if the shadow fault was successfully resolved
  1484. * * > 0 (pgm exception code) on exceptions while faulting
  1485. * * %-EAGAIN if the caller can retry immediately
  1486. * * %-EFAULT when accessing invalid guest addresses
  1487. * * %-ENOMEM if out of memory
  1488. */
  1489. static int __gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
  1490. union mvpg_pei *datptr, bool wr)
  1491. {
  1492. struct pgtwalk walk = { .p = false, };
  1493. unsigned long seq;
  1494. int rc;
  1495. seq = vcpu->kvm->mmu_invalidate_seq;
  1496. /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
  1497. smp_rmb();
  1498. rc = walk_guest_tables(sg, saddr, &walk, wr);
  1499. if (datptr) {
  1500. datptr->val = walk.last_addr;
  1501. datptr->dat_prot = wr && walk.p;
  1502. datptr->not_pte = walk.level > TABLE_TYPE_PAGE_TABLE;
  1503. datptr->real = sg->guest_asce.r;
  1504. }
  1505. if (!rc)
  1506. rc = _gaccess_shadow_fault(vcpu, sg, saddr, seq, &walk);
  1507. if (rc)
  1508. kvm_s390_release_faultin_array(vcpu->kvm, walk.raw_entries, true);
  1509. return rc;
  1510. }
  1511. int gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
  1512. union mvpg_pei *datptr, bool wr)
  1513. {
  1514. int rc;
  1515. if (KVM_BUG_ON(!test_bit(GMAP_FLAG_SHADOW, &sg->flags), vcpu->kvm))
  1516. return -EFAULT;
  1517. rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
  1518. if (rc)
  1519. return rc;
  1520. ipte_lock(vcpu->kvm);
  1521. rc = __gaccess_shadow_fault(vcpu, sg, saddr, datptr, wr || sg->guest_asce.r);
  1522. ipte_unlock(vcpu->kvm);
  1523. return rc;
  1524. }