grumain.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * SN Platform GRU Driver
  4. *
  5. * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
  6. *
  7. * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
  8. */
  9. #include <linux/kernel.h>
  10. #include <linux/slab.h>
  11. #include <linux/mm.h>
  12. #include <linux/spinlock.h>
  13. #include <linux/sched.h>
  14. #include <linux/device.h>
  15. #include <linux/list.h>
  16. #include <linux/err.h>
  17. #include <linux/prefetch.h>
  18. #include <asm/uv/uv_hub.h>
  19. #include "gru.h"
  20. #include "grutables.h"
  21. #include "gruhandles.h"
  22. unsigned long gru_options __read_mostly;
  23. static struct device_driver gru_driver = {
  24. .name = "gru"
  25. };
  26. static struct device gru_device = {
  27. .init_name = "",
  28. .driver = &gru_driver,
  29. };
  30. struct device *grudev = &gru_device;
  31. /*
  32. * Select a gru fault map to be used by the current cpu. Note that
  33. * multiple cpus may be using the same map.
  34. * ZZZ should be inline but did not work on emulator
  35. */
  36. int gru_cpu_fault_map_id(void)
  37. {
  38. int cpu = smp_processor_id();
  39. int id, core;
  40. core = uv_cpu_core_number(cpu);
  41. id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
  42. return id;
  43. }
  44. /*--------- ASID Management -------------------------------------------
  45. *
  46. * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
  47. * Once MAX is reached, flush the TLB & start over. However,
  48. * some asids may still be in use. There won't be many (percentage wise) still
  49. * in use. Search active contexts & determine the value of the first
  50. * asid in use ("x"s below). Set "limit" to this value.
  51. * This defines a block of assignable asids.
  52. *
  53. * When "limit" is reached, search forward from limit+1 and determine the
  54. * next block of assignable asids.
  55. *
  56. * Repeat until MAX_ASID is reached, then start over again.
  57. *
  58. * Each time MAX_ASID is reached, increment the asid generation. Since
  59. * the search for in-use asids only checks contexts with GRUs currently
  60. * assigned, asids in some contexts will be missed. Prior to loading
  61. * a context, the asid generation of the GTS asid is rechecked. If it
  62. * doesn't match the current generation, a new asid will be assigned.
  63. *
 *   0---------------x------------x---------------------x----|
 *       ^-next      ^-limit                                 ^-MAX_ASID
  66. *
  67. * All asid manipulation & context loading/unloading is protected by the
  68. * gs_lock.
  69. */
  70. /* Hit the asid limit. Start over */
  71. static int gru_wrap_asid(struct gru_state *gru)
  72. {
  73. gru_dbg(grudev, "gid %d\n", gru->gs_gid);
  74. STAT(asid_wrap);
  75. gru->gs_asid_gen++;
  76. return MIN_ASID;
  77. }
  78. /* Find the next chunk of unused asids */
  79. static int gru_reset_asid_limit(struct gru_state *gru, int asid)
  80. {
  81. int i, gid, inuse_asid, limit;
  82. gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
  83. STAT(asid_next);
  84. limit = MAX_ASID;
  85. if (asid >= limit)
  86. asid = gru_wrap_asid(gru);
  87. gru_flush_all_tlb(gru);
  88. gid = gru->gs_gid;
  89. again:
  90. for (i = 0; i < GRU_NUM_CCH; i++) {
  91. if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
  92. continue;
  93. inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
  94. gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
  95. gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
  96. inuse_asid, i);
  97. if (inuse_asid == asid) {
  98. asid += ASID_INC;
  99. if (asid >= limit) {
  100. /*
  101. * empty range: reset the range limit and
  102. * start over
  103. */
  104. limit = MAX_ASID;
  105. if (asid >= MAX_ASID)
  106. asid = gru_wrap_asid(gru);
  107. goto again;
  108. }
  109. }
  110. if ((inuse_asid > asid) && (inuse_asid < limit))
  111. limit = inuse_asid;
  112. }
  113. gru->gs_asid_limit = limit;
  114. gru->gs_asid = asid;
  115. gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
  116. asid, limit);
  117. return asid;
  118. }
  119. /* Assign a new ASID to a thread context. */
  120. static int gru_assign_asid(struct gru_state *gru)
  121. {
  122. int asid;
  123. gru->gs_asid += ASID_INC;
  124. asid = gru->gs_asid;
  125. if (asid >= gru->gs_asid_limit)
  126. asid = gru_reset_asid_limit(gru, asid);
  127. gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
  128. return asid;
  129. }
  130. /*
  131. * Clear n bits in a word. Return a word indicating the bits that were cleared.
  132. * Optionally, build an array of chars that contain the bit numbers allocated.
  133. */
  134. static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
  135. signed char *idx)
  136. {
  137. unsigned long bits = 0;
  138. int i;
  139. while (n--) {
  140. i = find_first_bit(p, mmax);
  141. if (i == mmax)
  142. BUG();
  143. __clear_bit(i, p);
  144. __set_bit(i, &bits);
  145. if (idx)
  146. *idx++ = i;
  147. }
  148. return bits;
  149. }
  150. unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
  151. signed char *cbmap)
  152. {
  153. return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
  154. cbmap);
  155. }
  156. unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
  157. signed char *dsmap)
  158. {
  159. return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
  160. dsmap);
  161. }
  162. static void reserve_gru_resources(struct gru_state *gru,
  163. struct gru_thread_state *gts)
  164. {
  165. gru->gs_active_contexts++;
  166. gts->ts_cbr_map =
  167. gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
  168. gts->ts_cbr_idx);
  169. gts->ts_dsr_map =
  170. gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
  171. }
  172. static void free_gru_resources(struct gru_state *gru,
  173. struct gru_thread_state *gts)
  174. {
  175. gru->gs_active_contexts--;
  176. gru->gs_cbr_map |= gts->ts_cbr_map;
  177. gru->gs_dsr_map |= gts->ts_dsr_map;
  178. }
  179. /*
  180. * Check if a GRU has sufficient free resources to satisfy an allocation
  181. * request. Note: GRU locks may or may not be held when this is called. If
  182. * not held, recheck after acquiring the appropriate locks.
  183. *
  184. * Returns 1 if sufficient resources, 0 if not
  185. */
  186. static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
  187. int dsr_au_count, int max_active_contexts)
  188. {
  189. return hweight64(gru->gs_cbr_map) >= cbr_au_count
  190. && hweight64(gru->gs_dsr_map) >= dsr_au_count
  191. && gru->gs_active_contexts < max_active_contexts;
  192. }
  193. /*
  194. * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG
  195. * context.
  196. */
  197. static int gru_load_mm_tracker(struct gru_state *gru,
  198. struct gru_thread_state *gts)
  199. {
  200. struct gru_mm_struct *gms = gts->ts_gms;
  201. struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
  202. unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
  203. int asid;
  204. spin_lock(&gms->ms_asid_lock);
  205. asid = asids->mt_asid;
  206. spin_lock(&gru->gs_asid_lock);
  207. if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
  208. gru->gs_asid_gen)) {
  209. asid = gru_assign_asid(gru);
  210. asids->mt_asid = asid;
  211. asids->mt_asid_gen = gru->gs_asid_gen;
  212. STAT(asid_new);
  213. } else {
  214. STAT(asid_reuse);
  215. }
  216. spin_unlock(&gru->gs_asid_lock);
  217. BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
  218. asids->mt_ctxbitmap |= ctxbitmap;
  219. if (!test_bit(gru->gs_gid, gms->ms_asidmap))
  220. __set_bit(gru->gs_gid, gms->ms_asidmap);
  221. spin_unlock(&gms->ms_asid_lock);
  222. gru_dbg(grudev,
  223. "gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
  224. gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
  225. gms->ms_asidmap[0]);
  226. return asid;
  227. }
  228. static void gru_unload_mm_tracker(struct gru_state *gru,
  229. struct gru_thread_state *gts)
  230. {
  231. struct gru_mm_struct *gms = gts->ts_gms;
  232. struct gru_mm_tracker *asids;
  233. unsigned short ctxbitmap;
  234. asids = &gms->ms_asids[gru->gs_gid];
  235. ctxbitmap = (1 << gts->ts_ctxnum);
  236. spin_lock(&gms->ms_asid_lock);
  237. spin_lock(&gru->gs_asid_lock);
  238. BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
  239. asids->mt_ctxbitmap ^= ctxbitmap;
  240. gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
  241. gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
  242. spin_unlock(&gru->gs_asid_lock);
  243. spin_unlock(&gms->ms_asid_lock);
  244. }
  245. /*
  246. * Decrement the reference count on a GTS structure. Free the structure
  247. * if the reference count goes to zero.
  248. */
  249. void gts_drop(struct gru_thread_state *gts)
  250. {
  251. if (gts && refcount_dec_and_test(&gts->ts_refcnt)) {
  252. if (gts->ts_gms)
  253. gru_drop_mmu_notifier(gts->ts_gms);
  254. kfree(gts);
  255. STAT(gts_free);
  256. }
  257. }
  258. /*
  259. * Locate the GTS structure for the current thread.
  260. */
  261. static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
  262. *vdata, int tsid)
  263. {
  264. struct gru_thread_state *gts;
  265. list_for_each_entry(gts, &vdata->vd_head, ts_next)
  266. if (gts->ts_tsid == tsid)
  267. return gts;
  268. return NULL;
  269. }
  270. /*
  271. * Allocate a thread state structure.
  272. */
  273. struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
  274. int cbr_au_count, int dsr_au_count,
  275. unsigned char tlb_preload_count, int options, int tsid)
  276. {
  277. struct gru_thread_state *gts;
  278. struct gru_mm_struct *gms;
  279. int bytes;
  280. bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
  281. bytes += sizeof(struct gru_thread_state);
  282. gts = kmalloc(bytes, GFP_KERNEL);
  283. if (!gts)
  284. return ERR_PTR(-ENOMEM);
  285. STAT(gts_alloc);
  286. memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
  287. refcount_set(&gts->ts_refcnt, 1);
  288. mutex_init(&gts->ts_ctxlock);
  289. gts->ts_cbr_au_count = cbr_au_count;
  290. gts->ts_dsr_au_count = dsr_au_count;
  291. gts->ts_tlb_preload_count = tlb_preload_count;
  292. gts->ts_user_options = options;
  293. gts->ts_user_blade_id = -1;
  294. gts->ts_user_chiplet_id = -1;
  295. gts->ts_tsid = tsid;
  296. gts->ts_ctxnum = NULLCTX;
  297. gts->ts_tlb_int_select = -1;
  298. gts->ts_cch_req_slice = -1;
  299. gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
  300. if (vma) {
  301. gts->ts_mm = current->mm;
  302. gts->ts_vma = vma;
  303. gms = gru_register_mmu_notifier();
  304. if (IS_ERR(gms))
  305. goto err;
  306. gts->ts_gms = gms;
  307. }
  308. gru_dbg(grudev, "alloc gts %p\n", gts);
  309. return gts;
  310. err:
  311. gts_drop(gts);
  312. return ERR_CAST(gms);
  313. }
  314. /*
  315. * Allocate a vma private data structure.
  316. */
  317. struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
  318. {
  319. struct gru_vma_data *vdata = NULL;
  320. vdata = kmalloc_obj(*vdata);
  321. if (!vdata)
  322. return NULL;
  323. STAT(vdata_alloc);
  324. INIT_LIST_HEAD(&vdata->vd_head);
  325. spin_lock_init(&vdata->vd_lock);
  326. gru_dbg(grudev, "alloc vdata %p\n", vdata);
  327. return vdata;
  328. }
  329. /*
  330. * Find the thread state structure for the current thread.
  331. */
  332. struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
  333. int tsid)
  334. {
  335. struct gru_vma_data *vdata = vma->vm_private_data;
  336. struct gru_thread_state *gts;
  337. spin_lock(&vdata->vd_lock);
  338. gts = gru_find_current_gts_nolock(vdata, tsid);
  339. spin_unlock(&vdata->vd_lock);
  340. gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
  341. return gts;
  342. }
  343. /*
  344. * Allocate a new thread state for a GSEG. Note that races may allow
  345. * another thread to race to create a gts.
  346. */
  347. struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
  348. int tsid)
  349. {
  350. struct gru_vma_data *vdata = vma->vm_private_data;
  351. struct gru_thread_state *gts, *ngts;
  352. gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
  353. vdata->vd_dsr_au_count,
  354. vdata->vd_tlb_preload_count,
  355. vdata->vd_user_options, tsid);
  356. if (IS_ERR(gts))
  357. return gts;
  358. spin_lock(&vdata->vd_lock);
  359. ngts = gru_find_current_gts_nolock(vdata, tsid);
  360. if (ngts) {
  361. gts_drop(gts);
  362. gts = ngts;
  363. STAT(gts_double_allocate);
  364. } else {
  365. list_add(&gts->ts_next, &vdata->vd_head);
  366. }
  367. spin_unlock(&vdata->vd_lock);
  368. gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
  369. return gts;
  370. }
  371. /*
  372. * Free the GRU context assigned to the thread state.
  373. */
  374. static void gru_free_gru_context(struct gru_thread_state *gts)
  375. {
  376. struct gru_state *gru;
  377. gru = gts->ts_gru;
  378. gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);
  379. spin_lock(&gru->gs_lock);
  380. gru->gs_gts[gts->ts_ctxnum] = NULL;
  381. free_gru_resources(gru, gts);
  382. BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
  383. __clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
  384. gts->ts_ctxnum = NULLCTX;
  385. gts->ts_gru = NULL;
  386. gts->ts_blade = -1;
  387. spin_unlock(&gru->gs_lock);
  388. gts_drop(gts);
  389. STAT(free_context);
  390. }
  391. /*
  392. * Prefetching cachelines help hardware performance.
  393. * (Strictly a performance enhancement. Not functionally required).
  394. */
  395. static void prefetch_data(void *p, int num, int stride)
  396. {
  397. while (num-- > 0) {
  398. prefetchw(p);
  399. p += stride;
  400. }
  401. }
  402. static inline long gru_copy_handle(void *d, void *s)
  403. {
  404. memcpy(d, s, GRU_HANDLE_BYTES);
  405. return GRU_HANDLE_BYTES;
  406. }
  407. static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
  408. unsigned long cbrmap, unsigned long length)
  409. {
  410. int i, scr;
  411. prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
  412. GRU_CACHE_LINE_BYTES);
  413. for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
  414. prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
  415. prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
  416. GRU_CACHE_LINE_BYTES);
  417. cb += GRU_HANDLE_STRIDE;
  418. }
  419. }
  420. static void gru_load_context_data(void *save, void *grubase, int ctxnum,
  421. unsigned long cbrmap, unsigned long dsrmap,
  422. int data_valid)
  423. {
  424. void *gseg, *cb, *cbe;
  425. unsigned long length;
  426. int i, scr;
  427. gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
  428. cb = gseg + GRU_CB_BASE;
  429. cbe = grubase + GRU_CBE_BASE;
  430. length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
  431. gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
  432. for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
  433. if (data_valid) {
  434. save += gru_copy_handle(cb, save);
  435. save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
  436. save);
  437. } else {
  438. memset(cb, 0, GRU_CACHE_LINE_BYTES);
  439. memset(cbe + i * GRU_HANDLE_STRIDE, 0,
  440. GRU_CACHE_LINE_BYTES);
  441. }
  442. /* Flush CBE to hide race in context restart */
  443. mb();
  444. gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
  445. cb += GRU_HANDLE_STRIDE;
  446. }
  447. if (data_valid)
  448. memcpy(gseg + GRU_DS_BASE, save, length);
  449. else
  450. memset(gseg + GRU_DS_BASE, 0, length);
  451. }
  452. static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
  453. unsigned long cbrmap, unsigned long dsrmap)
  454. {
  455. void *gseg, *cb, *cbe;
  456. unsigned long length;
  457. int i, scr;
  458. gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
  459. cb = gseg + GRU_CB_BASE;
  460. cbe = grubase + GRU_CBE_BASE;
  461. length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
  462. /* CBEs may not be coherent. Flush them from cache */
  463. for_each_cbr_in_allocation_map(i, &cbrmap, scr)
  464. gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
  465. mb(); /* Let the CL flush complete */
  466. gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
  467. for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
  468. save += gru_copy_handle(save, cb);
  469. save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
  470. cb += GRU_HANDLE_STRIDE;
  471. }
  472. memcpy(save, gseg + GRU_DS_BASE, length);
  473. }
  474. void gru_unload_context(struct gru_thread_state *gts, int savestate)
  475. {
  476. struct gru_state *gru = gts->ts_gru;
  477. struct gru_context_configuration_handle *cch;
  478. int ctxnum = gts->ts_ctxnum;
  479. if (!is_kernel_context(gts))
  480. zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
  481. cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
  482. gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
  483. gts, gts->ts_cbr_map, gts->ts_dsr_map);
  484. lock_cch_handle(cch);
  485. if (cch_interrupt_sync(cch))
  486. BUG();
  487. if (!is_kernel_context(gts))
  488. gru_unload_mm_tracker(gru, gts);
  489. if (savestate) {
  490. gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
  491. ctxnum, gts->ts_cbr_map,
  492. gts->ts_dsr_map);
  493. gts->ts_data_valid = 1;
  494. }
  495. if (cch_deallocate(cch))
  496. BUG();
  497. unlock_cch_handle(cch);
  498. gru_free_gru_context(gts);
  499. }
  500. /*
  501. * Load a GRU context by copying it from the thread data structure in memory
  502. * to the GRU.
  503. */
  504. void gru_load_context(struct gru_thread_state *gts)
  505. {
  506. struct gru_state *gru = gts->ts_gru;
  507. struct gru_context_configuration_handle *cch;
  508. int i, err, asid, ctxnum = gts->ts_ctxnum;
  509. cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
  510. lock_cch_handle(cch);
  511. cch->tfm_fault_bit_enable =
  512. (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
  513. || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
  514. cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
  515. if (cch->tlb_int_enable) {
  516. gts->ts_tlb_int_select = gru_cpu_fault_map_id();
  517. cch->tlb_int_select = gts->ts_tlb_int_select;
  518. }
  519. if (gts->ts_cch_req_slice >= 0) {
  520. cch->req_slice_set_enable = 1;
  521. cch->req_slice = gts->ts_cch_req_slice;
  522. } else {
  523. cch->req_slice_set_enable =0;
  524. }
  525. cch->tfm_done_bit_enable = 0;
  526. cch->dsr_allocation_map = gts->ts_dsr_map;
  527. cch->cbr_allocation_map = gts->ts_cbr_map;
  528. if (is_kernel_context(gts)) {
  529. cch->unmap_enable = 1;
  530. cch->tfm_done_bit_enable = 1;
  531. cch->cb_int_enable = 1;
  532. cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */
  533. } else {
  534. cch->unmap_enable = 0;
  535. cch->tfm_done_bit_enable = 0;
  536. cch->cb_int_enable = 0;
  537. asid = gru_load_mm_tracker(gru, gts);
  538. for (i = 0; i < 8; i++) {
  539. cch->asid[i] = asid + i;
  540. cch->sizeavail[i] = gts->ts_sizeavail;
  541. }
  542. }
  543. err = cch_allocate(cch);
  544. if (err) {
  545. gru_dbg(grudev,
  546. "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
  547. err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
  548. BUG();
  549. }
  550. gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
  551. gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
  552. if (cch_start(cch))
  553. BUG();
  554. unlock_cch_handle(cch);
  555. gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
  556. gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
  557. (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
  558. }
  559. /*
  560. * Update fields in an active CCH:
  561. * - retarget interrupts on local blade
  562. * - update sizeavail mask
  563. */
  564. int gru_update_cch(struct gru_thread_state *gts)
  565. {
  566. struct gru_context_configuration_handle *cch;
  567. struct gru_state *gru = gts->ts_gru;
  568. int i, ctxnum = gts->ts_ctxnum, ret = 0;
  569. cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
  570. lock_cch_handle(cch);
  571. if (cch->state == CCHSTATE_ACTIVE) {
  572. if (gru->gs_gts[gts->ts_ctxnum] != gts)
  573. goto exit;
  574. if (cch_interrupt(cch))
  575. BUG();
  576. for (i = 0; i < 8; i++)
  577. cch->sizeavail[i] = gts->ts_sizeavail;
  578. gts->ts_tlb_int_select = gru_cpu_fault_map_id();
  579. cch->tlb_int_select = gru_cpu_fault_map_id();
  580. cch->tfm_fault_bit_enable =
  581. (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
  582. || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
  583. if (cch_start(cch))
  584. BUG();
  585. ret = 1;
  586. }
  587. exit:
  588. unlock_cch_handle(cch);
  589. return ret;
  590. }
  591. /*
  592. * Update CCH tlb interrupt select. Required when all the following is true:
  593. * - task's GRU context is loaded into a GRU
  594. * - task is using interrupt notification for TLB faults
  595. * - task has migrated to a different cpu on the same blade where
  596. * it was previously running.
  597. */
  598. static int gru_retarget_intr(struct gru_thread_state *gts)
  599. {
  600. if (gts->ts_tlb_int_select < 0
  601. || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
  602. return 0;
  603. gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
  604. gru_cpu_fault_map_id());
  605. return gru_update_cch(gts);
  606. }
  607. /*
  608. * Check if a GRU context is allowed to use a specific chiplet. By default
  609. * a context is assigned to any blade-local chiplet. However, users can
  610. * override this.
  611. * Returns 1 if assignment allowed, 0 otherwise
  612. */
  613. static int gru_check_chiplet_assignment(struct gru_state *gru,
  614. struct gru_thread_state *gts)
  615. {
  616. int blade_id;
  617. int chiplet_id;
  618. blade_id = gts->ts_user_blade_id;
  619. if (blade_id < 0)
  620. blade_id = uv_numa_blade_id();
  621. chiplet_id = gts->ts_user_chiplet_id;
  622. return gru->gs_blade_id == blade_id &&
  623. (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
  624. }
  625. /*
  626. * Unload the gru context if it is not assigned to the correct blade or
  627. * chiplet. Misassignment can occur if the process migrates to a different
  628. * blade or if the user changes the selected blade/chiplet.
  629. */
  630. int gru_check_context_placement(struct gru_thread_state *gts)
  631. {
  632. struct gru_state *gru;
  633. int ret = 0;
  634. /*
  635. * If the current task is the context owner, verify that the
  636. * context is correctly placed. This test is skipped for non-owner
  637. * references. Pthread apps use non-owner references to the CBRs.
  638. */
  639. gru = gts->ts_gru;
  640. /*
  641. * If gru or gts->ts_tgid_owner isn't initialized properly, return
  642. * success to indicate that the caller does not need to unload the
  643. * gru context.The caller is responsible for their inspection and
  644. * reinitialization if needed.
  645. */
  646. if (!gru || gts->ts_tgid_owner != current->tgid)
  647. return ret;
  648. if (!gru_check_chiplet_assignment(gru, gts)) {
  649. STAT(check_context_unload);
  650. ret = -EINVAL;
  651. } else if (gru_retarget_intr(gts)) {
  652. STAT(check_context_retarget_intr);
  653. }
  654. return ret;
  655. }
  656. /*
  657. * Insufficient GRU resources available on the local blade. Steal a context from
  658. * a process. This is a hack until a _real_ resource scheduler is written....
  659. */
  660. #define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
  661. #define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
  662. ((g)+1) : &(b)->bs_grus[0])
  663. static int is_gts_stealable(struct gru_thread_state *gts,
  664. struct gru_blade_state *bs)
  665. {
  666. if (is_kernel_context(gts))
  667. return down_write_trylock(&bs->bs_kgts_sema);
  668. else
  669. return mutex_trylock(&gts->ts_ctxlock);
  670. }
  671. static void gts_stolen(struct gru_thread_state *gts,
  672. struct gru_blade_state *bs)
  673. {
  674. if (is_kernel_context(gts)) {
  675. up_write(&bs->bs_kgts_sema);
  676. STAT(steal_kernel_context);
  677. } else {
  678. mutex_unlock(&gts->ts_ctxlock);
  679. STAT(steal_user_context);
  680. }
  681. }
  682. void gru_steal_context(struct gru_thread_state *gts)
  683. {
  684. struct gru_blade_state *blade;
  685. struct gru_state *gru, *gru0;
  686. struct gru_thread_state *ngts = NULL;
  687. int ctxnum, ctxnum0, flag = 0, cbr, dsr;
  688. int blade_id;
  689. blade_id = gts->ts_user_blade_id;
  690. if (blade_id < 0)
  691. blade_id = uv_numa_blade_id();
  692. cbr = gts->ts_cbr_au_count;
  693. dsr = gts->ts_dsr_au_count;
  694. blade = gru_base[blade_id];
  695. spin_lock(&blade->bs_lock);
  696. ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
  697. gru = blade->bs_lru_gru;
  698. if (ctxnum == 0)
  699. gru = next_gru(blade, gru);
  700. blade->bs_lru_gru = gru;
  701. blade->bs_lru_ctxnum = ctxnum;
  702. ctxnum0 = ctxnum;
  703. gru0 = gru;
  704. while (1) {
  705. if (gru_check_chiplet_assignment(gru, gts)) {
  706. if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
  707. break;
  708. spin_lock(&gru->gs_lock);
  709. for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
  710. if (flag && gru == gru0 && ctxnum == ctxnum0)
  711. break;
  712. ngts = gru->gs_gts[ctxnum];
  713. /*
  714. * We are grabbing locks out of order, so trylock is
  715. * needed. GTSs are usually not locked, so the odds of
  716. * success are high. If trylock fails, try to steal a
  717. * different GSEG.
  718. */
  719. if (ngts && is_gts_stealable(ngts, blade))
  720. break;
  721. ngts = NULL;
  722. }
  723. spin_unlock(&gru->gs_lock);
  724. if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
  725. break;
  726. }
  727. if (flag && gru == gru0)
  728. break;
  729. flag = 1;
  730. ctxnum = 0;
  731. gru = next_gru(blade, gru);
  732. }
  733. spin_unlock(&blade->bs_lock);
  734. if (ngts) {
  735. gts->ustats.context_stolen++;
  736. ngts->ts_steal_jiffies = jiffies;
  737. gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
  738. gts_stolen(ngts, blade);
  739. } else {
  740. STAT(steal_context_failed);
  741. }
  742. gru_dbg(grudev,
  743. "stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
  744. " avail cb %ld, ds %ld\n",
  745. gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
  746. hweight64(gru->gs_dsr_map));
  747. }
  748. /*
  749. * Assign a gru context.
  750. */
  751. static int gru_assign_context_number(struct gru_state *gru)
  752. {
  753. int ctxnum;
  754. ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
  755. __set_bit(ctxnum, &gru->gs_context_map);
  756. return ctxnum;
  757. }
  758. /*
  759. * Scan the GRUs on the local blade & assign a GRU context.
  760. */
  761. struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
  762. {
  763. struct gru_state *gru, *grux;
  764. int i, max_active_contexts;
  765. int blade_id = gts->ts_user_blade_id;
  766. if (blade_id < 0)
  767. blade_id = uv_numa_blade_id();
  768. again:
  769. gru = NULL;
  770. max_active_contexts = GRU_NUM_CCH;
  771. for_each_gru_on_blade(grux, blade_id, i) {
  772. if (!gru_check_chiplet_assignment(grux, gts))
  773. continue;
  774. if (check_gru_resources(grux, gts->ts_cbr_au_count,
  775. gts->ts_dsr_au_count,
  776. max_active_contexts)) {
  777. gru = grux;
  778. max_active_contexts = grux->gs_active_contexts;
  779. if (max_active_contexts == 0)
  780. break;
  781. }
  782. }
  783. if (gru) {
  784. spin_lock(&gru->gs_lock);
  785. if (!check_gru_resources(gru, gts->ts_cbr_au_count,
  786. gts->ts_dsr_au_count, GRU_NUM_CCH)) {
  787. spin_unlock(&gru->gs_lock);
  788. goto again;
  789. }
  790. reserve_gru_resources(gru, gts);
  791. gts->ts_gru = gru;
  792. gts->ts_blade = gru->gs_blade_id;
  793. gts->ts_ctxnum = gru_assign_context_number(gru);
  794. refcount_inc(&gts->ts_refcnt);
  795. gru->gs_gts[gts->ts_ctxnum] = gts;
  796. spin_unlock(&gru->gs_lock);
  797. STAT(assign_context);
  798. gru_dbg(grudev,
  799. "gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
  800. gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
  801. gts->ts_gru->gs_gid, gts->ts_ctxnum,
  802. gts->ts_cbr_au_count, gts->ts_dsr_au_count);
  803. } else {
  804. gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
  805. STAT(assign_context_failed);
  806. }
  807. return gru;
  808. }
  809. /*
  810. * gru_nopage
  811. *
  812. * Map the user's GRU segment
  813. *
  814. * Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
  815. */
  816. vm_fault_t gru_fault(struct vm_fault *vmf)
  817. {
  818. struct vm_area_struct *vma = vmf->vma;
  819. struct gru_thread_state *gts;
  820. unsigned long paddr, vaddr;
  821. unsigned long expires;
  822. vaddr = vmf->address;
  823. gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
  824. vma, vaddr, GSEG_BASE(vaddr));
  825. STAT(nopfn);
  826. /* The following check ensures vaddr is a valid address in the VMA */
  827. gts = gru_find_thread_state(vma, TSID(vaddr, vma));
  828. if (!gts)
  829. return VM_FAULT_SIGBUS;
  830. again:
  831. mutex_lock(&gts->ts_ctxlock);
  832. if (gru_check_context_placement(gts)) {
  833. mutex_unlock(&gts->ts_ctxlock);
  834. gru_unload_context(gts, 1);
  835. return VM_FAULT_NOPAGE;
  836. }
  837. if (!gts->ts_gru) {
  838. STAT(load_user_context);
  839. if (!gru_assign_gru_context(gts)) {
  840. mutex_unlock(&gts->ts_ctxlock);
  841. set_current_state(TASK_INTERRUPTIBLE);
  842. schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
  843. expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
  844. if (time_before(expires, jiffies))
  845. gru_steal_context(gts);
  846. goto again;
  847. }
  848. gru_load_context(gts);
  849. paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
  850. remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
  851. paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
  852. vma->vm_page_prot);
  853. }
  854. mutex_unlock(&gts->ts_ctxlock);
  855. return VM_FAULT_NOPAGE;
  856. }