virtio_fs.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * virtio-fs: Virtio Filesystem
  4. * Copyright (C) 2018 Red Hat, Inc.
  5. */
  6. #include <linux/fs.h>
  7. #include <linux/dax.h>
  8. #include <linux/pci.h>
  9. #include <linux/interrupt.h>
  10. #include <linux/group_cpus.h>
  11. #include <linux/memremap.h>
  12. #include <linux/module.h>
  13. #include <linux/virtio.h>
  14. #include <linux/virtio_fs.h>
  15. #include <linux/delay.h>
  16. #include <linux/fs_context.h>
  17. #include <linux/fs_parser.h>
  18. #include <linux/highmem.h>
  19. #include <linux/cleanup.h>
  20. #include <linux/uio.h>
  21. #include "fuse_i.h"
  22. #include "fuse_dev_i.h"
  /*
   * Extra scatter-gather entries budgeted per request when working out the
   * FUSE connection's max_pages limit: besides the data pages themselves,
   * parts of struct fuse_req are sliced into scatter-gather lists too.
   */
  #define FUSE_HEADER_OVERHEAD 4

  /*
   * virtio_fs_instances lists the virtio-fs device instances and
   * virtio_fs_mutex protects that list. The mutex also provides mutual
   * exclusion between the device removal and mounting paths.
   */
  static DEFINE_MUTEX(virtio_fs_mutex);
  static LIST_HEAD(virtio_fs_instances);

  /* The /sys/fs/virtio_fs/ kset */
  static struct kset *virtio_fs_kset;

  /* Virtqueue indices: hiprio comes first, request queues start at VQ_REQUEST */
  enum {
      VQ_HIPRIO,
      VQ_REQUEST
  };

  /* Size of the per-virtqueue name buffer */
  #define VQ_NAME_LEN 24
  40. /* Per-virtqueue state */
  41. struct virtio_fs_vq {
  42. spinlock_t lock;
  43. struct virtqueue *vq; /* protected by ->lock */
  44. struct work_struct done_work;
  45. struct list_head queued_reqs;
  46. struct list_head end_reqs; /* End these requests */
  47. struct work_struct dispatch_work;
  48. struct fuse_dev *fud;
  49. bool connected;
  50. long in_flight;
  51. struct completion in_flight_zero; /* No inflight requests */
  52. struct kobject *kobj;
  53. char name[VQ_NAME_LEN];
  54. } ____cacheline_aligned_in_smp;
  55. /* A virtio-fs device instance */
  56. struct virtio_fs {
  57. struct kobject kobj;
  58. struct kobject *mqs_kobj;
  59. struct list_head list; /* on virtio_fs_instances */
  60. char *tag;
  61. struct virtio_fs_vq *vqs;
  62. unsigned int nvqs; /* number of virtqueues */
  63. unsigned int num_request_queues; /* number of request queues */
  64. struct dax_device *dax_dev;
  65. unsigned int *mq_map; /* index = cpu id, value = request vq id */
  66. /* DAX memory window where file contents are mapped */
  67. void *window_kaddr;
  68. phys_addr_t window_phys_addr;
  69. size_t window_len;
  70. };
  71. struct virtio_fs_forget_req {
  72. struct fuse_in_header ih;
  73. struct fuse_forget_in arg;
  74. };
  75. struct virtio_fs_forget {
  76. /* This request can be temporarily queued on virt queue */
  77. struct list_head list;
  78. struct virtio_fs_forget_req req;
  79. };
  80. struct virtio_fs_req_work {
  81. struct fuse_req *req;
  82. struct virtio_fs_vq *fsvq;
  83. struct work_struct done_work;
  84. };
  85. static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
  86. struct fuse_req *req, bool in_flight,
  87. gfp_t gfp);
  88. static const struct constant_table dax_param_enums[] = {
  89. {"always", FUSE_DAX_ALWAYS },
  90. {"never", FUSE_DAX_NEVER },
  91. {"inode", FUSE_DAX_INODE_USER },
  92. {}
  93. };
  94. enum {
  95. OPT_DAX,
  96. OPT_DAX_ENUM,
  97. };
  98. static const struct fs_parameter_spec virtio_fs_parameters[] = {
  99. fsparam_flag("dax", OPT_DAX),
  100. fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
  101. {}
  102. };
  103. static int virtio_fs_parse_param(struct fs_context *fsc,
  104. struct fs_parameter *param)
  105. {
  106. struct fs_parse_result result;
  107. struct fuse_fs_context *ctx = fsc->fs_private;
  108. int opt;
  109. opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
  110. if (opt < 0)
  111. return opt;
  112. switch (opt) {
  113. case OPT_DAX:
  114. ctx->dax_mode = FUSE_DAX_ALWAYS;
  115. break;
  116. case OPT_DAX_ENUM:
  117. ctx->dax_mode = result.uint_32;
  118. break;
  119. default:
  120. return -EINVAL;
  121. }
  122. return 0;
  123. }
  124. static void virtio_fs_free_fsc(struct fs_context *fsc)
  125. {
  126. struct fuse_fs_context *ctx = fsc->fs_private;
  127. kfree(ctx);
  128. }
  129. static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
  130. {
  131. struct virtio_fs *fs = vq->vdev->priv;
  132. return &fs->vqs[vq->index];
  133. }
  134. /* Should be called with fsvq->lock held. */
  135. static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
  136. {
  137. fsvq->in_flight++;
  138. }
  139. /* Should be called with fsvq->lock held. */
  140. static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
  141. {
  142. WARN_ON(fsvq->in_flight <= 0);
  143. fsvq->in_flight--;
  144. if (!fsvq->in_flight)
  145. complete(&fsvq->in_flight_zero);
  146. }
  147. static ssize_t tag_show(struct kobject *kobj,
  148. struct kobj_attribute *attr, char *buf)
  149. {
  150. struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
  151. return sysfs_emit(buf, "%s\n", fs->tag);
  152. }
  153. static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);
  154. static struct attribute *virtio_fs_attrs[] = {
  155. &virtio_fs_tag_attr.attr,
  156. NULL
  157. };
  158. ATTRIBUTE_GROUPS(virtio_fs);
  159. static void virtio_fs_ktype_release(struct kobject *kobj)
  160. {
  161. struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);
  162. kfree(vfs->mq_map);
  163. kfree(vfs->vqs);
  164. kfree(vfs);
  165. }
  166. static const struct kobj_type virtio_fs_ktype = {
  167. .release = virtio_fs_ktype_release,
  168. .sysfs_ops = &kobj_sysfs_ops,
  169. .default_groups = virtio_fs_groups,
  170. };
  171. static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs,
  172. struct kobject *kobj)
  173. {
  174. int i;
  175. for (i = 0; i < fs->nvqs; i++) {
  176. if (kobj == fs->vqs[i].kobj)
  177. return &fs->vqs[i];
  178. }
  179. return NULL;
  180. }
  181. static ssize_t name_show(struct kobject *kobj,
  182. struct kobj_attribute *attr, char *buf)
  183. {
  184. struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
  185. struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
  186. if (!fsvq)
  187. return -EINVAL;
  188. return sysfs_emit(buf, "%s\n", fsvq->name);
  189. }
  190. static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name);
  191. static ssize_t cpu_list_show(struct kobject *kobj,
  192. struct kobj_attribute *attr, char *buf)
  193. {
  194. struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
  195. struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
  196. unsigned int cpu, qid;
  197. const size_t size = PAGE_SIZE - 1;
  198. bool first = true;
  199. int ret = 0, pos = 0;
  200. if (!fsvq)
  201. return -EINVAL;
  202. qid = fsvq->vq->index;
  203. for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
  204. if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid)) {
  205. if (first)
  206. ret = snprintf(buf + pos, size - pos, "%u", cpu);
  207. else
  208. ret = snprintf(buf + pos, size - pos, ", %u", cpu);
  209. if (ret >= size - pos)
  210. break;
  211. first = false;
  212. pos += ret;
  213. }
  214. }
  215. ret = snprintf(buf + pos, size + 1 - pos, "\n");
  216. return pos + ret;
  217. }
  218. static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list);
  219. static struct attribute *virtio_fs_vq_attrs[] = {
  220. &virtio_fs_vq_name_attr.attr,
  221. &virtio_fs_vq_cpu_list_attr.attr,
  222. NULL
  223. };
  224. static struct attribute_group virtio_fs_vq_attr_group = {
  225. .attrs = virtio_fs_vq_attrs,
  226. };
  227. /* Make sure virtiofs_mutex is held */
  228. static void virtio_fs_put_locked(struct virtio_fs *fs)
  229. {
  230. lockdep_assert_held(&virtio_fs_mutex);
  231. kobject_put(&fs->kobj);
  232. }
  233. static void virtio_fs_put(struct virtio_fs *fs)
  234. {
  235. mutex_lock(&virtio_fs_mutex);
  236. virtio_fs_put_locked(fs);
  237. mutex_unlock(&virtio_fs_mutex);
  238. }
  239. static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
  240. {
  241. struct virtio_fs *vfs = fiq->priv;
  242. virtio_fs_put(vfs);
  243. }
  244. static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
  245. {
  246. WARN_ON(fsvq->in_flight < 0);
  247. /* Wait for in flight requests to finish.*/
  248. spin_lock(&fsvq->lock);
  249. if (fsvq->in_flight) {
  250. /* We are holding virtio_fs_mutex. There should not be any
  251. * waiters waiting for completion.
  252. */
  253. reinit_completion(&fsvq->in_flight_zero);
  254. spin_unlock(&fsvq->lock);
  255. wait_for_completion(&fsvq->in_flight_zero);
  256. } else {
  257. spin_unlock(&fsvq->lock);
  258. }
  259. flush_work(&fsvq->done_work);
  260. flush_work(&fsvq->dispatch_work);
  261. }
  262. static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
  263. {
  264. struct virtio_fs_vq *fsvq;
  265. int i;
  266. for (i = 0; i < fs->nvqs; i++) {
  267. fsvq = &fs->vqs[i];
  268. virtio_fs_drain_queue(fsvq);
  269. }
  270. }
  271. static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
  272. {
  273. /* Provides mutual exclusion between ->remove and ->kill_sb
  274. * paths. We don't want both of these draining queue at the
  275. * same time. Current completion logic reinits completion
  276. * and that means there should not be any other thread
  277. * doing reinit or waiting for completion already.
  278. */
  279. mutex_lock(&virtio_fs_mutex);
  280. virtio_fs_drain_all_queues_locked(fs);
  281. mutex_unlock(&virtio_fs_mutex);
  282. }
  283. static void virtio_fs_start_all_queues(struct virtio_fs *fs)
  284. {
  285. struct virtio_fs_vq *fsvq;
  286. int i;
  287. for (i = 0; i < fs->nvqs; i++) {
  288. fsvq = &fs->vqs[i];
  289. spin_lock(&fsvq->lock);
  290. fsvq->connected = true;
  291. spin_unlock(&fsvq->lock);
  292. }
  293. }
  294. static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
  295. {
  296. struct virtio_fs_vq *fsvq;
  297. int i;
  298. for (i = 0; i < fs->nvqs; i++) {
  299. fsvq = &fs->vqs[i];
  300. kobject_put(fsvq->kobj);
  301. }
  302. }
  303. static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
  304. {
  305. struct virtio_fs_vq *fsvq;
  306. char buff[12];
  307. int i, j, ret;
  308. for (i = 0; i < fs->nvqs; i++) {
  309. fsvq = &fs->vqs[i];
  310. sprintf(buff, "%d", i);
  311. fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj);
  312. if (!fsvq->kobj) {
  313. ret = -ENOMEM;
  314. goto out_del;
  315. }
  316. ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group);
  317. if (ret) {
  318. kobject_put(fsvq->kobj);
  319. goto out_del;
  320. }
  321. }
  322. return 0;
  323. out_del:
  324. for (j = 0; j < i; j++) {
  325. fsvq = &fs->vqs[j];
  326. kobject_put(fsvq->kobj);
  327. }
  328. return ret;
  329. }
  330. /* Add a new instance to the list or return -EEXIST if tag name exists*/
  331. static int virtio_fs_add_instance(struct virtio_device *vdev,
  332. struct virtio_fs *fs)
  333. {
  334. struct virtio_fs *fs2;
  335. int ret;
  336. mutex_lock(&virtio_fs_mutex);
  337. list_for_each_entry(fs2, &virtio_fs_instances, list) {
  338. if (strcmp(fs->tag, fs2->tag) == 0) {
  339. mutex_unlock(&virtio_fs_mutex);
  340. return -EEXIST;
  341. }
  342. }
  343. /* Use the virtio_device's index as a unique identifier, there is no
  344. * need to allocate our own identifiers because the virtio_fs instance
  345. * is only visible to userspace as long as the underlying virtio_device
  346. * exists.
  347. */
  348. fs->kobj.kset = virtio_fs_kset;
  349. ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
  350. if (ret < 0)
  351. goto out_unlock;
  352. fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj);
  353. if (!fs->mqs_kobj) {
  354. ret = -ENOMEM;
  355. goto out_del;
  356. }
  357. ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
  358. if (ret < 0)
  359. goto out_put;
  360. ret = virtio_fs_add_queues_sysfs(fs);
  361. if (ret)
  362. goto out_remove;
  363. list_add_tail(&fs->list, &virtio_fs_instances);
  364. mutex_unlock(&virtio_fs_mutex);
  365. kobject_uevent(&fs->kobj, KOBJ_ADD);
  366. return 0;
  367. out_remove:
  368. sysfs_remove_link(&fs->kobj, "device");
  369. out_put:
  370. kobject_put(fs->mqs_kobj);
  371. out_del:
  372. kobject_del(&fs->kobj);
  373. out_unlock:
  374. mutex_unlock(&virtio_fs_mutex);
  375. return ret;
  376. }
  377. /* Return the virtio_fs with a given tag, or NULL */
  378. static struct virtio_fs *virtio_fs_find_instance(const char *tag)
  379. {
  380. struct virtio_fs *fs;
  381. mutex_lock(&virtio_fs_mutex);
  382. list_for_each_entry(fs, &virtio_fs_instances, list) {
  383. if (strcmp(fs->tag, tag) == 0) {
  384. kobject_get(&fs->kobj);
  385. goto found;
  386. }
  387. }
  388. fs = NULL; /* not found */
  389. found:
  390. mutex_unlock(&virtio_fs_mutex);
  391. return fs;
  392. }
  393. static void virtio_fs_free_devs(struct virtio_fs *fs)
  394. {
  395. unsigned int i;
  396. for (i = 0; i < fs->nvqs; i++) {
  397. struct virtio_fs_vq *fsvq = &fs->vqs[i];
  398. if (!fsvq->fud)
  399. continue;
  400. fuse_dev_free(fsvq->fud);
  401. fsvq->fud = NULL;
  402. }
  403. }
  404. /* Read filesystem name from virtio config into fs->tag (must kfree()). */
  405. static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
  406. {
  407. char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
  408. char *end;
  409. size_t len;
  410. virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
  411. &tag_buf, sizeof(tag_buf));
  412. end = memchr(tag_buf, '\0', sizeof(tag_buf));
  413. if (end == tag_buf)
  414. return -EINVAL; /* empty tag */
  415. if (!end)
  416. end = &tag_buf[sizeof(tag_buf)];
  417. len = end - tag_buf;
  418. fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
  419. if (!fs->tag)
  420. return -ENOMEM;
  421. memcpy(fs->tag, tag_buf, len);
  422. fs->tag[len] = '\0';
  423. /* While the VIRTIO specification allows any character, newlines are
  424. * awkward on mount(8) command-lines and cause problems in the sysfs
  425. * "tag" attr and uevent TAG= properties. Forbid them.
  426. */
  427. if (strchr(fs->tag, '\n')) {
  428. dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
  429. return -EINVAL;
  430. }
  431. dev_info(&vdev->dev, "discovered new tag: %s\n", fs->tag);
  432. return 0;
  433. }
  434. /* Work function for hiprio completion */
  435. static void virtio_fs_hiprio_done_work(struct work_struct *work)
  436. {
  437. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  438. done_work);
  439. struct virtqueue *vq = fsvq->vq;
  440. /* Free completed FUSE_FORGET requests */
  441. spin_lock(&fsvq->lock);
  442. do {
  443. unsigned int len;
  444. void *req;
  445. virtqueue_disable_cb(vq);
  446. while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
  447. kfree(req);
  448. dec_in_flight_req(fsvq);
  449. }
  450. } while (!virtqueue_enable_cb(vq));
  451. if (!list_empty(&fsvq->queued_reqs))
  452. schedule_work(&fsvq->dispatch_work);
  453. spin_unlock(&fsvq->lock);
  454. }
  455. static void virtio_fs_request_dispatch_work(struct work_struct *work)
  456. {
  457. struct fuse_req *req;
  458. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  459. dispatch_work);
  460. int ret;
  461. pr_debug("virtio-fs: worker %s called.\n", __func__);
  462. while (1) {
  463. spin_lock(&fsvq->lock);
  464. req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
  465. list);
  466. if (!req) {
  467. spin_unlock(&fsvq->lock);
  468. break;
  469. }
  470. list_del_init(&req->list);
  471. spin_unlock(&fsvq->lock);
  472. fuse_request_end(req);
  473. }
  474. /* Dispatch pending requests */
  475. while (1) {
  476. unsigned int flags;
  477. spin_lock(&fsvq->lock);
  478. req = list_first_entry_or_null(&fsvq->queued_reqs,
  479. struct fuse_req, list);
  480. if (!req) {
  481. spin_unlock(&fsvq->lock);
  482. return;
  483. }
  484. list_del_init(&req->list);
  485. spin_unlock(&fsvq->lock);
  486. flags = memalloc_nofs_save();
  487. ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL);
  488. memalloc_nofs_restore(flags);
  489. if (ret < 0) {
  490. if (ret == -ENOSPC) {
  491. spin_lock(&fsvq->lock);
  492. list_add_tail(&req->list, &fsvq->queued_reqs);
  493. spin_unlock(&fsvq->lock);
  494. return;
  495. }
  496. req->out.h.error = ret;
  497. spin_lock(&fsvq->lock);
  498. dec_in_flight_req(fsvq);
  499. spin_unlock(&fsvq->lock);
  500. pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
  501. ret);
  502. fuse_request_end(req);
  503. }
  504. }
  505. }
  506. /*
  507. * Returns 1 if queue is full and sender should wait a bit before sending
  508. * next request, 0 otherwise.
  509. */
  510. static int send_forget_request(struct virtio_fs_vq *fsvq,
  511. struct virtio_fs_forget *forget,
  512. bool in_flight)
  513. {
  514. struct scatterlist sg;
  515. struct virtqueue *vq;
  516. int ret = 0;
  517. bool notify;
  518. struct virtio_fs_forget_req *req = &forget->req;
  519. spin_lock(&fsvq->lock);
  520. if (!fsvq->connected) {
  521. if (in_flight)
  522. dec_in_flight_req(fsvq);
  523. kfree(forget);
  524. goto out;
  525. }
  526. sg_init_one(&sg, req, sizeof(*req));
  527. vq = fsvq->vq;
  528. dev_dbg(&vq->vdev->dev, "%s\n", __func__);
  529. ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
  530. if (ret < 0) {
  531. if (ret == -ENOSPC) {
  532. pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
  533. ret);
  534. list_add_tail(&forget->list, &fsvq->queued_reqs);
  535. if (!in_flight)
  536. inc_in_flight_req(fsvq);
  537. /* Queue is full */
  538. ret = 1;
  539. } else {
  540. pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
  541. ret);
  542. kfree(forget);
  543. if (in_flight)
  544. dec_in_flight_req(fsvq);
  545. }
  546. goto out;
  547. }
  548. if (!in_flight)
  549. inc_in_flight_req(fsvq);
  550. notify = virtqueue_kick_prepare(vq);
  551. spin_unlock(&fsvq->lock);
  552. if (notify)
  553. virtqueue_notify(vq);
  554. return ret;
  555. out:
  556. spin_unlock(&fsvq->lock);
  557. return ret;
  558. }
  559. static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
  560. {
  561. struct virtio_fs_forget *forget;
  562. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  563. dispatch_work);
  564. pr_debug("virtio-fs: worker %s called.\n", __func__);
  565. while (1) {
  566. spin_lock(&fsvq->lock);
  567. forget = list_first_entry_or_null(&fsvq->queued_reqs,
  568. struct virtio_fs_forget, list);
  569. if (!forget) {
  570. spin_unlock(&fsvq->lock);
  571. return;
  572. }
  573. list_del(&forget->list);
  574. spin_unlock(&fsvq->lock);
  575. if (send_forget_request(fsvq, forget, true))
  576. return;
  577. }
  578. }
  579. /* Allocate and copy args into req->argbuf */
  580. static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp)
  581. {
  582. struct fuse_args *args = req->args;
  583. unsigned int offset = 0;
  584. unsigned int num_in;
  585. unsigned int num_out;
  586. unsigned int len;
  587. unsigned int i;
  588. num_in = args->in_numargs - args->in_pages;
  589. num_out = args->out_numargs - args->out_pages;
  590. len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
  591. fuse_len_args(num_out, args->out_args);
  592. req->argbuf = kmalloc(len, gfp);
  593. if (!req->argbuf)
  594. return -ENOMEM;
  595. for (i = 0; i < num_in; i++) {
  596. memcpy(req->argbuf + offset,
  597. args->in_args[i].value,
  598. args->in_args[i].size);
  599. offset += args->in_args[i].size;
  600. }
  601. return 0;
  602. }
  603. /* Copy args out of and free req->argbuf */
  604. static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
  605. {
  606. unsigned int remaining;
  607. unsigned int offset;
  608. unsigned int num_in;
  609. unsigned int num_out;
  610. unsigned int i;
  611. remaining = req->out.h.len - sizeof(req->out.h);
  612. num_in = args->in_numargs - args->in_pages;
  613. num_out = args->out_numargs - args->out_pages;
  614. offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
  615. for (i = 0; i < num_out; i++) {
  616. unsigned int argsize = args->out_args[i].size;
  617. if (args->out_argvar &&
  618. i == args->out_numargs - 1 &&
  619. argsize > remaining) {
  620. argsize = remaining;
  621. }
  622. memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
  623. offset += argsize;
  624. if (i != args->out_numargs - 1)
  625. remaining -= argsize;
  626. }
  627. /* Store the actual size of the variable-length arg */
  628. if (args->out_argvar)
  629. args->out_args[args->out_numargs - 1].size = remaining;
  630. kfree(req->argbuf);
  631. req->argbuf = NULL;
  632. }
  633. /* Work function for request completion */
  634. static void virtio_fs_request_complete(struct fuse_req *req,
  635. struct virtio_fs_vq *fsvq)
  636. {
  637. struct fuse_args *args;
  638. struct fuse_args_pages *ap;
  639. unsigned int len, i, thislen;
  640. struct folio *folio;
  641. /*
  642. * TODO verify that server properly follows FUSE protocol
  643. * (oh.uniq, oh.len)
  644. */
  645. args = req->args;
  646. copy_args_from_argbuf(args, req);
  647. if (args->out_pages && args->page_zeroing) {
  648. len = args->out_args[args->out_numargs - 1].size;
  649. ap = container_of(args, typeof(*ap), args);
  650. for (i = 0; i < ap->num_folios; i++) {
  651. thislen = ap->descs[i].length;
  652. if (len < thislen) {
  653. WARN_ON(ap->descs[i].offset);
  654. folio = ap->folios[i];
  655. folio_zero_segment(folio, len, thislen);
  656. len = 0;
  657. } else {
  658. len -= thislen;
  659. }
  660. }
  661. }
  662. clear_bit(FR_SENT, &req->flags);
  663. fuse_request_end(req);
  664. spin_lock(&fsvq->lock);
  665. dec_in_flight_req(fsvq);
  666. spin_unlock(&fsvq->lock);
  667. }
  668. static void virtio_fs_complete_req_work(struct work_struct *work)
  669. {
  670. struct virtio_fs_req_work *w =
  671. container_of(work, typeof(*w), done_work);
  672. virtio_fs_request_complete(w->req, w->fsvq);
  673. kfree(w);
  674. }
  675. static void virtio_fs_requests_done_work(struct work_struct *work)
  676. {
  677. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  678. done_work);
  679. struct fuse_pqueue *fpq = &fsvq->fud->pq;
  680. struct virtqueue *vq = fsvq->vq;
  681. struct fuse_req *req;
  682. struct fuse_req *next;
  683. unsigned int len;
  684. LIST_HEAD(reqs);
  685. /* Collect completed requests off the virtqueue */
  686. spin_lock(&fsvq->lock);
  687. do {
  688. virtqueue_disable_cb(vq);
  689. while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
  690. spin_lock(&fpq->lock);
  691. list_move_tail(&req->list, &reqs);
  692. spin_unlock(&fpq->lock);
  693. }
  694. } while (!virtqueue_enable_cb(vq));
  695. spin_unlock(&fsvq->lock);
  696. /* End requests */
  697. list_for_each_entry_safe(req, next, &reqs, list) {
  698. list_del_init(&req->list);
  699. /* blocking async request completes in a worker context */
  700. if (req->args->may_block) {
  701. struct virtio_fs_req_work *w;
  702. w = kzalloc_obj(*w, GFP_NOFS | __GFP_NOFAIL);
  703. INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
  704. w->fsvq = fsvq;
  705. w->req = req;
  706. schedule_work(&w->done_work);
  707. } else {
  708. virtio_fs_request_complete(req, fsvq);
  709. }
  710. }
  711. /* Try to push previously queued requests, as the queue might no longer be full */
  712. spin_lock(&fsvq->lock);
  713. if (!list_empty(&fsvq->queued_reqs))
  714. schedule_work(&fsvq->dispatch_work);
  715. spin_unlock(&fsvq->lock);
  716. }
  717. static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
  718. {
  719. const struct cpumask *mask, *masks;
  720. unsigned int q, cpu, nr_masks;
  721. /* First attempt to map using existing transport layer affinities
  722. * e.g. PCIe MSI-X
  723. */
  724. if (!vdev->config->get_vq_affinity)
  725. goto fallback;
  726. for (q = 0; q < fs->num_request_queues; q++) {
  727. mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
  728. if (!mask)
  729. goto fallback;
  730. for_each_cpu(cpu, mask)
  731. fs->mq_map[cpu] = q + VQ_REQUEST;
  732. }
  733. return;
  734. fallback:
  735. /* Attempt to map evenly in groups over the CPUs */
  736. masks = group_cpus_evenly(fs->num_request_queues, &nr_masks);
  737. /* If even this fails we default to all CPUs use first request queue */
  738. if (!masks) {
  739. for_each_possible_cpu(cpu)
  740. fs->mq_map[cpu] = VQ_REQUEST;
  741. return;
  742. }
  743. for (q = 0; q < fs->num_request_queues; q++) {
  744. for_each_cpu(cpu, &masks[q % nr_masks])
  745. fs->mq_map[cpu] = q + VQ_REQUEST;
  746. }
  747. kfree(masks);
  748. }
  749. /* Virtqueue interrupt handler */
  750. static void virtio_fs_vq_done(struct virtqueue *vq)
  751. {
  752. struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
  753. dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
  754. schedule_work(&fsvq->done_work);
  755. }
  756. static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
  757. int vq_type)
  758. {
  759. strscpy(fsvq->name, name, VQ_NAME_LEN);
  760. spin_lock_init(&fsvq->lock);
  761. INIT_LIST_HEAD(&fsvq->queued_reqs);
  762. INIT_LIST_HEAD(&fsvq->end_reqs);
  763. init_completion(&fsvq->in_flight_zero);
  764. if (vq_type == VQ_REQUEST) {
  765. INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
  766. INIT_WORK(&fsvq->dispatch_work,
  767. virtio_fs_request_dispatch_work);
  768. } else {
  769. INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
  770. INIT_WORK(&fsvq->dispatch_work,
  771. virtio_fs_hiprio_dispatch_work);
  772. }
  773. }
  774. /* Initialize virtqueues */
  775. static int virtio_fs_setup_vqs(struct virtio_device *vdev,
  776. struct virtio_fs *fs)
  777. {
  778. struct virtqueue_info *vqs_info;
  779. struct virtqueue **vqs;
  780. /* Specify pre_vectors to ensure that the queues before the
  781. * request queues (e.g. hiprio) don't claim any of the CPUs in
  782. * the multi-queue mapping and interrupt affinities
  783. */
  784. struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
  785. unsigned int i;
  786. int ret = 0;
  787. virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
  788. &fs->num_request_queues);
  789. if (fs->num_request_queues == 0)
  790. return -EINVAL;
  791. /* Truncate nr of request queues to nr_cpu_id */
  792. fs->num_request_queues = min_t(unsigned int, fs->num_request_queues,
  793. nr_cpu_ids);
  794. fs->nvqs = VQ_REQUEST + fs->num_request_queues;
  795. fs->vqs = kzalloc_objs(fs->vqs[VQ_HIPRIO], fs->nvqs);
  796. if (!fs->vqs)
  797. return -ENOMEM;
  798. vqs = kmalloc_objs(vqs[VQ_HIPRIO], fs->nvqs);
  799. fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
  800. dev_to_node(&vdev->dev));
  801. vqs_info = kzalloc_objs(*vqs_info, fs->nvqs);
  802. if (!vqs || !vqs_info || !fs->mq_map) {
  803. ret = -ENOMEM;
  804. goto out;
  805. }
  806. /* Initialize the hiprio/forget request virtqueue */
  807. vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done;
  808. virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
  809. vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name;
  810. /* Initialize the requests virtqueues */
  811. for (i = VQ_REQUEST; i < fs->nvqs; i++) {
  812. char vq_name[VQ_NAME_LEN];
  813. snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
  814. virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
  815. vqs_info[i].callback = virtio_fs_vq_done;
  816. vqs_info[i].name = fs->vqs[i].name;
  817. }
  818. ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc);
  819. if (ret < 0)
  820. goto out;
  821. for (i = 0; i < fs->nvqs; i++)
  822. fs->vqs[i].vq = vqs[i];
  823. virtio_fs_start_all_queues(fs);
  824. out:
  825. kfree(vqs_info);
  826. kfree(vqs);
  827. if (ret) {
  828. kfree(fs->vqs);
  829. kfree(fs->mq_map);
  830. }
  831. return ret;
  832. }
  833. /* Free virtqueues (device must already be reset) */
  834. static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
  835. {
  836. vdev->config->del_vqs(vdev);
  837. }
  838. /* Map a window offset to a page frame number. The window offset will have
  839. * been produced by .iomap_begin(), which maps a file offset to a window
  840. * offset.
  841. */
  842. static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
  843. long nr_pages, enum dax_access_mode mode,
  844. void **kaddr, unsigned long *pfn)
  845. {
  846. struct virtio_fs *fs = dax_get_private(dax_dev);
  847. phys_addr_t offset = PFN_PHYS(pgoff);
  848. size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;
  849. if (kaddr)
  850. *kaddr = fs->window_kaddr + offset;
  851. if (pfn)
  852. *pfn = PHYS_PFN(fs->window_phys_addr + offset);
  853. return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
  854. }
  855. static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
  856. pgoff_t pgoff, size_t nr_pages)
  857. {
  858. long rc;
  859. void *kaddr;
  860. rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
  861. NULL);
  862. if (rc < 0)
  863. return dax_mem2blk_err(rc);
  864. memset(kaddr, 0, nr_pages << PAGE_SHIFT);
  865. dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
  866. return 0;
  867. }
  868. static const struct dax_operations virtio_fs_dax_ops = {
  869. .direct_access = virtio_fs_direct_access,
  870. .zero_page_range = virtio_fs_zero_page_range,
  871. };
  872. static void virtio_fs_cleanup_dax(void *data)
  873. {
  874. struct dax_device *dax_dev = data;
  875. kill_dax(dax_dev);
  876. put_dax(dax_dev);
  877. }
  878. DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
  879. static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
  880. {
  881. struct dax_device *dax_dev __free(cleanup_dax) = NULL;
  882. struct virtio_shm_region cache_reg;
  883. struct dev_pagemap *pgmap;
  884. bool have_cache;
  885. if (!IS_ENABLED(CONFIG_FUSE_DAX))
  886. return 0;
  887. dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
  888. if (IS_ERR(dax_dev)) {
  889. int rc = PTR_ERR(dax_dev);
  890. return rc == -EOPNOTSUPP ? 0 : rc;
  891. }
  892. /* Get cache region */
  893. have_cache = virtio_get_shm_region(vdev, &cache_reg,
  894. (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
  895. if (!have_cache) {
  896. dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
  897. return 0;
  898. }
  899. if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
  900. dev_name(&vdev->dev))) {
  901. dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
  902. cache_reg.addr, cache_reg.len);
  903. return -EBUSY;
  904. }
  905. dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
  906. cache_reg.addr);
  907. pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
  908. if (!pgmap)
  909. return -ENOMEM;
  910. pgmap->type = MEMORY_DEVICE_FS_DAX;
  911. /* Ideally we would directly use the PCI BAR resource but
  912. * devm_memremap_pages() wants its own copy in pgmap. So
  913. * initialize a struct resource from scratch (only the start
  914. * and end fields will be used).
  915. */
  916. pgmap->range = (struct range) {
  917. .start = (phys_addr_t) cache_reg.addr,
  918. .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
  919. };
  920. pgmap->nr_range = 1;
  921. fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
  922. if (IS_ERR(fs->window_kaddr))
  923. return PTR_ERR(fs->window_kaddr);
  924. fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
  925. fs->window_len = (phys_addr_t) cache_reg.len;
  926. dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
  927. __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
  928. fs->dax_dev = no_free_ptr(dax_dev);
  929. return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
  930. fs->dax_dev);
  931. }
  932. static int virtio_fs_probe(struct virtio_device *vdev)
  933. {
  934. struct virtio_fs *fs;
  935. int ret;
  936. fs = kzalloc_obj(*fs);
  937. if (!fs)
  938. return -ENOMEM;
  939. kobject_init(&fs->kobj, &virtio_fs_ktype);
  940. vdev->priv = fs;
  941. ret = virtio_fs_read_tag(vdev, fs);
  942. if (ret < 0)
  943. goto out;
  944. ret = virtio_fs_setup_vqs(vdev, fs);
  945. if (ret < 0)
  946. goto out;
  947. virtio_fs_map_queues(vdev, fs);
  948. ret = virtio_fs_setup_dax(vdev, fs);
  949. if (ret < 0)
  950. goto out_vqs;
  951. /* Bring the device online in case the filesystem is mounted and
  952. * requests need to be sent before we return.
  953. */
  954. virtio_device_ready(vdev);
  955. ret = virtio_fs_add_instance(vdev, fs);
  956. if (ret < 0)
  957. goto out_vqs;
  958. return 0;
  959. out_vqs:
  960. virtio_reset_device(vdev);
  961. virtio_fs_cleanup_vqs(vdev);
  962. out:
  963. vdev->priv = NULL;
  964. kobject_put(&fs->kobj);
  965. return ret;
  966. }
  967. static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
  968. {
  969. struct virtio_fs_vq *fsvq;
  970. int i;
  971. for (i = 0; i < fs->nvqs; i++) {
  972. fsvq = &fs->vqs[i];
  973. spin_lock(&fsvq->lock);
  974. fsvq->connected = false;
  975. spin_unlock(&fsvq->lock);
  976. }
  977. }
  978. static void virtio_fs_remove(struct virtio_device *vdev)
  979. {
  980. struct virtio_fs *fs = vdev->priv;
  981. mutex_lock(&virtio_fs_mutex);
  982. /* This device is going away. No one should get new reference */
  983. list_del_init(&fs->list);
  984. virtio_fs_delete_queues_sysfs(fs);
  985. sysfs_remove_link(&fs->kobj, "device");
  986. kobject_put(fs->mqs_kobj);
  987. kobject_del(&fs->kobj);
  988. virtio_fs_stop_all_queues(fs);
  989. virtio_fs_drain_all_queues_locked(fs);
  990. virtio_reset_device(vdev);
  991. virtio_fs_cleanup_vqs(vdev);
  992. vdev->priv = NULL;
  993. /* Put device reference on virtio_fs object */
  994. virtio_fs_put_locked(fs);
  995. mutex_unlock(&virtio_fs_mutex);
  996. }
  997. #ifdef CONFIG_PM_SLEEP
  998. static int virtio_fs_freeze(struct virtio_device *vdev)
  999. {
  1000. /* TODO need to save state here */
  1001. pr_warn("virtio-fs: suspend/resume not yet supported\n");
  1002. return -EOPNOTSUPP;
  1003. }
  1004. static int virtio_fs_restore(struct virtio_device *vdev)
  1005. {
  1006. /* TODO need to restore state here */
  1007. return 0;
  1008. }
  1009. #endif /* CONFIG_PM_SLEEP */
  1010. static const struct virtio_device_id id_table[] = {
  1011. { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
  1012. {},
  1013. };
  1014. static const unsigned int feature_table[] = {};
  1015. static struct virtio_driver virtio_fs_driver = {
  1016. .driver.name = KBUILD_MODNAME,
  1017. .id_table = id_table,
  1018. .feature_table = feature_table,
  1019. .feature_table_size = ARRAY_SIZE(feature_table),
  1020. .probe = virtio_fs_probe,
  1021. .remove = virtio_fs_remove,
  1022. #ifdef CONFIG_PM_SLEEP
  1023. .freeze = virtio_fs_freeze,
  1024. .restore = virtio_fs_restore,
  1025. #endif
  1026. };
  1027. static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
  1028. {
  1029. struct virtio_fs_forget *forget;
  1030. struct virtio_fs_forget_req *req;
  1031. struct virtio_fs *fs = fiq->priv;
  1032. struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
  1033. u64 unique = fuse_get_unique(fiq);
  1034. /* Allocate a buffer for the request */
  1035. forget = kmalloc_obj(*forget, GFP_NOFS | __GFP_NOFAIL);
  1036. req = &forget->req;
  1037. req->ih = (struct fuse_in_header){
  1038. .opcode = FUSE_FORGET,
  1039. .nodeid = link->forget_one.nodeid,
  1040. .unique = unique,
  1041. .len = sizeof(*req),
  1042. };
  1043. req->arg = (struct fuse_forget_in){
  1044. .nlookup = link->forget_one.nlookup,
  1045. };
  1046. send_forget_request(fsvq, forget, false);
  1047. kfree(link);
  1048. }
  1049. static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
  1050. {
  1051. /*
  1052. * TODO interrupts.
  1053. *
  1054. * Normal fs operations on a local filesystems aren't interruptible.
  1055. * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
  1056. * with shared lock between host and guest.
  1057. */
  1058. }
  1059. /* Count number of scatter-gather elements required */
  1060. static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs,
  1061. unsigned int num_folios,
  1062. unsigned int total_len)
  1063. {
  1064. unsigned int i;
  1065. unsigned int this_len;
  1066. for (i = 0; i < num_folios && total_len; i++) {
  1067. this_len = min(folio_descs[i].length, total_len);
  1068. total_len -= this_len;
  1069. }
  1070. return i;
  1071. }
  1072. /* Return the number of scatter-gather list elements required */
  1073. static unsigned int sg_count_fuse_req(struct fuse_req *req)
  1074. {
  1075. struct fuse_args *args = req->args;
  1076. struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
  1077. unsigned int size, total_sgs = 1 /* fuse_in_header */;
  1078. if (args->in_numargs - args->in_pages)
  1079. total_sgs += 1;
  1080. if (args->in_pages) {
  1081. size = args->in_args[args->in_numargs - 1].size;
  1082. total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
  1083. size);
  1084. }
  1085. if (!test_bit(FR_ISREPLY, &req->flags))
  1086. return total_sgs;
  1087. total_sgs += 1 /* fuse_out_header */;
  1088. if (args->out_numargs - args->out_pages)
  1089. total_sgs += 1;
  1090. if (args->out_pages) {
  1091. size = args->out_args[args->out_numargs - 1].size;
  1092. total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
  1093. size);
  1094. }
  1095. return total_sgs;
  1096. }
  1097. /* Add folios to scatter-gather list and return number of elements used */
  1098. static unsigned int sg_init_fuse_folios(struct scatterlist *sg,
  1099. struct folio **folios,
  1100. struct fuse_folio_desc *folio_descs,
  1101. unsigned int num_folios,
  1102. unsigned int total_len)
  1103. {
  1104. unsigned int i;
  1105. unsigned int this_len;
  1106. for (i = 0; i < num_folios && total_len; i++) {
  1107. sg_init_table(&sg[i], 1);
  1108. this_len = min(folio_descs[i].length, total_len);
  1109. sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset);
  1110. total_len -= this_len;
  1111. }
  1112. return i;
  1113. }
  1114. /* Add args to scatter-gather list and return number of elements used */
  1115. static unsigned int sg_init_fuse_args(struct scatterlist *sg,
  1116. struct fuse_req *req,
  1117. struct fuse_arg *args,
  1118. unsigned int numargs,
  1119. bool argpages,
  1120. void *argbuf,
  1121. unsigned int *len_used)
  1122. {
  1123. struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
  1124. unsigned int total_sgs = 0;
  1125. unsigned int len;
  1126. len = fuse_len_args(numargs - argpages, args);
  1127. if (len)
  1128. sg_init_one(&sg[total_sgs++], argbuf, len);
  1129. if (argpages)
  1130. total_sgs += sg_init_fuse_folios(&sg[total_sgs],
  1131. ap->folios, ap->descs,
  1132. ap->num_folios,
  1133. args[numargs - 1].size);
  1134. if (len_used)
  1135. *len_used = len;
  1136. return total_sgs;
  1137. }
  1138. /* Add a request to a virtqueue and kick the device */
  1139. static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
  1140. struct fuse_req *req, bool in_flight,
  1141. gfp_t gfp)
  1142. {
  1143. /* requests need at least 4 elements */
  1144. struct scatterlist *stack_sgs[6];
  1145. struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
  1146. struct scatterlist **sgs = stack_sgs;
  1147. struct scatterlist *sg = stack_sg;
  1148. struct virtqueue *vq;
  1149. struct fuse_args *args = req->args;
  1150. unsigned int argbuf_used = 0;
  1151. unsigned int out_sgs = 0;
  1152. unsigned int in_sgs = 0;
  1153. unsigned int total_sgs;
  1154. unsigned int i, hash;
  1155. int ret;
  1156. bool notify;
  1157. struct fuse_pqueue *fpq;
  1158. /* Does the sglist fit on the stack? */
  1159. total_sgs = sg_count_fuse_req(req);
  1160. if (total_sgs > ARRAY_SIZE(stack_sgs)) {
  1161. sgs = kmalloc_objs(sgs[0], total_sgs, gfp);
  1162. sg = kmalloc_objs(sg[0], total_sgs, gfp);
  1163. if (!sgs || !sg) {
  1164. ret = -ENOMEM;
  1165. goto out;
  1166. }
  1167. }
  1168. /* Use a bounce buffer since stack args cannot be mapped */
  1169. ret = copy_args_to_argbuf(req, gfp);
  1170. if (ret < 0)
  1171. goto out;
  1172. /* Request elements */
  1173. sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
  1174. out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
  1175. (struct fuse_arg *)args->in_args,
  1176. args->in_numargs, args->in_pages,
  1177. req->argbuf, &argbuf_used);
  1178. /* Reply elements */
  1179. if (test_bit(FR_ISREPLY, &req->flags)) {
  1180. sg_init_one(&sg[out_sgs + in_sgs++],
  1181. &req->out.h, sizeof(req->out.h));
  1182. in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
  1183. args->out_args, args->out_numargs,
  1184. args->out_pages,
  1185. req->argbuf + argbuf_used, NULL);
  1186. }
  1187. WARN_ON(out_sgs + in_sgs != total_sgs);
  1188. for (i = 0; i < total_sgs; i++)
  1189. sgs[i] = &sg[i];
  1190. spin_lock(&fsvq->lock);
  1191. if (!fsvq->connected) {
  1192. spin_unlock(&fsvq->lock);
  1193. ret = -ENOTCONN;
  1194. goto out;
  1195. }
  1196. vq = fsvq->vq;
  1197. ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
  1198. if (ret < 0) {
  1199. spin_unlock(&fsvq->lock);
  1200. goto out;
  1201. }
  1202. /* Request successfully sent. */
  1203. fpq = &fsvq->fud->pq;
  1204. hash = fuse_req_hash(req->in.h.unique);
  1205. spin_lock(&fpq->lock);
  1206. list_add_tail(&req->list, &fpq->processing[hash]);
  1207. spin_unlock(&fpq->lock);
  1208. set_bit(FR_SENT, &req->flags);
  1209. /* matches barrier in request_wait_answer() */
  1210. smp_mb__after_atomic();
  1211. if (!in_flight)
  1212. inc_in_flight_req(fsvq);
  1213. notify = virtqueue_kick_prepare(vq);
  1214. spin_unlock(&fsvq->lock);
  1215. if (notify)
  1216. virtqueue_notify(vq);
  1217. out:
  1218. if (ret < 0 && req->argbuf) {
  1219. kfree(req->argbuf);
  1220. req->argbuf = NULL;
  1221. }
  1222. if (sgs != stack_sgs) {
  1223. kfree(sgs);
  1224. kfree(sg);
  1225. }
  1226. return ret;
  1227. }
  1228. static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
  1229. {
  1230. unsigned int queue_id;
  1231. struct virtio_fs *fs;
  1232. struct virtio_fs_vq *fsvq;
  1233. int ret;
  1234. fuse_request_assign_unique(fiq, req);
  1235. clear_bit(FR_PENDING, &req->flags);
  1236. fs = fiq->priv;
  1237. queue_id = fs->mq_map[raw_smp_processor_id()];
  1238. pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
  1239. __func__, req->in.h.opcode, req->in.h.unique,
  1240. req->in.h.nodeid, req->in.h.len,
  1241. fuse_len_args(req->args->out_numargs, req->args->out_args),
  1242. queue_id);
  1243. fsvq = &fs->vqs[queue_id];
  1244. ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC);
  1245. if (ret < 0) {
  1246. if (ret == -ENOSPC) {
  1247. /*
  1248. * Virtqueue full. Retry submission from worker
  1249. * context as we might be holding fc->bg_lock.
  1250. */
  1251. spin_lock(&fsvq->lock);
  1252. list_add_tail(&req->list, &fsvq->queued_reqs);
  1253. inc_in_flight_req(fsvq);
  1254. spin_unlock(&fsvq->lock);
  1255. return;
  1256. }
  1257. req->out.h.error = ret;
  1258. pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
  1259. /* Can't end request in submission context. Use a worker */
  1260. spin_lock(&fsvq->lock);
  1261. list_add_tail(&req->list, &fsvq->end_reqs);
  1262. schedule_work(&fsvq->dispatch_work);
  1263. spin_unlock(&fsvq->lock);
  1264. return;
  1265. }
  1266. }
  1267. static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
  1268. .send_forget = virtio_fs_send_forget,
  1269. .send_interrupt = virtio_fs_send_interrupt,
  1270. .send_req = virtio_fs_send_req,
  1271. .release = virtio_fs_fiq_release,
  1272. };
  1273. static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
  1274. {
  1275. ctx->rootmode = S_IFDIR;
  1276. ctx->default_permissions = 1;
  1277. ctx->allow_other = 1;
  1278. ctx->max_read = UINT_MAX;
  1279. ctx->blksize = 512;
  1280. ctx->destroy = true;
  1281. ctx->no_control = true;
  1282. ctx->no_force_umount = true;
  1283. }
  1284. static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
  1285. {
  1286. struct fuse_mount *fm = get_fuse_mount_super(sb);
  1287. struct fuse_conn *fc = fm->fc;
  1288. struct virtio_fs *fs = fc->iq.priv;
  1289. struct fuse_fs_context *ctx = fsc->fs_private;
  1290. unsigned int i;
  1291. int err;
  1292. virtio_fs_ctx_set_defaults(ctx);
  1293. mutex_lock(&virtio_fs_mutex);
  1294. /* After holding mutex, make sure virtiofs device is still there.
  1295. * Though we are holding a reference to it, drive ->remove might
  1296. * still have cleaned up virtual queues. In that case bail out.
  1297. */
  1298. err = -EINVAL;
  1299. if (list_empty(&fs->list)) {
  1300. pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
  1301. goto err;
  1302. }
  1303. err = -ENOMEM;
  1304. /* Allocate fuse_dev for hiprio and notification queues */
  1305. for (i = 0; i < fs->nvqs; i++) {
  1306. struct virtio_fs_vq *fsvq = &fs->vqs[i];
  1307. fsvq->fud = fuse_dev_alloc();
  1308. if (!fsvq->fud)
  1309. goto err_free_fuse_devs;
  1310. }
  1311. /* virtiofs allocates and installs its own fuse devices */
  1312. ctx->fudptr = NULL;
  1313. if (ctx->dax_mode != FUSE_DAX_NEVER) {
  1314. if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
  1315. err = -EINVAL;
  1316. pr_err("virtio-fs: dax can't be enabled as filesystem"
  1317. " device does not support it.\n");
  1318. goto err_free_fuse_devs;
  1319. }
  1320. ctx->dax_dev = fs->dax_dev;
  1321. }
  1322. err = fuse_fill_super_common(sb, ctx);
  1323. if (err < 0)
  1324. goto err_free_fuse_devs;
  1325. for (i = 0; i < fs->nvqs; i++) {
  1326. struct virtio_fs_vq *fsvq = &fs->vqs[i];
  1327. fuse_dev_install(fsvq->fud, fc);
  1328. }
  1329. /* Previous unmount will stop all queues. Start these again */
  1330. virtio_fs_start_all_queues(fs);
  1331. fuse_send_init(fm);
  1332. mutex_unlock(&virtio_fs_mutex);
  1333. return 0;
  1334. err_free_fuse_devs:
  1335. virtio_fs_free_devs(fs);
  1336. err:
  1337. mutex_unlock(&virtio_fs_mutex);
  1338. return err;
  1339. }
  1340. static void virtio_fs_conn_destroy(struct fuse_mount *fm)
  1341. {
  1342. struct fuse_conn *fc = fm->fc;
  1343. struct virtio_fs *vfs = fc->iq.priv;
  1344. struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];
  1345. /* Stop dax worker. Soon evict_inodes() will be called which
  1346. * will free all memory ranges belonging to all inodes.
  1347. */
  1348. if (IS_ENABLED(CONFIG_FUSE_DAX))
  1349. fuse_dax_cancel_work(fc);
  1350. /* Stop forget queue. Soon destroy will be sent */
  1351. spin_lock(&fsvq->lock);
  1352. fsvq->connected = false;
  1353. spin_unlock(&fsvq->lock);
  1354. virtio_fs_drain_all_queues(vfs);
  1355. fuse_conn_destroy(fm);
  1356. /* fuse_conn_destroy() must have sent destroy. Stop all queues
  1357. * and drain one more time and free fuse devices. Freeing fuse
  1358. * devices will drop their reference on fuse_conn and that in
  1359. * turn will drop its reference on virtio_fs object.
  1360. */
  1361. virtio_fs_stop_all_queues(vfs);
  1362. virtio_fs_drain_all_queues(vfs);
  1363. virtio_fs_free_devs(vfs);
  1364. }
  1365. static void virtio_kill_sb(struct super_block *sb)
  1366. {
  1367. struct fuse_mount *fm = get_fuse_mount_super(sb);
  1368. bool last;
  1369. /* If mount failed, we can still be called without any fc */
  1370. if (sb->s_root) {
  1371. last = fuse_mount_remove(fm);
  1372. if (last)
  1373. virtio_fs_conn_destroy(fm);
  1374. }
  1375. kill_anon_super(sb);
  1376. fuse_mount_destroy(fm);
  1377. }
  1378. static int virtio_fs_test_super(struct super_block *sb,
  1379. struct fs_context *fsc)
  1380. {
  1381. struct fuse_mount *fsc_fm = fsc->s_fs_info;
  1382. struct fuse_mount *sb_fm = get_fuse_mount_super(sb);
  1383. return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
  1384. }
  1385. static int virtio_fs_get_tree(struct fs_context *fsc)
  1386. {
  1387. struct virtio_fs *fs;
  1388. struct super_block *sb;
  1389. struct fuse_conn *fc = NULL;
  1390. struct fuse_mount *fm;
  1391. unsigned int virtqueue_size;
  1392. int err = -EIO;
  1393. if (!fsc->source)
  1394. return invalf(fsc, "No source specified");
  1395. /* This gets a reference on virtio_fs object. This ptr gets installed
  1396. * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
  1397. * to drop the reference to this object.
  1398. */
  1399. fs = virtio_fs_find_instance(fsc->source);
  1400. if (!fs) {
  1401. pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
  1402. return -EINVAL;
  1403. }
  1404. virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
  1405. if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
  1406. goto out_err;
  1407. err = -ENOMEM;
  1408. fc = kzalloc_obj(struct fuse_conn);
  1409. if (!fc)
  1410. goto out_err;
  1411. fm = kzalloc_obj(struct fuse_mount);
  1412. if (!fm)
  1413. goto out_err;
  1414. fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
  1415. fc->release = fuse_free_conn;
  1416. fc->delete_stale = true;
  1417. fc->auto_submounts = true;
  1418. fc->sync_fs = true;
  1419. fc->use_pages_for_kvec_io = true;
  1420. /* Tell FUSE to split requests that exceed the virtqueue's size */
  1421. fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
  1422. virtqueue_size - FUSE_HEADER_OVERHEAD);
  1423. fsc->s_fs_info = fm;
  1424. sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
  1425. if (fsc->s_fs_info)
  1426. fuse_mount_destroy(fm);
  1427. if (IS_ERR(sb))
  1428. return PTR_ERR(sb);
  1429. if (!sb->s_root) {
  1430. err = virtio_fs_fill_super(sb, fsc);
  1431. if (err) {
  1432. deactivate_locked_super(sb);
  1433. return err;
  1434. }
  1435. sb->s_flags |= SB_ACTIVE;
  1436. }
  1437. WARN_ON(fsc->root);
  1438. fsc->root = dget(sb->s_root);
  1439. return 0;
  1440. out_err:
  1441. kfree(fc);
  1442. virtio_fs_put(fs);
  1443. return err;
  1444. }
  1445. static const struct fs_context_operations virtio_fs_context_ops = {
  1446. .free = virtio_fs_free_fsc,
  1447. .parse_param = virtio_fs_parse_param,
  1448. .get_tree = virtio_fs_get_tree,
  1449. };
  1450. static int virtio_fs_init_fs_context(struct fs_context *fsc)
  1451. {
  1452. struct fuse_fs_context *ctx;
  1453. if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
  1454. return fuse_init_fs_context_submount(fsc);
  1455. ctx = kzalloc_obj(struct fuse_fs_context);
  1456. if (!ctx)
  1457. return -ENOMEM;
  1458. fsc->fs_private = ctx;
  1459. fsc->ops = &virtio_fs_context_ops;
  1460. return 0;
  1461. }
  1462. static struct file_system_type virtio_fs_type = {
  1463. .owner = THIS_MODULE,
  1464. .name = "virtiofs",
  1465. .init_fs_context = virtio_fs_init_fs_context,
  1466. .kill_sb = virtio_kill_sb,
  1467. .fs_flags = FS_ALLOW_IDMAP,
  1468. };
  1469. static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
  1470. {
  1471. const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
  1472. add_uevent_var(env, "TAG=%s", fs->tag);
  1473. return 0;
  1474. }
  1475. static const struct kset_uevent_ops virtio_fs_uevent_ops = {
  1476. .uevent = virtio_fs_uevent,
  1477. };
  1478. static int __init virtio_fs_sysfs_init(void)
  1479. {
  1480. virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
  1481. fs_kobj);
  1482. if (!virtio_fs_kset)
  1483. return -ENOMEM;
  1484. return 0;
  1485. }
  1486. static void virtio_fs_sysfs_exit(void)
  1487. {
  1488. kset_unregister(virtio_fs_kset);
  1489. virtio_fs_kset = NULL;
  1490. }
  1491. static int __init virtio_fs_init(void)
  1492. {
  1493. int ret;
  1494. ret = virtio_fs_sysfs_init();
  1495. if (ret < 0)
  1496. return ret;
  1497. ret = register_virtio_driver(&virtio_fs_driver);
  1498. if (ret < 0)
  1499. goto sysfs_exit;
  1500. ret = register_filesystem(&virtio_fs_type);
  1501. if (ret < 0)
  1502. goto unregister_virtio_driver;
  1503. return 0;
  1504. unregister_virtio_driver:
  1505. unregister_virtio_driver(&virtio_fs_driver);
  1506. sysfs_exit:
  1507. virtio_fs_sysfs_exit();
  1508. return ret;
  1509. }
  1510. module_init(virtio_fs_init);
  1511. static void __exit virtio_fs_exit(void)
  1512. {
  1513. unregister_filesystem(&virtio_fs_type);
  1514. unregister_virtio_driver(&virtio_fs_driver);
  1515. virtio_fs_sysfs_exit();
  1516. }
  1517. module_exit(virtio_fs_exit);
  1518. MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
  1519. MODULE_DESCRIPTION("Virtio Filesystem");
  1520. MODULE_LICENSE("GPL");
  1521. MODULE_ALIAS_FS(KBUILD_MODNAME);
  1522. MODULE_DEVICE_TABLE(virtio, id_table);