nfs42xattr.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
  4. *
  5. * User extended attribute client side cache functions.
  6. *
  7. * Author: Frank van der Linden <fllinden@amazon.com>
  8. */
  9. #include <linux/errno.h>
  10. #include <linux/nfs_fs.h>
  11. #include <linux/hashtable.h>
  12. #include <linux/refcount.h>
  13. #include <uapi/linux/xattr.h>
  14. #include "nfs4_fs.h"
  15. #include "internal.h"
  /*
   * User extended attributes client side caching is implemented by having
   * a cache structure attached to NFS inodes. This structure is allocated
   * when needed, and freed when the cache is zapped.
   *
   * The cache structure contains a hash table of entries, and a pointer
   * to a special-cased entry for the listxattr cache.
   *
   * Accessing and allocating / freeing the caches is done via reference
   * counting. The cache entries use a similar refcounting scheme.
   *
   * This makes freeing a cache, both from the shrinker and from the
   * zap cache path, easy. It also means that, in current use cases,
   * the large majority of inodes will not waste any memory, as they
   * will never have any user extended attributes assigned to them.
   *
   * Attribute entries are hashed into a simple hash table. They are
   * also part of an LRU.
   *
   * There are three shrinkers.
   *
   * Two shrinkers deal with the cache entries themselves: one for
   * large entries (> PAGE_SIZE), and one for smaller entries. The
   * shrinker for the larger entries works more aggressively than
   * the one for the smaller entries.
   *
   * The other shrinker frees the cache structures themselves.
   */
  /*
   * Number of hash buckets in every per-inode xattr cache.
   *
   * 64 is a good default: there is likely no reasonable workload that
   * uses more than even 64 user extended attributes. More can certainly
   * be added, at the price of longer bucket chains.
   */
  #define NFS4_XATTR_HASH_SIZE 64

  /* Debug facility selected for this file. */
  #define NFSDBG_FACILITY NFSDBG_XATTRCACHE
  52. struct nfs4_xattr_cache;
  53. struct nfs4_xattr_entry;
  54. struct nfs4_xattr_bucket {
  55. spinlock_t lock;
  56. struct hlist_head hlist;
  57. struct nfs4_xattr_cache *cache;
  58. bool draining;
  59. };
  60. struct nfs4_xattr_cache {
  61. struct kref ref;
  62. struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
  63. struct list_head lru;
  64. struct list_head dispose;
  65. atomic_long_t nent;
  66. spinlock_t listxattr_lock;
  67. struct inode *inode;
  68. struct nfs4_xattr_entry *listxattr;
  69. };
  70. struct nfs4_xattr_entry {
  71. struct kref ref;
  72. struct hlist_node hnode;
  73. struct list_head lru;
  74. struct list_head dispose;
  75. char *xattr_name;
  76. void *xattr_value;
  77. size_t xattr_size;
  78. struct nfs4_xattr_bucket *bucket;
  79. uint32_t flags;
  80. };
  81. #define NFS4_XATTR_ENTRY_EXTVAL 0x0001
  82. /*
  83. * LRU list of NFS inodes that have xattr caches.
  84. */
  85. static struct list_lru nfs4_xattr_cache_lru;
  86. static struct list_lru nfs4_xattr_entry_lru;
  87. static struct list_lru nfs4_xattr_large_entry_lru;
  88. static struct kmem_cache *nfs4_xattr_cache_cachep;
  89. /*
  90. * Hashing helper functions.
  91. */
  92. static void
  93. nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
  94. {
  95. unsigned int i;
  96. for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
  97. INIT_HLIST_HEAD(&cache->buckets[i].hlist);
  98. spin_lock_init(&cache->buckets[i].lock);
  99. cache->buckets[i].cache = cache;
  100. cache->buckets[i].draining = false;
  101. }
  102. }
  103. /*
  104. * Locking order:
  105. * 1. inode i_lock or bucket lock
  106. * 2. list_lru lock (taken by list_lru_* functions)
  107. */
  108. /*
  109. * Wrapper functions to add a cache entry to the right LRU.
  110. */
  111. static bool
  112. nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
  113. {
  114. struct list_lru *lru;
  115. lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
  116. &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
  117. return list_lru_add_obj(lru, &entry->lru);
  118. }
  119. static bool
  120. nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
  121. {
  122. struct list_lru *lru;
  123. lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
  124. &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
  125. return list_lru_del_obj(lru, &entry->lru);
  126. }
  127. /*
  128. * This function allocates cache entries. They are the normal
  129. * extended attribute name/value pairs, but may also be a listxattr
  130. * cache. Those allocations use the same entry so that they can be
  131. * treated as one by the memory shrinker.
  132. *
  133. * xattr cache entries are allocated together with names. If the
  134. * value fits in to one page with the entry structure and the name,
  135. * it will also be part of the same allocation (kmalloc). This is
  136. * expected to be the vast majority of cases. Larger allocations
  137. * have a value pointer that is allocated separately by kvmalloc.
  138. *
  139. * Parameters:
  140. *
  141. * @name: Name of the extended attribute. NULL for listxattr cache
  142. * entry.
  143. * @value: Value of attribute, or listxattr cache. NULL if the
  144. * value is to be copied from pages instead.
  145. * @pages: Pages to copy the value from, if not NULL. Passed in to
  146. * make it easier to copy the value after an RPC, even if
  147. * the value will not be passed up to application (e.g.
  148. * for a 'query' getxattr with NULL buffer).
  149. * @len: Length of the value. Can be 0 for zero-length attributes.
  150. * @value and @pages will be NULL if @len is 0.
  151. */
  152. static struct nfs4_xattr_entry *
  153. nfs4_xattr_alloc_entry(const char *name, const void *value,
  154. struct page **pages, size_t len)
  155. {
  156. struct nfs4_xattr_entry *entry;
  157. void *valp;
  158. char *namep;
  159. size_t alloclen, slen;
  160. char *buf;
  161. uint32_t flags;
  162. BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
  163. XATTR_NAME_MAX + 1 > PAGE_SIZE);
  164. alloclen = sizeof(struct nfs4_xattr_entry);
  165. if (name != NULL) {
  166. slen = strlen(name) + 1;
  167. alloclen += slen;
  168. } else
  169. slen = 0;
  170. if (alloclen + len <= PAGE_SIZE) {
  171. alloclen += len;
  172. flags = 0;
  173. } else {
  174. flags = NFS4_XATTR_ENTRY_EXTVAL;
  175. }
  176. buf = kmalloc(alloclen, GFP_KERNEL);
  177. if (buf == NULL)
  178. return NULL;
  179. entry = (struct nfs4_xattr_entry *)buf;
  180. if (name != NULL) {
  181. namep = buf + sizeof(struct nfs4_xattr_entry);
  182. memcpy(namep, name, slen);
  183. } else {
  184. namep = NULL;
  185. }
  186. if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
  187. valp = kvmalloc(len, GFP_KERNEL);
  188. if (valp == NULL) {
  189. kfree(buf);
  190. return NULL;
  191. }
  192. } else if (len != 0) {
  193. valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
  194. } else
  195. valp = NULL;
  196. if (valp != NULL) {
  197. if (value != NULL)
  198. memcpy(valp, value, len);
  199. else
  200. _copy_from_pages(valp, pages, 0, len);
  201. }
  202. entry->flags = flags;
  203. entry->xattr_value = valp;
  204. kref_init(&entry->ref);
  205. entry->xattr_name = namep;
  206. entry->xattr_size = len;
  207. entry->bucket = NULL;
  208. INIT_LIST_HEAD(&entry->lru);
  209. INIT_LIST_HEAD(&entry->dispose);
  210. INIT_HLIST_NODE(&entry->hnode);
  211. return entry;
  212. }
  213. static void
  214. nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
  215. {
  216. if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
  217. kvfree(entry->xattr_value);
  218. kfree(entry);
  219. }
  220. static void
  221. nfs4_xattr_free_entry_cb(struct kref *kref)
  222. {
  223. struct nfs4_xattr_entry *entry;
  224. entry = container_of(kref, struct nfs4_xattr_entry, ref);
  225. if (WARN_ON(!list_empty(&entry->lru)))
  226. return;
  227. nfs4_xattr_free_entry(entry);
  228. }
  229. static void
  230. nfs4_xattr_free_cache_cb(struct kref *kref)
  231. {
  232. struct nfs4_xattr_cache *cache;
  233. int i;
  234. cache = container_of(kref, struct nfs4_xattr_cache, ref);
  235. for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
  236. if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
  237. return;
  238. cache->buckets[i].draining = false;
  239. }
  240. cache->listxattr = NULL;
  241. kmem_cache_free(nfs4_xattr_cache_cachep, cache);
  242. }
  243. static struct nfs4_xattr_cache *
  244. nfs4_xattr_alloc_cache(void)
  245. {
  246. struct nfs4_xattr_cache *cache;
  247. cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, GFP_KERNEL);
  248. if (cache == NULL)
  249. return NULL;
  250. kref_init(&cache->ref);
  251. atomic_long_set(&cache->nent, 0);
  252. return cache;
  253. }
  254. /*
  255. * Set the listxattr cache, which is a special-cased cache entry.
  256. * The special value ERR_PTR(-ESTALE) is used to indicate that
  257. * the cache is being drained - this prevents a new listxattr
  258. * cache from being added to what is now a stale cache.
  259. */
  260. static int
  261. nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
  262. struct nfs4_xattr_entry *new)
  263. {
  264. struct nfs4_xattr_entry *old;
  265. int ret = 1;
  266. spin_lock(&cache->listxattr_lock);
  267. old = cache->listxattr;
  268. if (old == ERR_PTR(-ESTALE)) {
  269. ret = 0;
  270. goto out;
  271. }
  272. cache->listxattr = new;
  273. if (new != NULL && new != ERR_PTR(-ESTALE))
  274. nfs4_xattr_entry_lru_add(new);
  275. if (old != NULL) {
  276. nfs4_xattr_entry_lru_del(old);
  277. kref_put(&old->ref, nfs4_xattr_free_entry_cb);
  278. }
  279. out:
  280. spin_unlock(&cache->listxattr_lock);
  281. return ret;
  282. }
  283. /*
  284. * Unlink a cache from its parent inode, clearing out an invalid
  285. * cache. Must be called with i_lock held.
  286. */
  287. static struct nfs4_xattr_cache *
  288. nfs4_xattr_cache_unlink(struct inode *inode)
  289. {
  290. struct nfs_inode *nfsi;
  291. struct nfs4_xattr_cache *oldcache;
  292. nfsi = NFS_I(inode);
  293. oldcache = nfsi->xattr_cache;
  294. if (oldcache != NULL) {
  295. list_lru_del_obj(&nfs4_xattr_cache_lru, &oldcache->lru);
  296. oldcache->inode = NULL;
  297. }
  298. nfsi->xattr_cache = NULL;
  299. nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;
  300. return oldcache;
  301. }
  302. /*
  303. * Discard a cache. Called by get_cache() if there was an old,
  304. * invalid cache. Can also be called from a shrinker callback.
  305. *
  306. * The cache is dead, it has already been unlinked from its inode,
  307. * and no longer appears on the cache LRU list.
  308. *
  309. * Mark all buckets as draining, so that no new entries are added. This
  310. * could still happen in the unlikely, but possible case that another
  311. * thread had grabbed a reference before it was unlinked from the inode,
  312. * and is still holding it for an add operation.
  313. *
  314. * Remove all entries from the LRU lists, so that there is no longer
  315. * any way to 'find' this cache. Then, remove the entries from the hash
  316. * table.
  317. *
  318. * At that point, the cache will remain empty and can be freed when the final
  319. * reference drops, which is very likely the kref_put at the end of
  320. * this function, or the one called immediately afterwards in the
  321. * shrinker callback.
  322. */
  323. static void
  324. nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
  325. {
  326. unsigned int i;
  327. struct nfs4_xattr_entry *entry;
  328. struct nfs4_xattr_bucket *bucket;
  329. struct hlist_node *n;
  330. nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));
  331. for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
  332. bucket = &cache->buckets[i];
  333. spin_lock(&bucket->lock);
  334. bucket->draining = true;
  335. hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
  336. nfs4_xattr_entry_lru_del(entry);
  337. hlist_del_init(&entry->hnode);
  338. kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
  339. }
  340. spin_unlock(&bucket->lock);
  341. }
  342. atomic_long_set(&cache->nent, 0);
  343. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  344. }
  345. /*
  346. * Get a referenced copy of the cache structure. Avoid doing allocs
  347. * while holding i_lock. Which means that we do some optimistic allocation,
  348. * and might have to free the result in rare cases.
  349. *
  350. * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
  351. * and acts accordingly, replacing the cache when needed. For the read case
  352. * (!add), this means that the caller must make sure that the cache
  353. * is valid before caling this function. getxattr and listxattr call
  354. * revalidate_inode to do this. The attribute cache timeout (for the
  355. * non-delegated case) is expected to be dealt with in the revalidate
  356. * call.
  357. */
  358. static struct nfs4_xattr_cache *
  359. nfs4_xattr_get_cache(struct inode *inode, int add)
  360. {
  361. struct nfs_inode *nfsi;
  362. struct nfs4_xattr_cache *cache, *oldcache, *newcache;
  363. nfsi = NFS_I(inode);
  364. cache = oldcache = NULL;
  365. spin_lock(&inode->i_lock);
  366. if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
  367. oldcache = nfs4_xattr_cache_unlink(inode);
  368. else
  369. cache = nfsi->xattr_cache;
  370. if (cache != NULL)
  371. kref_get(&cache->ref);
  372. spin_unlock(&inode->i_lock);
  373. if (add && cache == NULL) {
  374. newcache = NULL;
  375. cache = nfs4_xattr_alloc_cache();
  376. if (cache == NULL)
  377. goto out;
  378. spin_lock(&inode->i_lock);
  379. if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
  380. /*
  381. * The cache was invalidated again. Give up,
  382. * since what we want to enter is now likely
  383. * outdated anyway.
  384. */
  385. spin_unlock(&inode->i_lock);
  386. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  387. cache = NULL;
  388. goto out;
  389. }
  390. /*
  391. * Check if someone beat us to it.
  392. */
  393. if (nfsi->xattr_cache != NULL) {
  394. newcache = nfsi->xattr_cache;
  395. kref_get(&newcache->ref);
  396. } else {
  397. kref_get(&cache->ref);
  398. nfsi->xattr_cache = cache;
  399. cache->inode = inode;
  400. list_lru_add_obj(&nfs4_xattr_cache_lru, &cache->lru);
  401. }
  402. spin_unlock(&inode->i_lock);
  403. /*
  404. * If there was a race, throw away the cache we just
  405. * allocated, and use the new one allocated by someone
  406. * else.
  407. */
  408. if (newcache != NULL) {
  409. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  410. cache = newcache;
  411. }
  412. }
  413. out:
  414. /*
  415. * Discard the now orphaned old cache.
  416. */
  417. if (oldcache != NULL)
  418. nfs4_xattr_discard_cache(oldcache);
  419. return cache;
  420. }
  421. static inline struct nfs4_xattr_bucket *
  422. nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
  423. {
  424. return &cache->buckets[jhash(name, strlen(name), 0) &
  425. (ARRAY_SIZE(cache->buckets) - 1)];
  426. }
  427. static struct nfs4_xattr_entry *
  428. nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
  429. {
  430. struct nfs4_xattr_entry *entry;
  431. entry = NULL;
  432. hlist_for_each_entry(entry, &bucket->hlist, hnode) {
  433. if (!strcmp(entry->xattr_name, name))
  434. break;
  435. }
  436. return entry;
  437. }
  438. static int
  439. nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
  440. struct nfs4_xattr_entry *entry)
  441. {
  442. struct nfs4_xattr_bucket *bucket;
  443. struct nfs4_xattr_entry *oldentry = NULL;
  444. int ret = 1;
  445. bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
  446. entry->bucket = bucket;
  447. spin_lock(&bucket->lock);
  448. if (bucket->draining) {
  449. ret = 0;
  450. goto out;
  451. }
  452. oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
  453. if (oldentry != NULL) {
  454. hlist_del_init(&oldentry->hnode);
  455. nfs4_xattr_entry_lru_del(oldentry);
  456. } else {
  457. atomic_long_inc(&cache->nent);
  458. }
  459. hlist_add_head(&entry->hnode, &bucket->hlist);
  460. nfs4_xattr_entry_lru_add(entry);
  461. out:
  462. spin_unlock(&bucket->lock);
  463. if (oldentry != NULL)
  464. kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);
  465. return ret;
  466. }
  467. static void
  468. nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
  469. {
  470. struct nfs4_xattr_bucket *bucket;
  471. struct nfs4_xattr_entry *entry;
  472. bucket = nfs4_xattr_hash_bucket(cache, name);
  473. spin_lock(&bucket->lock);
  474. entry = nfs4_xattr_get_entry(bucket, name);
  475. if (entry != NULL) {
  476. hlist_del_init(&entry->hnode);
  477. nfs4_xattr_entry_lru_del(entry);
  478. atomic_long_dec(&cache->nent);
  479. }
  480. spin_unlock(&bucket->lock);
  481. if (entry != NULL)
  482. kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
  483. }
  484. static struct nfs4_xattr_entry *
  485. nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
  486. {
  487. struct nfs4_xattr_bucket *bucket;
  488. struct nfs4_xattr_entry *entry;
  489. bucket = nfs4_xattr_hash_bucket(cache, name);
  490. spin_lock(&bucket->lock);
  491. entry = nfs4_xattr_get_entry(bucket, name);
  492. if (entry != NULL)
  493. kref_get(&entry->ref);
  494. spin_unlock(&bucket->lock);
  495. return entry;
  496. }
  497. /*
  498. * Entry point to retrieve an entry from the cache.
  499. */
  500. ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
  501. ssize_t buflen)
  502. {
  503. struct nfs4_xattr_cache *cache;
  504. struct nfs4_xattr_entry *entry;
  505. ssize_t ret;
  506. cache = nfs4_xattr_get_cache(inode, 0);
  507. if (cache == NULL)
  508. return -ENOENT;
  509. ret = 0;
  510. entry = nfs4_xattr_hash_find(cache, name);
  511. if (entry != NULL) {
  512. dprintk("%s: cache hit '%s', len %lu\n", __func__,
  513. entry->xattr_name, (unsigned long)entry->xattr_size);
  514. if (buflen == 0) {
  515. /* Length probe only */
  516. ret = entry->xattr_size;
  517. } else if (buflen < entry->xattr_size)
  518. ret = -ERANGE;
  519. else {
  520. memcpy(buf, entry->xattr_value, entry->xattr_size);
  521. ret = entry->xattr_size;
  522. }
  523. kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
  524. } else {
  525. dprintk("%s: cache miss '%s'\n", __func__, name);
  526. ret = -ENOENT;
  527. }
  528. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  529. return ret;
  530. }
  531. /*
  532. * Retrieve a cached list of xattrs from the cache.
  533. */
  534. ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
  535. {
  536. struct nfs4_xattr_cache *cache;
  537. struct nfs4_xattr_entry *entry;
  538. ssize_t ret;
  539. cache = nfs4_xattr_get_cache(inode, 0);
  540. if (cache == NULL)
  541. return -ENOENT;
  542. spin_lock(&cache->listxattr_lock);
  543. entry = cache->listxattr;
  544. if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
  545. if (buflen == 0) {
  546. /* Length probe only */
  547. ret = entry->xattr_size;
  548. } else if (entry->xattr_size > buflen)
  549. ret = -ERANGE;
  550. else {
  551. memcpy(buf, entry->xattr_value, entry->xattr_size);
  552. ret = entry->xattr_size;
  553. }
  554. } else {
  555. ret = -ENOENT;
  556. }
  557. spin_unlock(&cache->listxattr_lock);
  558. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  559. return ret;
  560. }
  561. /*
  562. * Add an xattr to the cache.
  563. *
  564. * This also invalidates the xattr list cache.
  565. */
  566. void nfs4_xattr_cache_add(struct inode *inode, const char *name,
  567. const char *buf, struct page **pages, ssize_t buflen)
  568. {
  569. struct nfs4_xattr_cache *cache;
  570. struct nfs4_xattr_entry *entry;
  571. dprintk("%s: add '%s' len %lu\n", __func__,
  572. name, (unsigned long)buflen);
  573. cache = nfs4_xattr_get_cache(inode, 1);
  574. if (cache == NULL)
  575. return;
  576. entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
  577. if (entry == NULL)
  578. goto out;
  579. (void)nfs4_xattr_set_listcache(cache, NULL);
  580. if (!nfs4_xattr_hash_add(cache, entry))
  581. kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
  582. out:
  583. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  584. }
  585. /*
  586. * Remove an xattr from the cache.
  587. *
  588. * This also invalidates the xattr list cache.
  589. */
  590. void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
  591. {
  592. struct nfs4_xattr_cache *cache;
  593. dprintk("%s: remove '%s'\n", __func__, name);
  594. cache = nfs4_xattr_get_cache(inode, 0);
  595. if (cache == NULL)
  596. return;
  597. (void)nfs4_xattr_set_listcache(cache, NULL);
  598. nfs4_xattr_hash_remove(cache, name);
  599. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  600. }
  601. /*
  602. * Cache listxattr output, replacing any possible old one.
  603. */
  604. void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
  605. ssize_t buflen)
  606. {
  607. struct nfs4_xattr_cache *cache;
  608. struct nfs4_xattr_entry *entry;
  609. cache = nfs4_xattr_get_cache(inode, 1);
  610. if (cache == NULL)
  611. return;
  612. entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
  613. if (entry == NULL)
  614. goto out;
  615. /*
  616. * This is just there to be able to get to bucket->cache,
  617. * which is obviously the same for all buckets, so just
  618. * use bucket 0.
  619. */
  620. entry->bucket = &cache->buckets[0];
  621. if (!nfs4_xattr_set_listcache(cache, entry))
  622. kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
  623. out:
  624. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  625. }
  626. /*
  627. * Zap the entire cache. Called when an inode is evicted.
  628. */
  629. void nfs4_xattr_cache_zap(struct inode *inode)
  630. {
  631. struct nfs4_xattr_cache *oldcache;
  632. spin_lock(&inode->i_lock);
  633. oldcache = nfs4_xattr_cache_unlink(inode);
  634. spin_unlock(&inode->i_lock);
  635. if (oldcache)
  636. nfs4_xattr_discard_cache(oldcache);
  637. }
  /*
   * Shrinker state.
   *
   * The large-entry LRU is shrunk more aggressively than the others, by
   * setting its shrinker's @seeks to 1 (see nfs4_xattr_cache_init()).
   *
   * Cache structures are freed only when they've become empty, after
   * pruning all but one entry (see cache_lru_isolate()).
   */
  static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
                                              struct shrink_control *sc);
  static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
                                              struct shrink_control *sc);
  static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
                                             struct shrink_control *sc);
  static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
                                             struct shrink_control *sc);

  /* One shrinker per LRU; allocated in nfs4_xattr_cache_init(). */
  static struct shrinker *nfs4_xattr_cache_shrinker;
  static struct shrinker *nfs4_xattr_entry_shrinker;
  static struct shrinker *nfs4_xattr_large_entry_shrinker;
  656. static enum lru_status
  657. cache_lru_isolate(struct list_head *item,
  658. struct list_lru_one *lru, void *arg)
  659. {
  660. struct list_head *dispose = arg;
  661. struct inode *inode;
  662. struct nfs4_xattr_cache *cache = container_of(item,
  663. struct nfs4_xattr_cache, lru);
  664. if (atomic_long_read(&cache->nent) > 1)
  665. return LRU_SKIP;
  666. /*
  667. * If a cache structure is on the LRU list, we know that
  668. * its inode is valid. Try to lock it to break the link.
  669. * Since we're inverting the lock order here, only try.
  670. */
  671. inode = cache->inode;
  672. if (!spin_trylock(&inode->i_lock))
  673. return LRU_SKIP;
  674. kref_get(&cache->ref);
  675. cache->inode = NULL;
  676. NFS_I(inode)->xattr_cache = NULL;
  677. NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
  678. list_lru_isolate(lru, &cache->lru);
  679. spin_unlock(&inode->i_lock);
  680. list_add_tail(&cache->dispose, dispose);
  681. return LRU_REMOVED;
  682. }
  683. static unsigned long
  684. nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
  685. {
  686. LIST_HEAD(dispose);
  687. unsigned long freed;
  688. struct nfs4_xattr_cache *cache;
  689. freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
  690. cache_lru_isolate, &dispose);
  691. while (!list_empty(&dispose)) {
  692. cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
  693. dispose);
  694. list_del_init(&cache->dispose);
  695. nfs4_xattr_discard_cache(cache);
  696. kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
  697. }
  698. return freed;
  699. }
  700. static unsigned long
  701. nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
  702. {
  703. unsigned long count;
  704. count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc);
  705. return vfs_pressure_ratio(count);
  706. }
  707. static enum lru_status
  708. entry_lru_isolate(struct list_head *item,
  709. struct list_lru_one *lru, void *arg)
  710. {
  711. struct list_head *dispose = arg;
  712. struct nfs4_xattr_bucket *bucket;
  713. struct nfs4_xattr_cache *cache;
  714. struct nfs4_xattr_entry *entry = container_of(item,
  715. struct nfs4_xattr_entry, lru);
  716. bucket = entry->bucket;
  717. cache = bucket->cache;
  718. /*
  719. * Unhook the entry from its parent (either a cache bucket
  720. * or a cache structure if it's a listxattr buf), so that
  721. * it's no longer found. Then add it to the isolate list,
  722. * to be freed later.
  723. *
  724. * In both cases, we're reverting lock order, so use
  725. * trylock and skip the entry if we can't get the lock.
  726. */
  727. if (entry->xattr_name != NULL) {
  728. /* Regular cache entry */
  729. if (!spin_trylock(&bucket->lock))
  730. return LRU_SKIP;
  731. kref_get(&entry->ref);
  732. hlist_del_init(&entry->hnode);
  733. atomic_long_dec(&cache->nent);
  734. list_lru_isolate(lru, &entry->lru);
  735. spin_unlock(&bucket->lock);
  736. } else {
  737. /* Listxattr cache entry */
  738. if (!spin_trylock(&cache->listxattr_lock))
  739. return LRU_SKIP;
  740. kref_get(&entry->ref);
  741. cache->listxattr = NULL;
  742. list_lru_isolate(lru, &entry->lru);
  743. spin_unlock(&cache->listxattr_lock);
  744. }
  745. list_add_tail(&entry->dispose, dispose);
  746. return LRU_REMOVED;
  747. }
  748. static unsigned long
  749. nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
  750. {
  751. LIST_HEAD(dispose);
  752. unsigned long freed;
  753. struct nfs4_xattr_entry *entry;
  754. struct list_lru *lru;
  755. lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
  756. &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
  757. freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);
  758. while (!list_empty(&dispose)) {
  759. entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
  760. dispose);
  761. list_del_init(&entry->dispose);
  762. /*
  763. * Drop two references: the one that we just grabbed
  764. * in entry_lru_isolate, and the one that was set
  765. * when the entry was first allocated.
  766. */
  767. kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
  768. kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
  769. }
  770. return freed;
  771. }
  772. static unsigned long
  773. nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
  774. {
  775. unsigned long count;
  776. struct list_lru *lru;
  777. lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
  778. &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
  779. count = list_lru_shrink_count(lru, sc);
  780. return vfs_pressure_ratio(count);
  781. }
  782. static void nfs4_xattr_cache_init_once(void *p)
  783. {
  784. struct nfs4_xattr_cache *cache = p;
  785. spin_lock_init(&cache->listxattr_lock);
  786. atomic_long_set(&cache->nent, 0);
  787. nfs4_xattr_hash_init(cache);
  788. cache->listxattr = NULL;
  789. INIT_LIST_HEAD(&cache->lru);
  790. INIT_LIST_HEAD(&cache->dispose);
  791. }
  /*
   * Callback types accepted by nfs4_xattr_shrinker_init() for a
   * shrinker's count_objects and scan_objects hooks.
   */
  typedef unsigned long (*count_objects_cb)(struct shrinker *s,
                                            struct shrink_control *sc);
  typedef unsigned long (*scan_objects_cb)(struct shrinker *s,
                                           struct shrink_control *sc);
  796. static int __init nfs4_xattr_shrinker_init(struct shrinker **shrinker,
  797. struct list_lru *lru, const char *name,
  798. count_objects_cb count,
  799. scan_objects_cb scan, long batch, int seeks)
  800. {
  801. int ret;
  802. *shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE, name);
  803. if (!*shrinker)
  804. return -ENOMEM;
  805. ret = list_lru_init_memcg(lru, *shrinker);
  806. if (ret) {
  807. shrinker_free(*shrinker);
  808. return ret;
  809. }
  810. (*shrinker)->count_objects = count;
  811. (*shrinker)->scan_objects = scan;
  812. (*shrinker)->batch = batch;
  813. (*shrinker)->seeks = seeks;
  814. shrinker_register(*shrinker);
  815. return ret;
  816. }
  /*
   * Tear down what nfs4_xattr_shrinker_init() set up: free the shrinker
   * first, then destroy its list_lru.
   */
  static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker,
                                          struct list_lru *lru)
  {
      shrinker_free(shrinker);

      list_lru_destroy(lru);
  }
  823. int __init nfs4_xattr_cache_init(void)
  824. {
  825. int ret = 0;
  826. nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
  827. sizeof(struct nfs4_xattr_cache), 0,
  828. (SLAB_RECLAIM_ACCOUNT),
  829. nfs4_xattr_cache_init_once);
  830. if (nfs4_xattr_cache_cachep == NULL)
  831. return -ENOMEM;
  832. ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker,
  833. &nfs4_xattr_cache_lru, "nfs-xattr_cache",
  834. nfs4_xattr_cache_count,
  835. nfs4_xattr_cache_scan, 0, DEFAULT_SEEKS);
  836. if (ret)
  837. goto out1;
  838. ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker,
  839. &nfs4_xattr_entry_lru, "nfs-xattr_entry",
  840. nfs4_xattr_entry_count,
  841. nfs4_xattr_entry_scan, 512, DEFAULT_SEEKS);
  842. if (ret)
  843. goto out2;
  844. ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker,
  845. &nfs4_xattr_large_entry_lru,
  846. "nfs-xattr_large_entry",
  847. nfs4_xattr_entry_count,
  848. nfs4_xattr_entry_scan, 512, 1);
  849. if (!ret)
  850. return 0;
  851. nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
  852. &nfs4_xattr_entry_lru);
  853. out2:
  854. nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
  855. &nfs4_xattr_cache_lru);
  856. out1:
  857. kmem_cache_destroy(nfs4_xattr_cache_cachep);
  858. return ret;
  859. }
  860. void nfs4_xattr_cache_exit(void)
  861. {
  862. nfs4_xattr_shrinker_destroy(nfs4_xattr_large_entry_shrinker,
  863. &nfs4_xattr_large_entry_lru);
  864. nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
  865. &nfs4_xattr_entry_lru);
  866. nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
  867. &nfs4_xattr_cache_lru);
  868. kmem_cache_destroy(nfs4_xattr_cache_cachep);
  869. }