page.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. /*
  2. * Copyright (c) 2006 Oracle. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. *
  32. */
  33. #include <linux/highmem.h>
  34. #include <linux/gfp.h>
  35. #include <linux/cpu.h>
  36. #include <linux/export.h>
  37. #include "rds.h"
  38. struct rds_page_remainder {
  39. struct page *r_page;
  40. unsigned long r_offset;
  41. local_lock_t bh_lock;
  42. };
  43. static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders) = {
  44. .bh_lock = INIT_LOCAL_LOCK(bh_lock),
  45. };
  46. /**
  47. * rds_page_remainder_alloc - build up regions of a message.
  48. *
  49. * @scat: Scatter list for message
  50. * @bytes: the number of bytes needed.
  51. * @gfp: the waiting behaviour of the allocation
  52. *
  53. * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to
  54. * kmap the pages, etc.
  55. *
  56. * If @bytes is at least a full page then this just returns a page from
  57. * alloc_page().
  58. *
  59. * If @bytes is a partial page then this stores the unused region of the
  60. * page in a per-cpu structure. Future partial-page allocations may be
  61. * satisfied from that cached region. This lets us waste less memory on
  62. * small allocations with minimal complexity. It works because the transmit
  63. * path passes read-only page regions down to devices. They hold a page
  64. * reference until they are done with the region.
  65. */
  66. int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
  67. gfp_t gfp)
  68. {
  69. struct rds_page_remainder *rem;
  70. struct page *page;
  71. int ret;
  72. gfp |= __GFP_HIGHMEM;
  73. /* jump straight to allocation if we're trying for a huge page */
  74. if (bytes >= PAGE_SIZE) {
  75. page = alloc_page(gfp);
  76. if (!page) {
  77. ret = -ENOMEM;
  78. } else {
  79. sg_set_page(scat, page, PAGE_SIZE, 0);
  80. ret = 0;
  81. }
  82. goto out;
  83. }
  84. local_bh_disable();
  85. local_lock_nested_bh(&rds_page_remainders.bh_lock);
  86. rem = this_cpu_ptr(&rds_page_remainders);
  87. while (1) {
  88. /* avoid a tiny region getting stuck by tossing it */
  89. if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
  90. rds_stats_inc(s_page_remainder_miss);
  91. __free_page(rem->r_page);
  92. rem->r_page = NULL;
  93. }
  94. /* hand out a fragment from the cached page */
  95. if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
  96. sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
  97. get_page(sg_page(scat));
  98. if (rem->r_offset != 0)
  99. rds_stats_inc(s_page_remainder_hit);
  100. rem->r_offset += ALIGN(bytes, 8);
  101. if (rem->r_offset >= PAGE_SIZE) {
  102. __free_page(rem->r_page);
  103. rem->r_page = NULL;
  104. }
  105. ret = 0;
  106. break;
  107. }
  108. /* alloc if there is nothing for us to use */
  109. local_unlock_nested_bh(&rds_page_remainders.bh_lock);
  110. local_bh_enable();
  111. page = alloc_page(gfp);
  112. local_bh_disable();
  113. local_lock_nested_bh(&rds_page_remainders.bh_lock);
  114. rem = this_cpu_ptr(&rds_page_remainders);
  115. if (!page) {
  116. ret = -ENOMEM;
  117. break;
  118. }
  119. /* did someone race to fill the remainder before us? */
  120. if (rem->r_page) {
  121. __free_page(page);
  122. continue;
  123. }
  124. /* otherwise install our page and loop around to alloc */
  125. rem->r_page = page;
  126. rem->r_offset = 0;
  127. }
  128. local_unlock_nested_bh(&rds_page_remainders.bh_lock);
  129. local_bh_enable();
  130. out:
  131. rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
  132. ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
  133. ret ? 0 : scat->length);
  134. return ret;
  135. }
  136. EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
  137. void rds_page_exit(void)
  138. {
  139. unsigned int cpu;
  140. for_each_possible_cpu(cpu) {
  141. struct rds_page_remainder *rem;
  142. rem = &per_cpu(rds_page_remainders, cpu);
  143. rdsdebug("cpu %u\n", cpu);
  144. if (rem->r_page)
  145. __free_page(rem->r_page);
  146. rem->r_page = NULL;
  147. }
  148. }