vl_rotate.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Handle vlserver selection and rotation.
  3. *
  4. * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #include <linux/kernel.h>
  8. #include <linux/sched.h>
  9. #include <linux/sched/signal.h>
  10. #include "internal.h"
  11. #include "afs_vl.h"
  12. /*
  13. * Begin an operation on a volume location server.
  14. */
  15. bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
  16. struct key *key)
  17. {
  18. static atomic_t debug_ids;
  19. memset(vc, 0, sizeof(*vc));
  20. vc->cell = cell;
  21. vc->key = key;
  22. vc->cumul_error.error = -EDESTADDRREQ;
  23. vc->nr_iterations = -1;
  24. if (signal_pending(current)) {
  25. vc->cumul_error.error = -EINTR;
  26. vc->flags |= AFS_VL_CURSOR_STOP;
  27. return false;
  28. }
  29. vc->debug_id = atomic_inc_return(&debug_ids);
  30. return true;
  31. }
  32. /*
  33. * Begin iteration through a server list, starting with the last used server if
  34. * possible, or the last recorded good server if not.
  35. */
  36. static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
  37. {
  38. struct afs_cell *cell = vc->cell;
  39. unsigned int dns_lookup_count;
  40. if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
  41. cell->dns_expiry <= ktime_get_real_seconds()) {
  42. dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
  43. set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
  44. afs_queue_cell(cell, afs_cell_trace_queue_dns);
  45. if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
  46. if (wait_var_event_interruptible(
  47. &cell->dns_lookup_count,
  48. smp_load_acquire(&cell->dns_lookup_count)
  49. != dns_lookup_count) < 0) {
  50. vc->cumul_error.error = -ERESTARTSYS;
  51. return false;
  52. }
  53. }
  54. /* Status load is ordered after lookup counter load */
  55. if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
  56. pr_warn("No record of cell %s\n", cell->name);
  57. vc->cumul_error.error = -ENOENT;
  58. return false;
  59. }
  60. if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
  61. vc->cumul_error.error = -EDESTADDRREQ;
  62. return false;
  63. }
  64. }
  65. read_lock(&cell->vl_servers_lock);
  66. vc->server_list = afs_get_vlserverlist(
  67. rcu_dereference_protected(cell->vl_servers,
  68. lockdep_is_held(&cell->vl_servers_lock)));
  69. read_unlock(&cell->vl_servers_lock);
  70. if (!vc->server_list->nr_servers)
  71. return false;
  72. vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1;
  73. vc->server_index = -1;
  74. return true;
  75. }
  76. /*
  77. * Select the vlserver to use. May be called multiple times to rotate
  78. * through the vlservers.
  79. */
  80. bool afs_select_vlserver(struct afs_vl_cursor *vc)
  81. {
  82. struct afs_addr_list *alist = vc->alist;
  83. struct afs_vlserver *vlserver;
  84. unsigned long set, failed;
  85. unsigned int rtt;
  86. s32 abort_code = vc->call_abort_code;
  87. int error = vc->call_error, i;
  88. vc->nr_iterations++;
  89. _enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
  90. vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
  91. vc->addr_index, vc->addr_tried,
  92. error, abort_code);
  93. if (vc->flags & AFS_VL_CURSOR_STOP) {
  94. _leave(" = f [stopped]");
  95. return false;
  96. }
  97. if (vc->nr_iterations == 0)
  98. goto start;
  99. WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);
  100. /* Evaluate the result of the previous operation, if there was one. */
  101. switch (error) {
  102. default:
  103. case 0:
  104. /* Success or local failure. Stop. */
  105. vc->cumul_error.error = error;
  106. vc->flags |= AFS_VL_CURSOR_STOP;
  107. _leave(" = f [okay/local %d]", vc->cumul_error.error);
  108. return false;
  109. case -ECONNABORTED:
  110. /* The far side rejected the operation on some grounds. This
  111. * might involve the server being busy or the volume having been moved.
  112. */
  113. switch (abort_code) {
  114. case AFSVL_IO:
  115. case AFSVL_BADVOLOPER:
  116. case AFSVL_NOMEM:
  117. /* The server went weird. */
  118. afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
  119. //write_lock(&vc->cell->vl_servers_lock);
  120. //vc->server_list->weird_mask |= 1 << vc->server_index;
  121. //write_unlock(&vc->cell->vl_servers_lock);
  122. goto next_server;
  123. default:
  124. afs_prioritise_error(&vc->cumul_error, error, abort_code);
  125. goto failed;
  126. }
  127. case -ERFKILL:
  128. case -EADDRNOTAVAIL:
  129. case -ENETUNREACH:
  130. case -EHOSTUNREACH:
  131. case -EHOSTDOWN:
  132. case -ECONNREFUSED:
  133. case -ETIMEDOUT:
  134. case -ETIME:
  135. _debug("no conn %d", error);
  136. afs_prioritise_error(&vc->cumul_error, error, 0);
  137. goto iterate_address;
  138. case -ECONNRESET:
  139. _debug("call reset");
  140. afs_prioritise_error(&vc->cumul_error, error, 0);
  141. vc->flags |= AFS_VL_CURSOR_RETRY;
  142. goto next_server;
  143. case -EOPNOTSUPP:
  144. _debug("notsupp");
  145. goto next_server;
  146. }
  147. restart_from_beginning:
  148. _debug("restart");
  149. if (vc->call_responded &&
  150. vc->addr_index != vc->alist->preferred &&
  151. test_bit(alist->preferred, &vc->addr_tried))
  152. WRITE_ONCE(alist->preferred, vc->addr_index);
  153. afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
  154. alist = vc->alist = NULL;
  155. afs_put_vlserverlist(vc->cell->net, vc->server_list);
  156. vc->server_list = NULL;
  157. if (vc->flags & AFS_VL_CURSOR_RETRIED)
  158. goto failed;
  159. vc->flags |= AFS_VL_CURSOR_RETRIED;
  160. start:
  161. _debug("start");
  162. ASSERTCMP(alist, ==, NULL);
  163. if (!afs_start_vl_iteration(vc))
  164. goto failed;
  165. error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
  166. if (error < 0) {
  167. afs_prioritise_error(&vc->cumul_error, error, 0);
  168. goto failed;
  169. }
  170. pick_server:
  171. _debug("pick [%lx]", vc->untried_servers);
  172. ASSERTCMP(alist, ==, NULL);
  173. error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
  174. if (error < 0) {
  175. afs_prioritise_error(&vc->cumul_error, error, 0);
  176. goto failed;
  177. }
  178. /* Pick the untried server with the lowest RTT. */
  179. vc->server_index = vc->server_list->preferred;
  180. if (test_bit(vc->server_index, &vc->untried_servers))
  181. goto selected_server;
  182. vc->server_index = -1;
  183. rtt = UINT_MAX;
  184. for (i = 0; i < vc->server_list->nr_servers; i++) {
  185. struct afs_vlserver *s = vc->server_list->servers[i].server;
  186. if (!test_bit(i, &vc->untried_servers) ||
  187. !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
  188. continue;
  189. if (s->probe.rtt <= rtt) {
  190. vc->server_index = i;
  191. rtt = s->probe.rtt;
  192. }
  193. }
  194. if (vc->server_index == -1)
  195. goto no_more_servers;
  196. selected_server:
  197. _debug("use %d", vc->server_index);
  198. __clear_bit(vc->server_index, &vc->untried_servers);
  199. /* We're starting on a different vlserver from the list. We need to
  200. * check it, find its address list and probe its capabilities before we
  201. * use it.
  202. */
  203. vlserver = vc->server_list->servers[vc->server_index].server;
  204. vc->server = vlserver;
  205. _debug("USING VLSERVER: %s", vlserver->name);
  206. read_lock(&vlserver->lock);
  207. alist = rcu_dereference_protected(vlserver->addresses,
  208. lockdep_is_held(&vlserver->lock));
  209. vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
  210. read_unlock(&vlserver->lock);
  211. vc->addr_tried = 0;
  212. vc->addr_index = -1;
  213. iterate_address:
  214. /* Iterate over the current server's address list to try and find an
  215. * address on which it will respond to us.
  216. */
  217. set = READ_ONCE(alist->responded);
  218. failed = READ_ONCE(alist->probe_failed);
  219. vc->addr_index = READ_ONCE(alist->preferred);
  220. _debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);
  221. set &= ~(failed | vc->addr_tried);
  222. if (!set)
  223. goto next_server;
  224. if (!test_bit(vc->addr_index, &set))
  225. vc->addr_index = __ffs(set);
  226. set_bit(vc->addr_index, &vc->addr_tried);
  227. vc->alist = alist;
  228. _debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);
  229. vc->call_responded = false;
  230. _leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
  231. return true;
  232. next_server:
  233. _debug("next");
  234. ASSERT(alist);
  235. if (vc->call_responded &&
  236. vc->addr_index != alist->preferred &&
  237. test_bit(alist->preferred, &vc->addr_tried))
  238. WRITE_ONCE(alist->preferred, vc->addr_index);
  239. afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
  240. alist = vc->alist = NULL;
  241. goto pick_server;
  242. no_more_servers:
  243. /* That's all the servers poked to no good effect. Try again if some
  244. * of them were busy.
  245. */
  246. if (vc->flags & AFS_VL_CURSOR_RETRY)
  247. goto restart_from_beginning;
  248. for (i = 0; i < vc->server_list->nr_servers; i++) {
  249. struct afs_vlserver *s = vc->server_list->servers[i].server;
  250. if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
  251. vc->cumul_error.responded = true;
  252. afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
  253. s->probe.abort_code);
  254. }
  255. failed:
  256. if (alist) {
  257. if (vc->call_responded &&
  258. vc->addr_index != alist->preferred &&
  259. test_bit(alist->preferred, &vc->addr_tried))
  260. WRITE_ONCE(alist->preferred, vc->addr_index);
  261. afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
  262. alist = vc->alist = NULL;
  263. }
  264. vc->flags |= AFS_VL_CURSOR_STOP;
  265. _leave(" = f [failed %d]", vc->cumul_error.error);
  266. return false;
  267. }
  268. /*
  269. * Dump cursor state in the case of the error being EDESTADDRREQ.
  270. */
  271. static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
  272. {
  273. struct afs_cell *cell = vc->cell;
  274. static int count;
  275. int i;
  276. if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
  277. return;
  278. count++;
  279. rcu_read_lock();
  280. pr_notice("EDESTADDR occurred\n");
  281. pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
  282. pr_notice("DNS: src=%u st=%u lc=%x\n",
  283. cell->dns_source, cell->dns_status, cell->dns_lookup_count);
  284. pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
  285. vc->untried_servers, vc->server_index, vc->nr_iterations,
  286. vc->flags, vc->cumul_error.error);
  287. pr_notice("VC: call er=%d ac=%d r=%u\n",
  288. vc->call_error, vc->call_abort_code, vc->call_responded);
  289. if (vc->server_list) {
  290. const struct afs_vlserver_list *sl = vc->server_list;
  291. pr_notice("VC: SL nr=%u ix=%u\n",
  292. sl->nr_servers, sl->index);
  293. for (i = 0; i < sl->nr_servers; i++) {
  294. const struct afs_vlserver *s = sl->servers[i].server;
  295. pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
  296. s->name, s->port, s->flags, s->probe.error);
  297. if (s->addresses) {
  298. const struct afs_addr_list *a =
  299. rcu_dereference(s->addresses);
  300. pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
  301. a->nr_ipv4, a->nr_addrs, a->max_addrs,
  302. a->preferred);
  303. pr_notice("VC: - R=%lx F=%lx\n",
  304. a->responded, a->probe_failed);
  305. if (a == vc->alist)
  306. pr_notice("VC: - current\n");
  307. }
  308. }
  309. }
  310. pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index);
  311. rcu_read_unlock();
  312. }
  313. /*
  314. * Tidy up a volume location server cursor and unlock the vnode.
  315. */
  316. int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
  317. {
  318. struct afs_net *net = vc->cell->net;
  319. _enter("VC=%x+%x", vc->debug_id, vc->nr_iterations);
  320. switch (vc->cumul_error.error) {
  321. case -EDESTADDRREQ:
  322. case -EADDRNOTAVAIL:
  323. case -ENETUNREACH:
  324. case -EHOSTUNREACH:
  325. afs_vl_dump_edestaddrreq(vc);
  326. break;
  327. }
  328. if (vc->alist) {
  329. if (vc->call_responded &&
  330. vc->addr_index != vc->alist->preferred &&
  331. test_bit(vc->alist->preferred, &vc->addr_tried))
  332. WRITE_ONCE(vc->alist->preferred, vc->addr_index);
  333. afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end);
  334. vc->alist = NULL;
  335. }
  336. afs_put_vlserverlist(net, vc->server_list);
  337. return vc->cumul_error.error;
  338. }