rotate.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Handle fileserver selection and rotation.
  3. *
  4. * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #include <linux/kernel.h>
  8. #include <linux/slab.h>
  9. #include <linux/fs.h>
  10. #include <linux/sched.h>
  11. #include <linux/delay.h>
  12. #include <linux/sched/signal.h>
  13. #include "internal.h"
  14. #include "afs_fs.h"
  15. #include "protocol_uae.h"
  16. void afs_clear_server_states(struct afs_operation *op)
  17. {
  18. unsigned int i;
  19. if (op->server_states) {
  20. for (i = 0; i < op->server_list->nr_servers; i++)
  21. afs_put_endpoint_state(op->server_states[i].endpoint_state,
  22. afs_estate_trace_put_server_state);
  23. kfree(op->server_states);
  24. }
  25. }
  26. /*
  27. * Begin iteration through a server list, starting with the vnode's last used
  28. * server if possible, or the last recorded good server if not.
  29. */
  30. static bool afs_start_fs_iteration(struct afs_operation *op,
  31. struct afs_vnode *vnode)
  32. {
  33. struct afs_server *server;
  34. void *cb_server;
  35. int i;
  36. trace_afs_rotate(op, afs_rotate_trace_start, 0);
  37. read_lock(&op->volume->servers_lock);
  38. op->server_list = afs_get_serverlist(
  39. rcu_dereference_protected(op->volume->servers,
  40. lockdep_is_held(&op->volume->servers_lock)));
  41. read_unlock(&op->volume->servers_lock);
  42. op->server_states = kzalloc_objs(op->server_states[0],
  43. op->server_list->nr_servers);
  44. if (!op->server_states) {
  45. afs_op_nomem(op);
  46. trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
  47. return false;
  48. }
  49. rcu_read_lock();
  50. for (i = 0; i < op->server_list->nr_servers; i++) {
  51. struct afs_endpoint_state *estate;
  52. struct afs_server_state *s = &op->server_states[i];
  53. server = op->server_list->servers[i].server;
  54. estate = rcu_dereference(server->endpoint_state);
  55. s->endpoint_state = afs_get_endpoint_state(estate,
  56. afs_estate_trace_get_server_state);
  57. s->probe_seq = estate->probe_seq;
  58. s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
  59. init_waitqueue_entry(&s->probe_waiter, current);
  60. afs_get_address_preferences(op->net, estate->addresses);
  61. }
  62. rcu_read_unlock();
  63. op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
  64. op->server_index = -1;
  65. cb_server = vnode->cb_server;
  66. if (cb_server) {
  67. /* See if the vnode's preferred record is still available */
  68. for (i = 0; i < op->server_list->nr_servers; i++) {
  69. server = op->server_list->servers[i].server;
  70. if (server == cb_server) {
  71. op->server_index = i;
  72. goto found_interest;
  73. }
  74. }
  75. /* If we have a lock outstanding on a server that's no longer
  76. * serving this vnode, then we can't switch to another server
  77. * and have to return an error.
  78. */
  79. if (op->flags & AFS_OPERATION_CUR_ONLY) {
  80. afs_op_set_error(op, -ESTALE);
  81. trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
  82. return false;
  83. }
  84. /* Note that the callback promise is effectively broken */
  85. write_seqlock(&vnode->cb_lock);
  86. ASSERTCMP(cb_server, ==, vnode->cb_server);
  87. vnode->cb_server = NULL;
  88. if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_rotate_server))
  89. vnode->cb_break++;
  90. write_sequnlock(&vnode->cb_lock);
  91. }
  92. found_interest:
  93. return true;
  94. }
  95. /*
  96. * Post volume busy note.
  97. */
  98. static void afs_busy(struct afs_operation *op, u32 abort_code)
  99. {
  100. const char *m;
  101. switch (abort_code) {
  102. case VOFFLINE: m = "offline"; break;
  103. case VRESTARTING: m = "restarting"; break;
  104. case VSALVAGING: m = "being salvaged"; break;
  105. default: m = "busy"; break;
  106. }
  107. pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
  108. op->volume->vid, op->volume->name, &op->server->uuid, m);
  109. }
  110. /*
  111. * Sleep and retry the operation to the same fileserver.
  112. */
  113. static bool afs_sleep_and_retry(struct afs_operation *op)
  114. {
  115. trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
  116. if (!(op->flags & AFS_OPERATION_UNINTR)) {
  117. msleep_interruptible(1000);
  118. if (signal_pending(current)) {
  119. afs_op_set_error(op, -ERESTARTSYS);
  120. return false;
  121. }
  122. } else {
  123. msleep(1000);
  124. }
  125. return true;
  126. }
  127. /*
  128. * Select the fileserver to use. May be called multiple times to rotate
  129. * through the fileservers.
  130. */
  131. bool afs_select_fileserver(struct afs_operation *op)
  132. {
  133. struct afs_addr_list *alist;
  134. struct afs_server *server;
  135. struct afs_vnode *vnode = op->file[0].vnode;
  136. unsigned long set, failed;
  137. s32 abort_code = op->call_abort_code;
  138. int best_prio = 0;
  139. int error = op->call_error, addr_index, i, j;
  140. op->nr_iterations++;
  141. _enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
  142. op->debug_id, op->nr_iterations, op->volume->vid,
  143. op->server_index, op->untried_servers,
  144. op->addr_index, op->addr_tried,
  145. error, abort_code);
  146. if (op->flags & AFS_OPERATION_STOP) {
  147. trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
  148. _leave(" = f [stopped]");
  149. return false;
  150. }
  151. if (op->nr_iterations == 0)
  152. goto start;
  153. WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
  154. trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);
  155. /* Evaluate the result of the previous operation, if there was one. */
  156. switch (op->call_error) {
  157. case 0:
  158. clear_bit(AFS_SE_VOLUME_OFFLINE,
  159. &op->server_list->servers[op->server_index].flags);
  160. clear_bit(AFS_SE_VOLUME_BUSY,
  161. &op->server_list->servers[op->server_index].flags);
  162. op->cumul_error.responded = true;
  163. /* We succeeded, but we may need to redo the op from another
  164. * server if we're looking at a set of RO volumes where some of
  165. * the servers have not yet been brought up to date lest we
  166. * regress the data. We only switch to the new version once
  167. * >=50% of the servers are updated.
  168. */
  169. error = afs_update_volume_state(op);
  170. if (error != 0) {
  171. if (error == 1) {
  172. afs_sleep_and_retry(op);
  173. goto restart_from_beginning;
  174. }
  175. afs_op_set_error(op, error);
  176. goto failed;
  177. }
  178. fallthrough;
  179. default:
  180. /* Success or local failure. Stop. */
  181. afs_op_set_error(op, error);
  182. op->flags |= AFS_OPERATION_STOP;
  183. trace_afs_rotate(op, afs_rotate_trace_stop, error);
  184. _leave(" = f [okay/local %d]", error);
  185. return false;
  186. case -ECONNABORTED:
  187. /* The far side rejected the operation on some grounds. This
  188. * might involve the server being busy or the volume having been moved.
  189. *
  190. * Note that various V* errors should not be sent to a cache manager
  191. * by a fileserver as they should be translated to more modern UAE*
  192. * errors instead. IBM AFS and OpenAFS fileservers, however, do leak
  193. * these abort codes.
  194. */
  195. trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
  196. op->cumul_error.responded = true;
  197. switch (abort_code) {
  198. case VNOVOL:
  199. /* This fileserver doesn't know about the volume.
  200. * - May indicate that the VL is wrong - retry once and compare
  201. * the results.
  202. * - May indicate that the fileserver couldn't attach to the vol.
  203. * - The volume might have been temporarily removed so that it can
  204. * be replaced by a volume restore. "vos" might have ended one
  205. * transaction and has yet to create the next.
  206. * - The volume might not be blessed or might not be in-service
  207. * (administrative action).
  208. */
  209. if (op->flags & AFS_OPERATION_VNOVOL) {
  210. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  211. goto next_server;
  212. }
  213. write_lock(&op->volume->servers_lock);
  214. op->server_list->vnovol_mask |= 1 << op->server_index;
  215. write_unlock(&op->volume->servers_lock);
  216. set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
  217. error = afs_check_volume_status(op->volume, op);
  218. if (error < 0) {
  219. afs_op_set_error(op, error);
  220. goto failed;
  221. }
  222. if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
  223. afs_op_set_error(op, -ENOMEDIUM);
  224. goto failed;
  225. }
  226. /* If the server list didn't change, then assume that
  227. * it's the fileserver having trouble.
  228. */
  229. if (rcu_access_pointer(op->volume->servers) == op->server_list) {
  230. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  231. goto next_server;
  232. }
  233. /* Try again */
  234. op->flags |= AFS_OPERATION_VNOVOL;
  235. _leave(" = t [vnovol]");
  236. return true;
  237. case VVOLEXISTS:
  238. case VONLINE:
  239. /* These should not be returned from the fileserver. */
  240. pr_warn("Fileserver returned unexpected abort %d\n",
  241. abort_code);
  242. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  243. goto next_server;
  244. case VNOSERVICE:
  245. /* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
  246. * if the volume was neither in-service nor administratively
  247. * blessed. All usage was replaced by VNOVOL because AFS 3.1 and
  248. * earlier cache managers did not handle VNOSERVICE and assumed
  249. * it was the client OSes errno 105.
  250. *
  251. * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
  252. * fileserver idle dead time error which was sent in place of
  253. * RX_CALL_TIMEOUT (-3). The error was intended to be sent if the
  254. * fileserver took too long to send a reply to the client.
  255. * RX_CALL_TIMEOUT would have caused the cache manager to mark the
  256. * server down whereas VNOSERVICE since AFS 3.2 would cause cache
  257. * manager to temporarily (up to 15 minutes) mark the volume
  258. * instance as unusable.
  259. *
  260. * The idle dead logic resulted in cache inconsistency since a
  261. * state changing call that the cache manager assumed was dead
  262. * could still be processed to completion by the fileserver. This
  263. * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
  264. * returned. However, many 1.4.8 through 1.6.24 fileservers are
  265. * still in existence.
  266. *
  267. * AuriStorFS fileservers have never returned VNOSERVICE.
  268. *
  269. * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
  270. */
  271. case RX_CALL_TIMEOUT:
  272. afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
  273. goto next_server;
  274. case VSALVAGING: /* This error should not be leaked to cache managers
  275. * but is from OpenAFS demand attach fileservers.
  276. * It should be treated as an alias for VOFFLINE.
  277. */
  278. case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
  279. case VOFFLINE:
  280. /* The volume is in use by the volserver or another volume utility
  281. * for an operation that might alter the contents. The volume is
  282. * expected to come back but it might take a long time (could be
  283. * days).
  284. */
  285. if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
  286. &op->server_list->servers[op->server_index].flags)) {
  287. afs_busy(op, abort_code);
  288. clear_bit(AFS_SE_VOLUME_BUSY,
  289. &op->server_list->servers[op->server_index].flags);
  290. }
  291. if (op->flags & AFS_OPERATION_NO_VSLEEP) {
  292. afs_op_set_error(op, -EADV);
  293. goto failed;
  294. }
  295. goto busy;
  296. case VRESTARTING: /* The fileserver is either shutting down or starting up. */
  297. case VBUSY:
  298. /* The volume is in use by the volserver or another volume
  299. * utility for an operation that is not expected to alter the
  300. * contents of the volume. VBUSY does not need to be returned
  301. * for a ROVOL or BACKVOL bound to an ITBusy volserver
  302. * transaction. The fileserver is permitted to continue serving
  303. * content from ROVOLs and BACKVOLs during an ITBusy transaction
  304. * because the content will not change. However, many fileserver
  305. * releases do return VBUSY for ROVOL and BACKVOL instances under
  306. * many circumstances.
  307. *
  308. * Retry after going round all the servers unless we have a file
  309. * lock we need to maintain.
  310. */
  311. if (op->flags & AFS_OPERATION_NO_VSLEEP) {
  312. afs_op_set_error(op, -EBUSY);
  313. goto failed;
  314. }
  315. if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
  316. &op->server_list->servers[op->server_index].flags)) {
  317. afs_busy(op, abort_code);
  318. clear_bit(AFS_SE_VOLUME_OFFLINE,
  319. &op->server_list->servers[op->server_index].flags);
  320. }
  321. busy:
  322. if (op->flags & AFS_OPERATION_CUR_ONLY) {
  323. if (!afs_sleep_and_retry(op))
  324. goto failed;
  325. /* Retry with same server & address */
  326. _leave(" = t [vbusy]");
  327. return true;
  328. }
  329. op->flags |= AFS_OPERATION_VBUSY;
  330. goto next_server;
  331. case VMOVED:
  332. /* The volume migrated to another server. We consider
  333. * consider all locks and callbacks broken and request
  334. * an update from the VLDB.
  335. *
  336. * We also limit the number of VMOVED hops we will
  337. * honour, just in case someone sets up a loop.
  338. */
  339. if (op->flags & AFS_OPERATION_VMOVED) {
  340. afs_op_set_error(op, -EREMOTEIO);
  341. goto failed;
  342. }
  343. op->flags |= AFS_OPERATION_VMOVED;
  344. set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
  345. set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
  346. error = afs_check_volume_status(op->volume, op);
  347. if (error < 0) {
  348. afs_op_set_error(op, error);
  349. goto failed;
  350. }
  351. /* If the server list didn't change, then the VLDB is
  352. * out of sync with the fileservers. This is hopefully
  353. * a temporary condition, however, so we don't want to
  354. * permanently block access to the file.
  355. *
  356. * TODO: Try other fileservers if we can.
  357. *
  358. * TODO: Retry a few times with sleeps.
  359. */
  360. if (rcu_access_pointer(op->volume->servers) == op->server_list) {
  361. afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
  362. goto failed;
  363. }
  364. goto restart_from_beginning;
  365. case UAEIO:
  366. case VIO:
  367. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  368. if (op->volume->type != AFSVL_RWVOL)
  369. goto next_server;
  370. goto failed;
  371. case VDISKFULL:
  372. case UAENOSPC:
  373. /* The partition is full. Only applies to RWVOLs.
  374. * Translate locally and return ENOSPC.
  375. * No replicas to failover to.
  376. */
  377. afs_op_set_error(op, -ENOSPC);
  378. goto failed_but_online;
  379. case VOVERQUOTA:
  380. case UAEDQUOT:
  381. /* Volume is full. Only applies to RWVOLs.
  382. * Translate locally and return EDQUOT.
  383. * No replicas to failover to.
  384. */
  385. afs_op_set_error(op, -EDQUOT);
  386. goto failed_but_online;
  387. case RX_INVALID_OPERATION:
  388. case RXGEN_OPCODE:
  389. /* Handle downgrading to an older operation. */
  390. afs_op_set_error(op, -ENOTSUPP);
  391. if (op->flags & AFS_OPERATION_DOWNGRADE) {
  392. op->flags &= ~AFS_OPERATION_DOWNGRADE;
  393. goto go_again;
  394. }
  395. goto failed_but_online;
  396. default:
  397. afs_op_accumulate_error(op, error, abort_code);
  398. failed_but_online:
  399. clear_bit(AFS_SE_VOLUME_OFFLINE,
  400. &op->server_list->servers[op->server_index].flags);
  401. clear_bit(AFS_SE_VOLUME_BUSY,
  402. &op->server_list->servers[op->server_index].flags);
  403. goto failed;
  404. }
  405. case -ETIMEDOUT:
  406. case -ETIME:
  407. if (afs_op_error(op) != -EDESTADDRREQ)
  408. goto iterate_address;
  409. fallthrough;
  410. case -ERFKILL:
  411. case -EADDRNOTAVAIL:
  412. case -ENETUNREACH:
  413. case -EHOSTUNREACH:
  414. case -EHOSTDOWN:
  415. case -ECONNREFUSED:
  416. _debug("no conn");
  417. afs_op_accumulate_error(op, error, 0);
  418. goto iterate_address;
  419. case -ENETRESET:
  420. pr_warn("kAFS: Peer reset %s (op=%x)\n",
  421. op->type ? op->type->name : "???", op->debug_id);
  422. fallthrough;
  423. case -ECONNRESET:
  424. _debug("call reset");
  425. afs_op_set_error(op, error);
  426. goto failed;
  427. }
  428. restart_from_beginning:
  429. trace_afs_rotate(op, afs_rotate_trace_restart, 0);
  430. _debug("restart");
  431. op->estate = NULL;
  432. op->server = NULL;
  433. afs_clear_server_states(op);
  434. op->server_states = NULL;
  435. afs_put_serverlist(op->net, op->server_list);
  436. op->server_list = NULL;
  437. start:
  438. _debug("start");
  439. ASSERTCMP(op->estate, ==, NULL);
  440. /* See if we need to do an update of the volume record. Note that the
  441. * volume may have moved or even have been deleted.
  442. */
  443. error = afs_check_volume_status(op->volume, op);
  444. trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
  445. if (error < 0) {
  446. afs_op_set_error(op, error);
  447. goto failed;
  448. }
  449. if (!afs_start_fs_iteration(op, vnode))
  450. goto failed;
  451. _debug("__ VOL %llx __", op->volume->vid);
  452. pick_server:
  453. _debug("pick [%lx]", op->untried_servers);
  454. ASSERTCMP(op->estate, ==, NULL);
  455. error = afs_wait_for_fs_probes(op, op->server_states,
  456. !(op->flags & AFS_OPERATION_UNINTR));
  457. switch (error) {
  458. case 0: /* No untried responsive servers and no outstanding probes */
  459. trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
  460. goto no_more_servers;
  461. case 1: /* Got a response */
  462. trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
  463. break;
  464. case 2: /* Probe data superseded */
  465. trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
  466. goto restart_from_beginning;
  467. default:
  468. trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
  469. afs_op_set_error(op, error);
  470. goto failed;
  471. }
  472. /* Pick the untried server with the highest priority untried endpoint.
  473. * If we have outstanding callbacks, we stick with the server we're
  474. * already using if we can.
  475. */
  476. if (op->server) {
  477. _debug("server %u", op->server_index);
  478. if (test_bit(op->server_index, &op->untried_servers))
  479. goto selected_server;
  480. op->server = NULL;
  481. _debug("no server");
  482. }
  483. rcu_read_lock();
  484. op->server_index = -1;
  485. best_prio = -1;
  486. for (i = 0; i < op->server_list->nr_servers; i++) {
  487. struct afs_endpoint_state *es;
  488. struct afs_server_entry *se = &op->server_list->servers[i];
  489. struct afs_addr_list *sal;
  490. struct afs_server *s = se->server;
  491. if (!test_bit(i, &op->untried_servers) ||
  492. test_bit(AFS_SE_EXCLUDED, &se->flags) ||
  493. !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
  494. continue;
  495. es = op->server_states[i].endpoint_state;
  496. sal = es->addresses;
  497. afs_get_address_preferences_rcu(op->net, sal);
  498. for (j = 0; j < sal->nr_addrs; j++) {
  499. if (es->failed_set & (1 << j))
  500. continue;
  501. if (!sal->addrs[j].peer)
  502. continue;
  503. if (sal->addrs[j].prio > best_prio) {
  504. op->server_index = i;
  505. best_prio = sal->addrs[j].prio;
  506. }
  507. }
  508. }
  509. rcu_read_unlock();
  510. if (op->server_index == -1)
  511. goto no_more_servers;
  512. selected_server:
  513. trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
  514. _debug("use %d prio %u", op->server_index, best_prio);
  515. __clear_bit(op->server_index, &op->untried_servers);
  516. /* We're starting on a different fileserver from the list. We need to
  517. * check it, create a callback intercept, find its address list and
  518. * probe its capabilities before we use it.
  519. */
  520. ASSERTCMP(op->estate, ==, NULL);
  521. server = op->server_list->servers[op->server_index].server;
  522. if (!afs_check_server_record(op, server, op->key))
  523. goto failed;
  524. _debug("USING SERVER: %pU", &server->uuid);
  525. op->flags |= AFS_OPERATION_RETRY_SERVER;
  526. op->server = server;
  527. if (vnode->cb_server != server) {
  528. vnode->cb_server = server;
  529. vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
  530. afs_clear_cb_promise(vnode, afs_cb_promise_clear_server_change);
  531. }
  532. retry_server:
  533. op->addr_tried = 0;
  534. op->addr_index = -1;
  535. iterate_address:
  536. /* Iterate over the current server's address list to try and find an
  537. * address on which it will respond to us.
  538. */
  539. op->estate = op->server_states[op->server_index].endpoint_state;
  540. set = READ_ONCE(op->estate->responsive_set);
  541. failed = READ_ONCE(op->estate->failed_set);
  542. _debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
  543. set &= ~(failed | op->addr_tried);
  544. trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
  545. if (!set)
  546. goto wait_for_more_probe_results;
  547. alist = op->estate->addresses;
  548. best_prio = -1;
  549. addr_index = 0;
  550. for (i = 0; i < alist->nr_addrs; i++) {
  551. if (!(set & (1 << i)))
  552. continue;
  553. if (alist->addrs[i].prio > best_prio) {
  554. addr_index = i;
  555. best_prio = alist->addrs[i].prio;
  556. }
  557. }
  558. alist->preferred = addr_index;
  559. op->addr_index = addr_index;
  560. set_bit(addr_index, &op->addr_tried);
  561. _debug("address [%u] %u/%u %pISp",
  562. op->server_index, addr_index, alist->nr_addrs,
  563. rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
  564. go_again:
  565. op->volsync.creation = TIME64_MIN;
  566. op->volsync.update = TIME64_MIN;
  567. op->call_responded = false;
  568. _leave(" = t");
  569. return true;
  570. wait_for_more_probe_results:
  571. error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
  572. !(op->flags & AFS_OPERATION_UNINTR));
  573. if (error == 1)
  574. goto iterate_address;
  575. if (!error)
  576. goto restart_from_beginning;
  577. /* We've now had a failure to respond on all of a server's addresses -
  578. * immediately probe them again and consider retrying the server.
  579. */
  580. trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
  581. afs_probe_fileserver(op->net, op->server);
  582. if (op->flags & AFS_OPERATION_RETRY_SERVER) {
  583. error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
  584. !(op->flags & AFS_OPERATION_UNINTR));
  585. switch (error) {
  586. case 1:
  587. op->flags &= ~AFS_OPERATION_RETRY_SERVER;
  588. trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
  589. goto retry_server;
  590. case 0:
  591. trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
  592. goto restart_from_beginning;
  593. case -ERESTARTSYS:
  594. afs_op_set_error(op, error);
  595. goto failed;
  596. case -ETIME:
  597. case -EDESTADDRREQ:
  598. goto next_server;
  599. }
  600. }
  601. next_server:
  602. trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
  603. _debug("next");
  604. op->estate = NULL;
  605. goto pick_server;
  606. no_more_servers:
  607. /* That's all the servers poked to no good effect. Try again if some
  608. * of them were busy.
  609. */
  610. trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
  611. if (op->flags & AFS_OPERATION_VBUSY) {
  612. afs_sleep_and_retry(op);
  613. op->flags &= ~AFS_OPERATION_VBUSY;
  614. goto restart_from_beginning;
  615. }
  616. rcu_read_lock();
  617. for (i = 0; i < op->server_list->nr_servers; i++) {
  618. struct afs_endpoint_state *estate;
  619. estate = op->server_states[i].endpoint_state;
  620. error = READ_ONCE(estate->error);
  621. if (error < 0)
  622. afs_op_accumulate_error(op, error, estate->abort_code);
  623. }
  624. rcu_read_unlock();
  625. failed:
  626. trace_afs_rotate(op, afs_rotate_trace_failed, 0);
  627. op->flags |= AFS_OPERATION_STOP;
  628. op->estate = NULL;
  629. _leave(" = f [failed %d]", afs_op_error(op));
  630. return false;
  631. }
  632. /*
  633. * Dump cursor state in the case of the error being EDESTADDRREQ.
  634. */
  635. void afs_dump_edestaddrreq(const struct afs_operation *op)
  636. {
  637. static int count;
  638. int i;
  639. if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
  640. return;
  641. count++;
  642. rcu_read_lock();
  643. pr_notice("EDESTADDR occurred\n");
  644. pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
  645. op->file[0].cb_break_before,
  646. op->file[1].cb_break_before, op->flags, op->cumul_error.error);
  647. pr_notice("OP: ut=%lx ix=%d ni=%u\n",
  648. op->untried_servers, op->server_index, op->nr_iterations);
  649. pr_notice("OP: call er=%d ac=%d r=%u\n",
  650. op->call_error, op->call_abort_code, op->call_responded);
  651. if (op->server_list) {
  652. const struct afs_server_list *sl = op->server_list;
  653. pr_notice("FC: SL nr=%u vnov=%hx\n",
  654. sl->nr_servers, sl->vnovol_mask);
  655. for (i = 0; i < sl->nr_servers; i++) {
  656. const struct afs_server *s = sl->servers[i].server;
  657. const struct afs_endpoint_state *e =
  658. rcu_dereference(s->endpoint_state);
  659. const struct afs_addr_list *a = e->addresses;
  660. pr_notice("FC: server fl=%lx av=%u %pU\n",
  661. s->flags, s->addr_version, &s->uuid);
  662. pr_notice("FC: - pq=%x R=%lx F=%lx\n",
  663. e->probe_seq, e->responsive_set, e->failed_set);
  664. if (a) {
  665. pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
  666. a->version,
  667. a->nr_ipv4, a->nr_addrs, a->max_addrs,
  668. a->preferred);
  669. if (a == e->addresses)
  670. pr_notice("FC: - current\n");
  671. }
  672. }
  673. }
  674. pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
  675. rcu_read_unlock();
  676. }