sch_taprio.c 67 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* net/sched/sch_taprio.c Time Aware Priority Scheduler
  3. *
  4. * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
  5. *
  6. */
  7. #include <linux/ethtool.h>
  8. #include <linux/ethtool_netlink.h>
  9. #include <linux/types.h>
  10. #include <linux/slab.h>
  11. #include <linux/kernel.h>
  12. #include <linux/string.h>
  13. #include <linux/list.h>
  14. #include <linux/errno.h>
  15. #include <linux/skbuff.h>
  16. #include <linux/math64.h>
  17. #include <linux/module.h>
  18. #include <linux/spinlock.h>
  19. #include <linux/rcupdate.h>
  20. #include <linux/time.h>
  21. #include <net/gso.h>
  22. #include <net/netlink.h>
  23. #include <net/pkt_sched.h>
  24. #include <net/pkt_cls.h>
  25. #include <net/sch_generic.h>
  26. #include <net/sock.h>
  27. #include <net/tcp.h>
  28. #define TAPRIO_STAT_NOT_SET (~0ULL)
  29. #include "sch_mqprio_lib.h"
  30. static LIST_HEAD(taprio_list);
  31. static struct static_key_false taprio_have_broken_mqprio;
  32. static struct static_key_false taprio_have_working_mqprio;
  33. #define TAPRIO_ALL_GATES_OPEN -1
  34. #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
  35. #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
  36. #define TAPRIO_SUPPORTED_FLAGS \
  37. (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
  38. #define TAPRIO_FLAGS_INVALID U32_MAX
  39. /* Minimum value for picos_per_byte to ensure non-zero duration
  40. * for minimum-sized Ethernet frames (ETH_ZLEN = 60).
  41. * 60 * 17 > PSEC_PER_NSEC (1000)
  42. */
  43. #define TAPRIO_PICOS_PER_BYTE_MIN 17
  44. struct sched_entry {
  45. /* Durations between this GCL entry and the GCL entry where the
  46. * respective traffic class gate closes
  47. */
  48. u64 gate_duration[TC_MAX_QUEUE];
  49. atomic_t budget[TC_MAX_QUEUE];
  50. /* The qdisc makes some effort so that no packet leaves
  51. * after this time
  52. */
  53. ktime_t gate_close_time[TC_MAX_QUEUE];
  54. struct list_head list;
  55. /* Used to calculate when to advance the schedule */
  56. ktime_t end_time;
  57. ktime_t next_txtime;
  58. int index;
  59. u32 gate_mask;
  60. u32 interval;
  61. u8 command;
  62. };
  63. struct sched_gate_list {
  64. /* Longest non-zero contiguous gate durations per traffic class,
  65. * or 0 if a traffic class gate never opens during the schedule.
  66. */
  67. u64 max_open_gate_duration[TC_MAX_QUEUE];
  68. u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
  69. u32 max_sdu[TC_MAX_QUEUE]; /* for dump */
  70. struct rcu_head rcu;
  71. struct list_head entries;
  72. size_t num_entries;
  73. ktime_t cycle_end_time;
  74. s64 cycle_time;
  75. s64 cycle_time_extension;
  76. s64 base_time;
  77. };
  78. struct taprio_sched {
  79. struct Qdisc **qdiscs;
  80. struct Qdisc *root;
  81. u32 flags;
  82. enum tk_offsets tk_offset;
  83. int clockid;
  84. bool offloaded;
  85. bool detected_mqprio;
  86. bool broken_mqprio;
  87. atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
  88. * speeds it's sub-nanoseconds per byte
  89. */
  90. /* Protects the update side of the RCU protected current_entry */
  91. spinlock_t current_entry_lock;
  92. struct sched_entry __rcu *current_entry;
  93. struct sched_gate_list __rcu *oper_sched;
  94. struct sched_gate_list __rcu *admin_sched;
  95. struct hrtimer advance_timer;
  96. struct list_head taprio_list;
  97. int cur_txq[TC_MAX_QUEUE];
  98. u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
  99. u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
  100. u32 txtime_delay;
  101. };
  102. struct __tc_taprio_qopt_offload {
  103. refcount_t users;
  104. struct tc_taprio_qopt_offload offload;
  105. };
  106. static void taprio_calculate_gate_durations(struct taprio_sched *q,
  107. struct sched_gate_list *sched)
  108. {
  109. struct net_device *dev = qdisc_dev(q->root);
  110. int num_tc = netdev_get_num_tc(dev);
  111. struct sched_entry *entry, *cur;
  112. int tc;
  113. list_for_each_entry(entry, &sched->entries, list) {
  114. u32 gates_still_open = entry->gate_mask;
  115. /* For each traffic class, calculate each open gate duration,
  116. * starting at this schedule entry and ending at the schedule
  117. * entry containing a gate close event for that TC.
  118. */
  119. cur = entry;
  120. do {
  121. if (!gates_still_open)
  122. break;
  123. for (tc = 0; tc < num_tc; tc++) {
  124. if (!(gates_still_open & BIT(tc)))
  125. continue;
  126. if (cur->gate_mask & BIT(tc))
  127. entry->gate_duration[tc] += cur->interval;
  128. else
  129. gates_still_open &= ~BIT(tc);
  130. }
  131. cur = list_next_entry_circular(cur, &sched->entries, list);
  132. } while (cur != entry);
  133. /* Keep track of the maximum gate duration for each traffic
  134. * class, taking care to not confuse a traffic class which is
  135. * temporarily closed with one that is always closed.
  136. */
  137. for (tc = 0; tc < num_tc; tc++)
  138. if (entry->gate_duration[tc] &&
  139. sched->max_open_gate_duration[tc] < entry->gate_duration[tc])
  140. sched->max_open_gate_duration[tc] = entry->gate_duration[tc];
  141. }
  142. }
  143. static bool taprio_entry_allows_tx(ktime_t skb_end_time,
  144. struct sched_entry *entry, int tc)
  145. {
  146. return ktime_before(skb_end_time, entry->gate_close_time[tc]);
  147. }
  148. static ktime_t sched_base_time(const struct sched_gate_list *sched)
  149. {
  150. if (!sched)
  151. return KTIME_MAX;
  152. return ns_to_ktime(sched->base_time);
  153. }
  154. static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
  155. {
  156. /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
  157. enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
  158. switch (tk_offset) {
  159. case TK_OFFS_MAX:
  160. return mono;
  161. default:
  162. return ktime_mono_to_any(mono, tk_offset);
  163. }
  164. }
  165. static ktime_t taprio_get_time(const struct taprio_sched *q)
  166. {
  167. return taprio_mono_to_any(q, ktime_get());
  168. }
  169. static void taprio_free_sched_cb(struct rcu_head *head)
  170. {
  171. struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
  172. struct sched_entry *entry, *n;
  173. list_for_each_entry_safe(entry, n, &sched->entries, list) {
  174. list_del(&entry->list);
  175. kfree(entry);
  176. }
  177. kfree(sched);
  178. }
  179. static void switch_schedules(struct taprio_sched *q,
  180. struct sched_gate_list **admin,
  181. struct sched_gate_list **oper)
  182. {
  183. rcu_assign_pointer(q->oper_sched, *admin);
  184. rcu_assign_pointer(q->admin_sched, NULL);
  185. if (*oper)
  186. call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
  187. *oper = *admin;
  188. *admin = NULL;
  189. }
  190. /* Get how much time has been already elapsed in the current cycle. */
  191. static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
  192. {
  193. ktime_t time_since_sched_start;
  194. s32 time_elapsed;
  195. time_since_sched_start = ktime_sub(time, sched->base_time);
  196. div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
  197. return time_elapsed;
  198. }
  199. static ktime_t get_interval_end_time(struct sched_gate_list *sched,
  200. struct sched_gate_list *admin,
  201. struct sched_entry *entry,
  202. ktime_t intv_start)
  203. {
  204. s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
  205. ktime_t intv_end, cycle_ext_end, cycle_end;
  206. cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
  207. intv_end = ktime_add_ns(intv_start, entry->interval);
  208. cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
  209. if (ktime_before(intv_end, cycle_end))
  210. return intv_end;
  211. else if (admin && admin != sched &&
  212. ktime_after(admin->base_time, cycle_end) &&
  213. ktime_before(admin->base_time, cycle_ext_end))
  214. return admin->base_time;
  215. else
  216. return cycle_end;
  217. }
  218. static int length_to_duration(struct taprio_sched *q, int len)
  219. {
  220. return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
  221. }
  222. static int duration_to_length(struct taprio_sched *q, u64 duration)
  223. {
  224. return div_u64(duration * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte));
  225. }
  226. /* Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the
  227. * q->max_sdu[] requested by the user and the max_sdu dynamically determined by
  228. * the maximum open gate durations at the given link speed.
  229. */
  230. static void taprio_update_queue_max_sdu(struct taprio_sched *q,
  231. struct sched_gate_list *sched,
  232. struct qdisc_size_table *stab)
  233. {
  234. struct net_device *dev = qdisc_dev(q->root);
  235. int num_tc = netdev_get_num_tc(dev);
  236. u32 max_sdu_from_user;
  237. u32 max_sdu_dynamic;
  238. u32 max_sdu;
  239. int tc;
  240. for (tc = 0; tc < num_tc; tc++) {
  241. max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX;
  242. /* TC gate never closes => keep the queueMaxSDU
  243. * selected by the user
  244. */
  245. if (sched->max_open_gate_duration[tc] == sched->cycle_time) {
  246. max_sdu_dynamic = U32_MAX;
  247. } else {
  248. u32 max_frm_len;
  249. max_frm_len = duration_to_length(q, sched->max_open_gate_duration[tc]);
  250. /* Compensate for L1 overhead from size table,
  251. * but don't let the frame size go negative
  252. */
  253. if (stab) {
  254. max_frm_len -= stab->szopts.overhead;
  255. max_frm_len = max_t(int, max_frm_len,
  256. dev->hard_header_len + 1);
  257. }
  258. max_sdu_dynamic = max_frm_len - dev->hard_header_len;
  259. if (max_sdu_dynamic > dev->max_mtu)
  260. max_sdu_dynamic = U32_MAX;
  261. }
  262. max_sdu = min(max_sdu_dynamic, max_sdu_from_user);
  263. if (max_sdu != U32_MAX) {
  264. sched->max_frm_len[tc] = max_sdu + dev->hard_header_len;
  265. sched->max_sdu[tc] = max_sdu;
  266. } else {
  267. sched->max_frm_len[tc] = U32_MAX; /* never oversized */
  268. sched->max_sdu[tc] = 0;
  269. }
  270. }
  271. }
  272. /* Returns the entry corresponding to next available interval. If
  273. * validate_interval is set, it only validates whether the timestamp occurs
  274. * when the gate corresponding to the skb's traffic class is open.
  275. */
  276. static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
  277. struct Qdisc *sch,
  278. struct sched_gate_list *sched,
  279. struct sched_gate_list *admin,
  280. ktime_t time,
  281. ktime_t *interval_start,
  282. ktime_t *interval_end,
  283. bool validate_interval)
  284. {
  285. ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
  286. ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
  287. struct sched_entry *entry = NULL, *entry_found = NULL;
  288. struct taprio_sched *q = qdisc_priv(sch);
  289. struct net_device *dev = qdisc_dev(sch);
  290. bool entry_available = false;
  291. s32 cycle_elapsed;
  292. int tc, n;
  293. tc = netdev_get_prio_tc_map(dev, skb->priority);
  294. packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
  295. *interval_start = 0;
  296. *interval_end = 0;
  297. if (!sched)
  298. return NULL;
  299. cycle = sched->cycle_time;
  300. cycle_elapsed = get_cycle_time_elapsed(sched, time);
  301. curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
  302. cycle_end = ktime_add_ns(curr_intv_end, cycle);
  303. list_for_each_entry(entry, &sched->entries, list) {
  304. curr_intv_start = curr_intv_end;
  305. curr_intv_end = get_interval_end_time(sched, admin, entry,
  306. curr_intv_start);
  307. if (ktime_after(curr_intv_start, cycle_end))
  308. break;
  309. if (!(entry->gate_mask & BIT(tc)) ||
  310. packet_transmit_time > entry->interval)
  311. continue;
  312. txtime = entry->next_txtime;
  313. if (ktime_before(txtime, time) || validate_interval) {
  314. transmit_end_time = ktime_add_ns(time, packet_transmit_time);
  315. if ((ktime_before(curr_intv_start, time) &&
  316. ktime_before(transmit_end_time, curr_intv_end)) ||
  317. (ktime_after(curr_intv_start, time) && !validate_interval)) {
  318. entry_found = entry;
  319. *interval_start = curr_intv_start;
  320. *interval_end = curr_intv_end;
  321. break;
  322. } else if (!entry_available && !validate_interval) {
  323. /* Here, we are just trying to find out the
  324. * first available interval in the next cycle.
  325. */
  326. entry_available = true;
  327. entry_found = entry;
  328. *interval_start = ktime_add_ns(curr_intv_start, cycle);
  329. *interval_end = ktime_add_ns(curr_intv_end, cycle);
  330. }
  331. } else if (ktime_before(txtime, earliest_txtime) &&
  332. !entry_available) {
  333. earliest_txtime = txtime;
  334. entry_found = entry;
  335. n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
  336. *interval_start = ktime_add(curr_intv_start, n * cycle);
  337. *interval_end = ktime_add(curr_intv_end, n * cycle);
  338. }
  339. }
  340. return entry_found;
  341. }
  342. static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
  343. {
  344. struct taprio_sched *q = qdisc_priv(sch);
  345. struct sched_gate_list *sched, *admin;
  346. ktime_t interval_start, interval_end;
  347. struct sched_entry *entry;
  348. rcu_read_lock();
  349. sched = rcu_dereference(q->oper_sched);
  350. admin = rcu_dereference(q->admin_sched);
  351. entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
  352. &interval_start, &interval_end, true);
  353. rcu_read_unlock();
  354. return entry;
  355. }
  356. /* This returns the tstamp value set by TCP in terms of the set clock. */
  357. static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
  358. {
  359. unsigned int offset = skb_network_offset(skb);
  360. const struct ipv6hdr *ipv6h;
  361. const struct iphdr *iph;
  362. struct ipv6hdr _ipv6h;
  363. ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
  364. if (!ipv6h)
  365. return 0;
  366. if (ipv6h->version == 4) {
  367. iph = (struct iphdr *)ipv6h;
  368. offset += iph->ihl * 4;
  369. /* special-case 6in4 tunnelling, as that is a common way to get
  370. * v6 connectivity in the home
  371. */
  372. if (iph->protocol == IPPROTO_IPV6) {
  373. ipv6h = skb_header_pointer(skb, offset,
  374. sizeof(_ipv6h), &_ipv6h);
  375. if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
  376. return 0;
  377. } else if (iph->protocol != IPPROTO_TCP) {
  378. return 0;
  379. }
  380. } else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
  381. return 0;
  382. }
  383. return taprio_mono_to_any(q, skb->skb_mstamp_ns);
  384. }
  385. /* There are a few scenarios where we will have to modify the txtime from
  386. * what is read from next_txtime in sched_entry. They are:
  387. * 1. If txtime is in the past,
  388. * a. The gate for the traffic class is currently open and packet can be
  389. * transmitted before it closes, schedule the packet right away.
  390. * b. If the gate corresponding to the traffic class is going to open later
  391. * in the cycle, set the txtime of packet to the interval start.
  392. * 2. If txtime is in the future, there are packets corresponding to the
  393. * current traffic class waiting to be transmitted. So, the following
  394. * possibilities exist:
  395. * a. We can transmit the packet before the window containing the txtime
  396. * closes.
  397. * b. The window might close before the transmission can be completed
  398. * successfully. So, schedule the packet in the next open window.
  399. */
  400. static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
  401. {
  402. ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
  403. struct taprio_sched *q = qdisc_priv(sch);
  404. struct sched_gate_list *sched, *admin;
  405. ktime_t minimum_time, now, txtime;
  406. int len, packet_transmit_time;
  407. struct sched_entry *entry;
  408. bool sched_changed;
  409. now = taprio_get_time(q);
  410. minimum_time = ktime_add_ns(now, q->txtime_delay);
  411. tcp_tstamp = get_tcp_tstamp(q, skb);
  412. minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
  413. rcu_read_lock();
  414. admin = rcu_dereference(q->admin_sched);
  415. sched = rcu_dereference(q->oper_sched);
  416. if (admin && ktime_after(minimum_time, admin->base_time))
  417. switch_schedules(q, &admin, &sched);
  418. /* Until the schedule starts, all the queues are open */
  419. if (!sched || ktime_before(minimum_time, sched->base_time)) {
  420. txtime = minimum_time;
  421. goto done;
  422. }
  423. len = qdisc_pkt_len(skb);
  424. packet_transmit_time = length_to_duration(q, len);
  425. do {
  426. sched_changed = false;
  427. entry = find_entry_to_transmit(skb, sch, sched, admin,
  428. minimum_time,
  429. &interval_start, &interval_end,
  430. false);
  431. if (!entry) {
  432. txtime = 0;
  433. goto done;
  434. }
  435. txtime = entry->next_txtime;
  436. txtime = max_t(ktime_t, txtime, minimum_time);
  437. txtime = max_t(ktime_t, txtime, interval_start);
  438. if (admin && admin != sched &&
  439. ktime_after(txtime, admin->base_time)) {
  440. sched = admin;
  441. sched_changed = true;
  442. continue;
  443. }
  444. transmit_end_time = ktime_add(txtime, packet_transmit_time);
  445. minimum_time = transmit_end_time;
  446. /* Update the txtime of current entry to the next time it's
  447. * interval starts.
  448. */
  449. if (ktime_after(transmit_end_time, interval_end))
  450. entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
  451. } while (sched_changed || ktime_after(transmit_end_time, interval_end));
  452. entry->next_txtime = transmit_end_time;
  453. done:
  454. rcu_read_unlock();
  455. return txtime;
  456. }
  457. /* Devices with full offload are expected to honor this in hardware */
  458. static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
  459. struct sk_buff *skb)
  460. {
  461. struct taprio_sched *q = qdisc_priv(sch);
  462. struct net_device *dev = qdisc_dev(sch);
  463. struct sched_gate_list *sched;
  464. int prio = skb->priority;
  465. bool exceeds = false;
  466. u8 tc;
  467. tc = netdev_get_prio_tc_map(dev, prio);
  468. rcu_read_lock();
  469. sched = rcu_dereference(q->oper_sched);
  470. if (sched && skb->len > sched->max_frm_len[tc])
  471. exceeds = true;
  472. rcu_read_unlock();
  473. return exceeds;
  474. }
  475. static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
  476. struct Qdisc *child, struct sk_buff **to_free)
  477. {
  478. struct taprio_sched *q = qdisc_priv(sch);
  479. /* sk_flags are only safe to use on full sockets. */
  480. if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
  481. if (!is_valid_interval(skb, sch))
  482. return qdisc_drop(skb, sch, to_free);
  483. } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  484. skb->tstamp = get_packet_txtime(skb, sch);
  485. if (!skb->tstamp)
  486. return qdisc_drop(skb, sch, to_free);
  487. }
  488. qdisc_qstats_backlog_inc(sch, skb);
  489. sch->q.qlen++;
  490. return qdisc_enqueue(skb, child, to_free);
  491. }
  492. static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
  493. struct Qdisc *child,
  494. struct sk_buff **to_free)
  495. {
  496. unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
  497. netdev_features_t features = netif_skb_features(skb);
  498. struct sk_buff *segs, *nskb;
  499. int ret;
  500. segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
  501. if (IS_ERR_OR_NULL(segs))
  502. return qdisc_drop(skb, sch, to_free);
  503. skb_list_walk_safe(segs, segs, nskb) {
  504. skb_mark_not_on_list(segs);
  505. qdisc_skb_cb(segs)->pkt_len = segs->len;
  506. qdisc_skb_cb(segs)->pkt_segs = 1;
  507. slen += segs->len;
  508. /* FIXME: we should be segmenting to a smaller size
  509. * rather than dropping these
  510. */
  511. if (taprio_skb_exceeds_queue_max_sdu(sch, segs))
  512. ret = qdisc_drop(segs, sch, to_free);
  513. else
  514. ret = taprio_enqueue_one(segs, sch, child, to_free);
  515. if (ret != NET_XMIT_SUCCESS) {
  516. if (net_xmit_drop_count(ret))
  517. qdisc_qstats_drop(sch);
  518. } else {
  519. numsegs++;
  520. }
  521. }
  522. if (numsegs > 1)
  523. qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
  524. consume_skb(skb);
  525. return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
  526. }
  527. /* Will not be called in the full offload case, since the TX queues are
  528. * attached to the Qdisc created using qdisc_create_dflt()
  529. */
  530. static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  531. struct sk_buff **to_free)
  532. {
  533. struct taprio_sched *q = qdisc_priv(sch);
  534. struct Qdisc *child;
  535. int queue;
  536. queue = skb_get_queue_mapping(skb);
  537. child = q->qdiscs[queue];
  538. if (unlikely(!child))
  539. return qdisc_drop(skb, sch, to_free);
  540. if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) {
  541. /* Large packets might not be transmitted when the transmission
  542. * duration exceeds any configured interval. Therefore, segment
  543. * the skb into smaller chunks. Drivers with full offload are
  544. * expected to handle this in hardware.
  545. */
  546. if (skb_is_gso(skb))
  547. return taprio_enqueue_segmented(skb, sch, child,
  548. to_free);
  549. return qdisc_drop(skb, sch, to_free);
  550. }
  551. return taprio_enqueue_one(skb, sch, child, to_free);
  552. }
  553. static struct sk_buff *taprio_peek(struct Qdisc *sch)
  554. {
  555. WARN_ONCE(1, "taprio only supports operating as root qdisc, peek() not implemented");
  556. return NULL;
  557. }
  558. static void taprio_set_budgets(struct taprio_sched *q,
  559. struct sched_gate_list *sched,
  560. struct sched_entry *entry)
  561. {
  562. struct net_device *dev = qdisc_dev(q->root);
  563. int num_tc = netdev_get_num_tc(dev);
  564. int tc, budget;
  565. for (tc = 0; tc < num_tc; tc++) {
  566. /* Traffic classes which never close have infinite budget */
  567. if (entry->gate_duration[tc] == sched->cycle_time)
  568. budget = INT_MAX;
  569. else
  570. budget = div64_u64((u64)entry->gate_duration[tc] * PSEC_PER_NSEC,
  571. atomic64_read(&q->picos_per_byte));
  572. atomic_set(&entry->budget[tc], budget);
  573. }
  574. }
  575. /* When an skb is sent, it consumes from the budget of all traffic classes */
  576. static int taprio_update_budgets(struct sched_entry *entry, size_t len,
  577. int tc_consumed, int num_tc)
  578. {
  579. int tc, budget, new_budget = 0;
  580. for (tc = 0; tc < num_tc; tc++) {
  581. budget = atomic_read(&entry->budget[tc]);
  582. /* Don't consume from infinite budget */
  583. if (budget == INT_MAX) {
  584. if (tc == tc_consumed)
  585. new_budget = budget;
  586. continue;
  587. }
  588. if (tc == tc_consumed)
  589. new_budget = atomic_sub_return(len, &entry->budget[tc]);
  590. else
  591. atomic_sub(len, &entry->budget[tc]);
  592. }
  593. return new_budget;
  594. }
  595. static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
  596. struct sched_entry *entry,
  597. u32 gate_mask)
  598. {
  599. struct taprio_sched *q = qdisc_priv(sch);
  600. struct net_device *dev = qdisc_dev(sch);
  601. struct Qdisc *child = q->qdiscs[txq];
  602. int num_tc = netdev_get_num_tc(dev);
  603. struct sk_buff *skb;
  604. ktime_t guard;
  605. int prio;
  606. int len;
  607. u8 tc;
  608. if (unlikely(!child))
  609. return NULL;
  610. if (TXTIME_ASSIST_IS_ENABLED(q->flags))
  611. goto skip_peek_checks;
  612. skb = child->ops->peek(child);
  613. if (!skb)
  614. return NULL;
  615. prio = skb->priority;
  616. tc = netdev_get_prio_tc_map(dev, prio);
  617. if (!(gate_mask & BIT(tc)))
  618. return NULL;
  619. len = qdisc_pkt_len(skb);
  620. guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));
  621. /* In the case that there's no gate entry, there's no
  622. * guard band ...
  623. */
  624. if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
  625. !taprio_entry_allows_tx(guard, entry, tc))
  626. return NULL;
  627. /* ... and no budget. */
  628. if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
  629. taprio_update_budgets(entry, len, tc, num_tc) < 0)
  630. return NULL;
  631. skip_peek_checks:
  632. skb = child->ops->dequeue(child);
  633. if (unlikely(!skb))
  634. return NULL;
  635. qdisc_bstats_update(sch, skb);
  636. qdisc_qstats_backlog_dec(sch, skb);
  637. sch->q.qlen--;
  638. return skb;
  639. }
  640. static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq)
  641. {
  642. int offset = dev->tc_to_txq[tc].offset;
  643. int count = dev->tc_to_txq[tc].count;
  644. (*txq)++;
  645. if (*txq == offset + count)
  646. *txq = offset;
  647. }
  648. /* Prioritize higher traffic classes, and select among TXQs belonging to the
  649. * same TC using round robin
  650. */
  651. static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
  652. struct sched_entry *entry,
  653. u32 gate_mask)
  654. {
  655. struct taprio_sched *q = qdisc_priv(sch);
  656. struct net_device *dev = qdisc_dev(sch);
  657. int num_tc = netdev_get_num_tc(dev);
  658. struct sk_buff *skb;
  659. int tc;
  660. for (tc = num_tc - 1; tc >= 0; tc--) {
  661. int first_txq = q->cur_txq[tc];
  662. if (!(gate_mask & BIT(tc)))
  663. continue;
  664. do {
  665. skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc],
  666. entry, gate_mask);
  667. taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
  668. if (q->cur_txq[tc] >= dev->num_tx_queues)
  669. q->cur_txq[tc] = first_txq;
  670. if (skb)
  671. return skb;
  672. } while (q->cur_txq[tc] != first_txq);
  673. }
  674. return NULL;
  675. }
  676. /* Broken way of prioritizing smaller TXQ indices and ignoring the traffic
  677. * class other than to determine whether the gate is open or not
  678. */
  679. static struct sk_buff *taprio_dequeue_txq_priority(struct Qdisc *sch,
  680. struct sched_entry *entry,
  681. u32 gate_mask)
  682. {
  683. struct net_device *dev = qdisc_dev(sch);
  684. struct sk_buff *skb;
  685. int i;
  686. for (i = 0; i < dev->num_tx_queues; i++) {
  687. skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask);
  688. if (skb)
  689. return skb;
  690. }
  691. return NULL;
  692. }
  693. /* Will not be called in the full offload case, since the TX queues are
  694. * attached to the Qdisc created using qdisc_create_dflt()
  695. */
  696. static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
  697. {
  698. struct taprio_sched *q = qdisc_priv(sch);
  699. struct sk_buff *skb = NULL;
  700. struct sched_entry *entry;
  701. u32 gate_mask;
  702. rcu_read_lock();
  703. entry = rcu_dereference(q->current_entry);
  704. /* if there's no entry, it means that the schedule didn't
  705. * start yet, so force all gates to be open, this is in
  706. * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
  707. * "AdminGateStates"
  708. */
  709. gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
  710. if (!gate_mask)
  711. goto done;
  712. if (static_branch_unlikely(&taprio_have_broken_mqprio) &&
  713. !static_branch_likely(&taprio_have_working_mqprio)) {
  714. /* Single NIC kind which is broken */
  715. skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
  716. } else if (static_branch_likely(&taprio_have_working_mqprio) &&
  717. !static_branch_unlikely(&taprio_have_broken_mqprio)) {
  718. /* Single NIC kind which prioritizes properly */
  719. skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
  720. } else {
  721. /* Mixed NIC kinds present in system, need dynamic testing */
  722. if (q->broken_mqprio)
  723. skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
  724. else
  725. skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
  726. }
  727. done:
  728. rcu_read_unlock();
  729. return skb;
  730. }
  731. static bool should_restart_cycle(const struct sched_gate_list *oper,
  732. const struct sched_entry *entry)
  733. {
  734. if (list_is_last(&entry->list, &oper->entries))
  735. return true;
  736. if (ktime_compare(entry->end_time, oper->cycle_end_time) == 0)
  737. return true;
  738. return false;
  739. }
  740. static bool should_change_schedules(const struct sched_gate_list *admin,
  741. const struct sched_gate_list *oper,
  742. ktime_t end_time)
  743. {
  744. ktime_t next_base_time, extension_time;
  745. if (!admin)
  746. return false;
  747. next_base_time = sched_base_time(admin);
  748. /* This is the simple case, the end_time would fall after
  749. * the next schedule base_time.
  750. */
  751. if (ktime_compare(next_base_time, end_time) <= 0)
  752. return true;
  753. /* This is the cycle_time_extension case, if the end_time
  754. * plus the amount that can be extended would fall after the
  755. * next schedule base_time, we can extend the current schedule
  756. * for that amount.
  757. */
  758. extension_time = ktime_add_ns(end_time, oper->cycle_time_extension);
  759. /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
  760. * how precisely the extension should be made. So after
  761. * conformance testing, this logic may change.
  762. */
  763. if (ktime_compare(next_base_time, extension_time) <= 0)
  764. return true;
  765. return false;
  766. }
  767. static enum hrtimer_restart advance_sched(struct hrtimer *timer)
  768. {
  769. struct taprio_sched *q = container_of(timer, struct taprio_sched,
  770. advance_timer);
  771. struct net_device *dev = qdisc_dev(q->root);
  772. struct sched_gate_list *oper, *admin;
  773. int num_tc = netdev_get_num_tc(dev);
  774. struct sched_entry *entry, *next;
  775. struct Qdisc *sch = q->root;
  776. ktime_t end_time;
  777. int tc;
  778. spin_lock(&q->current_entry_lock);
  779. entry = rcu_dereference_protected(q->current_entry,
  780. lockdep_is_held(&q->current_entry_lock));
  781. oper = rcu_dereference_protected(q->oper_sched,
  782. lockdep_is_held(&q->current_entry_lock));
  783. admin = rcu_dereference_protected(q->admin_sched,
  784. lockdep_is_held(&q->current_entry_lock));
  785. if (!oper)
  786. switch_schedules(q, &admin, &oper);
  787. /* This can happen in two cases: 1. this is the very first run
  788. * of this function (i.e. we weren't running any schedule
  789. * previously); 2. The previous schedule just ended. The first
  790. * entry of all schedules are pre-calculated during the
  791. * schedule initialization.
  792. */
  793. if (unlikely(!entry || entry->end_time == oper->base_time)) {
  794. next = list_first_entry(&oper->entries, struct sched_entry,
  795. list);
  796. end_time = next->end_time;
  797. goto first_run;
  798. }
  799. if (should_restart_cycle(oper, entry)) {
  800. next = list_first_entry(&oper->entries, struct sched_entry,
  801. list);
  802. oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
  803. oper->cycle_time);
  804. } else {
  805. next = list_next_entry(entry, list);
  806. }
  807. end_time = ktime_add_ns(entry->end_time, next->interval);
  808. end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
  809. for (tc = 0; tc < num_tc; tc++) {
  810. if (next->gate_duration[tc] == oper->cycle_time)
  811. next->gate_close_time[tc] = KTIME_MAX;
  812. else
  813. next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
  814. next->gate_duration[tc]);
  815. }
  816. if (should_change_schedules(admin, oper, end_time)) {
  817. /* Set things so the next time this runs, the new
  818. * schedule runs.
  819. */
  820. end_time = sched_base_time(admin);
  821. switch_schedules(q, &admin, &oper);
  822. }
  823. next->end_time = end_time;
  824. taprio_set_budgets(q, oper, next);
  825. first_run:
  826. rcu_assign_pointer(q->current_entry, next);
  827. spin_unlock(&q->current_entry_lock);
  828. hrtimer_set_expires(&q->advance_timer, end_time);
  829. rcu_read_lock();
  830. __netif_schedule(sch);
  831. rcu_read_unlock();
  832. return HRTIMER_RESTART;
  833. }
  834. static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
  835. [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 },
  836. [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 },
  837. [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
  838. [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
  839. };
  840. static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
  841. [TCA_TAPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32,
  842. TC_QOPT_MAX_QUEUE - 1),
  843. [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
  844. [TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
  845. TC_FP_EXPRESS,
  846. TC_FP_PREEMPTIBLE),
  847. };
  848. static const struct netlink_range_validation_signed taprio_cycle_time_range = {
  849. .min = 0,
  850. .max = INT_MAX,
  851. };
  852. static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
  853. [TCA_TAPRIO_ATTR_PRIOMAP] = {
  854. .len = sizeof(struct tc_mqprio_qopt)
  855. },
  856. [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
  857. [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
  858. [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
  859. [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
  860. [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
  861. NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
  862. [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
  863. [TCA_TAPRIO_ATTR_FLAGS] =
  864. NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS),
  865. [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
  866. [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
  867. };
  868. static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
  869. struct sched_entry *entry,
  870. struct netlink_ext_ack *extack)
  871. {
  872. int min_duration = length_to_duration(q, ETH_ZLEN);
  873. u32 interval = 0;
  874. if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
  875. entry->command = nla_get_u8(
  876. tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
  877. if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
  878. entry->gate_mask = nla_get_u32(
  879. tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
  880. if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
  881. interval = nla_get_u32(
  882. tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
  883. /* The interval should allow at least the minimum ethernet
  884. * frame to go out.
  885. */
  886. if (interval < min_duration) {
  887. NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
  888. return -EINVAL;
  889. }
  890. entry->interval = interval;
  891. return 0;
  892. }
  893. static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
  894. struct sched_entry *entry, int index,
  895. struct netlink_ext_ack *extack)
  896. {
  897. struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
  898. int err;
  899. err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
  900. entry_policy, NULL);
  901. if (err < 0) {
  902. NL_SET_ERR_MSG(extack, "Could not parse nested entry");
  903. return -EINVAL;
  904. }
  905. entry->index = index;
  906. return fill_sched_entry(q, tb, entry, extack);
  907. }
  908. static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
  909. struct sched_gate_list *sched,
  910. struct netlink_ext_ack *extack)
  911. {
  912. struct nlattr *n;
  913. int err, rem;
  914. int i = 0;
  915. if (!list)
  916. return -EINVAL;
  917. nla_for_each_nested(n, list, rem) {
  918. struct sched_entry *entry;
  919. if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
  920. NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
  921. continue;
  922. }
  923. entry = kzalloc_obj(*entry);
  924. if (!entry) {
  925. NL_SET_ERR_MSG(extack, "Not enough memory for entry");
  926. return -ENOMEM;
  927. }
  928. err = parse_sched_entry(q, n, entry, i, extack);
  929. if (err < 0) {
  930. kfree(entry);
  931. return err;
  932. }
  933. list_add_tail(&entry->list, &sched->entries);
  934. i++;
  935. }
  936. sched->num_entries = i;
  937. return i;
  938. }
  939. static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
  940. struct sched_gate_list *new,
  941. struct netlink_ext_ack *extack)
  942. {
  943. int err = 0;
  944. if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
  945. NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
  946. return -ENOTSUPP;
  947. }
  948. if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
  949. new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
  950. if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
  951. new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
  952. if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
  953. new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
  954. if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
  955. err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
  956. new, extack);
  957. if (err < 0)
  958. return err;
  959. if (!new->cycle_time) {
  960. struct sched_entry *entry;
  961. ktime_t cycle = 0;
  962. list_for_each_entry(entry, &new->entries, list)
  963. cycle = ktime_add_ns(cycle, entry->interval);
  964. if (cycle < 0 || cycle > INT_MAX) {
  965. NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
  966. return -EINVAL;
  967. }
  968. new->cycle_time = cycle;
  969. }
  970. if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
  971. NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
  972. return -EINVAL;
  973. }
  974. taprio_calculate_gate_durations(q, new);
  975. return 0;
  976. }
  977. static int taprio_parse_mqprio_opt(struct net_device *dev,
  978. struct tc_mqprio_qopt *qopt,
  979. struct netlink_ext_ack *extack,
  980. u32 taprio_flags)
  981. {
  982. bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
  983. if (!qopt) {
  984. if (!dev->num_tc) {
  985. NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
  986. return -EINVAL;
  987. }
  988. return 0;
  989. }
  990. /* taprio imposes that traffic classes map 1:n to tx queues */
  991. if (qopt->num_tc > dev->num_tx_queues) {
  992. NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
  993. return -EINVAL;
  994. }
  995. /* For some reason, in txtime-assist mode, we allow TXQ ranges for
  996. * different TCs to overlap, and just validate the TXQ ranges.
  997. */
  998. return mqprio_validate_qopt(dev, qopt, true, allow_overlapping_txqs,
  999. extack);
  1000. }
  1001. static int taprio_get_start_time(struct Qdisc *sch,
  1002. struct sched_gate_list *sched,
  1003. ktime_t *start)
  1004. {
  1005. struct taprio_sched *q = qdisc_priv(sch);
  1006. ktime_t now, base, cycle;
  1007. s64 n;
  1008. base = sched_base_time(sched);
  1009. now = taprio_get_time(q);
  1010. if (ktime_after(base, now)) {
  1011. *start = base;
  1012. return 0;
  1013. }
  1014. cycle = sched->cycle_time;
  1015. /* The qdisc is expected to have at least one sched_entry. Moreover,
  1016. * any entry must have 'interval' > 0. Thus if the cycle time is zero,
  1017. * something went really wrong. In that case, we should warn about this
  1018. * inconsistent state and return error.
  1019. */
  1020. if (WARN_ON(!cycle))
  1021. return -EFAULT;
  1022. /* Schedule the start time for the beginning of the next
  1023. * cycle.
  1024. */
  1025. n = div64_s64(ktime_sub_ns(now, base), cycle);
  1026. *start = ktime_add_ns(base, (n + 1) * cycle);
  1027. return 0;
  1028. }
  1029. static void setup_first_end_time(struct taprio_sched *q,
  1030. struct sched_gate_list *sched, ktime_t base)
  1031. {
  1032. struct net_device *dev = qdisc_dev(q->root);
  1033. int num_tc = netdev_get_num_tc(dev);
  1034. struct sched_entry *first;
  1035. ktime_t cycle;
  1036. int tc;
  1037. first = list_first_entry(&sched->entries,
  1038. struct sched_entry, list);
  1039. cycle = sched->cycle_time;
  1040. /* FIXME: find a better place to do this */
  1041. sched->cycle_end_time = ktime_add_ns(base, cycle);
  1042. first->end_time = ktime_add_ns(base, first->interval);
  1043. taprio_set_budgets(q, sched, first);
  1044. for (tc = 0; tc < num_tc; tc++) {
  1045. if (first->gate_duration[tc] == sched->cycle_time)
  1046. first->gate_close_time[tc] = KTIME_MAX;
  1047. else
  1048. first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
  1049. }
  1050. rcu_assign_pointer(q->current_entry, NULL);
  1051. }
  1052. static void taprio_start_sched(struct Qdisc *sch,
  1053. ktime_t start, struct sched_gate_list *new)
  1054. {
  1055. struct taprio_sched *q = qdisc_priv(sch);
  1056. ktime_t expires;
  1057. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  1058. return;
  1059. expires = hrtimer_get_expires(&q->advance_timer);
  1060. if (expires == 0)
  1061. expires = KTIME_MAX;
  1062. /* If the new schedule starts before the next expiration, we
  1063. * reprogram it to the earliest one, so we change the admin
  1064. * schedule to the operational one at the right time.
  1065. */
  1066. start = min_t(ktime_t, start, expires);
  1067. hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
  1068. }
  1069. static void taprio_set_picos_per_byte(struct net_device *dev,
  1070. struct taprio_sched *q,
  1071. struct netlink_ext_ack *extack)
  1072. {
  1073. struct ethtool_link_ksettings ecmd;
  1074. int speed = SPEED_10;
  1075. int picos_per_byte;
  1076. int err;
  1077. err = __ethtool_get_link_ksettings(dev, &ecmd);
  1078. if (err < 0)
  1079. goto skip;
  1080. if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
  1081. speed = ecmd.base.speed;
  1082. skip:
  1083. picos_per_byte = (USEC_PER_SEC * 8) / speed;
  1084. if (picos_per_byte < TAPRIO_PICOS_PER_BYTE_MIN) {
  1085. if (!extack)
  1086. pr_warn("Link speed %d is too high. Schedule may be inaccurate.\n",
  1087. speed);
  1088. NL_SET_ERR_MSG_FMT_MOD(extack,
  1089. "Link speed %d is too high. Schedule may be inaccurate.",
  1090. speed);
  1091. picos_per_byte = TAPRIO_PICOS_PER_BYTE_MIN;
  1092. }
  1093. atomic64_set(&q->picos_per_byte, picos_per_byte);
  1094. netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
  1095. dev->name, (long long)atomic64_read(&q->picos_per_byte),
  1096. ecmd.base.speed);
  1097. }
  1098. static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
  1099. void *ptr)
  1100. {
  1101. struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  1102. struct sched_gate_list *oper, *admin;
  1103. struct qdisc_size_table *stab;
  1104. struct taprio_sched *q;
  1105. ASSERT_RTNL();
  1106. if (event != NETDEV_UP && event != NETDEV_CHANGE)
  1107. return NOTIFY_DONE;
  1108. list_for_each_entry(q, &taprio_list, taprio_list) {
  1109. if (dev != qdisc_dev(q->root))
  1110. continue;
  1111. taprio_set_picos_per_byte(dev, q, NULL);
  1112. stab = rtnl_dereference(q->root->stab);
  1113. rcu_read_lock();
  1114. oper = rcu_dereference(q->oper_sched);
  1115. if (oper)
  1116. taprio_update_queue_max_sdu(q, oper, stab);
  1117. admin = rcu_dereference(q->admin_sched);
  1118. if (admin)
  1119. taprio_update_queue_max_sdu(q, admin, stab);
  1120. rcu_read_unlock();
  1121. break;
  1122. }
  1123. return NOTIFY_DONE;
  1124. }
  1125. static void setup_txtime(struct taprio_sched *q,
  1126. struct sched_gate_list *sched, ktime_t base)
  1127. {
  1128. struct sched_entry *entry;
  1129. u64 interval = 0;
  1130. list_for_each_entry(entry, &sched->entries, list) {
  1131. entry->next_txtime = ktime_add_ns(base, interval);
  1132. interval += entry->interval;
  1133. }
  1134. }
  1135. static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
  1136. {
  1137. struct __tc_taprio_qopt_offload *__offload;
  1138. __offload = kzalloc_flex(*__offload, offload.entries, num_entries);
  1139. if (!__offload)
  1140. return NULL;
  1141. refcount_set(&__offload->users, 1);
  1142. return &__offload->offload;
  1143. }
  1144. struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
  1145. *offload)
  1146. {
  1147. struct __tc_taprio_qopt_offload *__offload;
  1148. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  1149. offload);
  1150. refcount_inc(&__offload->users);
  1151. return offload;
  1152. }
  1153. EXPORT_SYMBOL_GPL(taprio_offload_get);
  1154. void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
  1155. {
  1156. struct __tc_taprio_qopt_offload *__offload;
  1157. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  1158. offload);
  1159. if (!refcount_dec_and_test(&__offload->users))
  1160. return;
  1161. kfree(__offload);
  1162. }
  1163. EXPORT_SYMBOL_GPL(taprio_offload_free);
  1164. /* The function will only serve to keep the pointers to the "oper" and "admin"
  1165. * schedules valid in relation to their base times, so when calling dump() the
  1166. * users looks at the right schedules.
  1167. * When using full offload, the admin configuration is promoted to oper at the
  1168. * base_time in the PHC time domain. But because the system time is not
  1169. * necessarily in sync with that, we can't just trigger a hrtimer to call
  1170. * switch_schedules at the right hardware time.
  1171. * At the moment we call this by hand right away from taprio, but in the future
  1172. * it will be useful to create a mechanism for drivers to notify taprio of the
  1173. * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
  1174. * This is left as TODO.
  1175. */
  1176. static void taprio_offload_config_changed(struct taprio_sched *q)
  1177. {
  1178. struct sched_gate_list *oper, *admin;
  1179. oper = rtnl_dereference(q->oper_sched);
  1180. admin = rtnl_dereference(q->admin_sched);
  1181. switch_schedules(q, &admin, &oper);
  1182. }
  1183. static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
  1184. {
  1185. u32 i, queue_mask = 0;
  1186. for (i = 0; i < dev->num_tc; i++) {
  1187. u32 offset, count;
  1188. if (!(tc_mask & BIT(i)))
  1189. continue;
  1190. offset = dev->tc_to_txq[i].offset;
  1191. count = dev->tc_to_txq[i].count;
  1192. queue_mask |= GENMASK(offset + count - 1, offset);
  1193. }
  1194. return queue_mask;
  1195. }
  1196. static void taprio_sched_to_offload(struct net_device *dev,
  1197. struct sched_gate_list *sched,
  1198. struct tc_taprio_qopt_offload *offload,
  1199. const struct tc_taprio_caps *caps)
  1200. {
  1201. struct sched_entry *entry;
  1202. int i = 0;
  1203. offload->base_time = sched->base_time;
  1204. offload->cycle_time = sched->cycle_time;
  1205. offload->cycle_time_extension = sched->cycle_time_extension;
  1206. list_for_each_entry(entry, &sched->entries, list) {
  1207. struct tc_taprio_sched_entry *e = &offload->entries[i];
  1208. e->command = entry->command;
  1209. e->interval = entry->interval;
  1210. if (caps->gate_mask_per_txq)
  1211. e->gate_mask = tc_map_to_queue_mask(dev,
  1212. entry->gate_mask);
  1213. else
  1214. e->gate_mask = entry->gate_mask;
  1215. i++;
  1216. }
  1217. offload->num_entries = i;
  1218. }
  1219. static void taprio_detect_broken_mqprio(struct taprio_sched *q)
  1220. {
  1221. struct net_device *dev = qdisc_dev(q->root);
  1222. struct tc_taprio_caps caps;
  1223. qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
  1224. &caps, sizeof(caps));
  1225. q->broken_mqprio = caps.broken_mqprio;
  1226. if (q->broken_mqprio)
  1227. static_branch_inc(&taprio_have_broken_mqprio);
  1228. else
  1229. static_branch_inc(&taprio_have_working_mqprio);
  1230. q->detected_mqprio = true;
  1231. }
  1232. static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
  1233. {
  1234. if (!q->detected_mqprio)
  1235. return;
  1236. if (q->broken_mqprio)
  1237. static_branch_dec(&taprio_have_broken_mqprio);
  1238. else
  1239. static_branch_dec(&taprio_have_working_mqprio);
  1240. }
  1241. static int taprio_enable_offload(struct net_device *dev,
  1242. struct taprio_sched *q,
  1243. struct sched_gate_list *sched,
  1244. struct netlink_ext_ack *extack)
  1245. {
  1246. const struct net_device_ops *ops = dev->netdev_ops;
  1247. struct tc_taprio_qopt_offload *offload;
  1248. struct tc_taprio_caps caps;
  1249. int tc, err = 0;
  1250. if (!ops->ndo_setup_tc) {
  1251. NL_SET_ERR_MSG(extack,
  1252. "Device does not support taprio offload");
  1253. return -EOPNOTSUPP;
  1254. }
  1255. qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
  1256. &caps, sizeof(caps));
  1257. if (!caps.supports_queue_max_sdu) {
  1258. for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
  1259. if (q->max_sdu[tc]) {
  1260. NL_SET_ERR_MSG_MOD(extack,
  1261. "Device does not handle queueMaxSDU");
  1262. return -EOPNOTSUPP;
  1263. }
  1264. }
  1265. }
  1266. offload = taprio_offload_alloc(sched->num_entries);
  1267. if (!offload) {
  1268. NL_SET_ERR_MSG(extack,
  1269. "Not enough memory for enabling offload mode");
  1270. return -ENOMEM;
  1271. }
  1272. offload->cmd = TAPRIO_CMD_REPLACE;
  1273. offload->extack = extack;
  1274. mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
  1275. offload->mqprio.extack = extack;
  1276. taprio_sched_to_offload(dev, sched, offload, &caps);
  1277. mqprio_fp_to_offload(q->fp, &offload->mqprio);
  1278. for (tc = 0; tc < TC_MAX_QUEUE; tc++)
  1279. offload->max_sdu[tc] = q->max_sdu[tc];
  1280. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
  1281. if (err < 0) {
  1282. NL_SET_ERR_MSG_WEAK(extack,
  1283. "Device failed to setup taprio offload");
  1284. goto done;
  1285. }
  1286. q->offloaded = true;
  1287. done:
  1288. /* The offload structure may linger around via a reference taken by the
  1289. * device driver, so clear up the netlink extack pointer so that the
  1290. * driver isn't tempted to dereference data which stopped being valid
  1291. */
  1292. offload->extack = NULL;
  1293. offload->mqprio.extack = NULL;
  1294. taprio_offload_free(offload);
  1295. return err;
  1296. }
  1297. static int taprio_disable_offload(struct net_device *dev,
  1298. struct taprio_sched *q,
  1299. struct netlink_ext_ack *extack)
  1300. {
  1301. const struct net_device_ops *ops = dev->netdev_ops;
  1302. struct tc_taprio_qopt_offload *offload;
  1303. int err;
  1304. if (!q->offloaded)
  1305. return 0;
  1306. offload = taprio_offload_alloc(0);
  1307. if (!offload) {
  1308. NL_SET_ERR_MSG(extack,
  1309. "Not enough memory to disable offload mode");
  1310. return -ENOMEM;
  1311. }
  1312. offload->cmd = TAPRIO_CMD_DESTROY;
  1313. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
  1314. if (err < 0) {
  1315. NL_SET_ERR_MSG(extack,
  1316. "Device failed to disable offload");
  1317. goto out;
  1318. }
  1319. q->offloaded = false;
  1320. out:
  1321. taprio_offload_free(offload);
  1322. return err;
  1323. }
  1324. /* If full offload is enabled, the only possible clockid is the net device's
  1325. * PHC. For that reason, specifying a clockid through netlink is incorrect.
  1326. * For txtime-assist, it is implicitly assumed that the device's PHC is kept
  1327. * in sync with the specified clockid via a user space daemon such as phc2sys.
  1328. * For both software taprio and txtime-assist, the clockid is used for the
  1329. * hrtimer that advances the schedule and hence mandatory.
  1330. */
  1331. static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
  1332. struct netlink_ext_ack *extack)
  1333. {
  1334. struct taprio_sched *q = qdisc_priv(sch);
  1335. struct net_device *dev = qdisc_dev(sch);
  1336. int err = -EINVAL;
  1337. if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1338. const struct ethtool_ops *ops = dev->ethtool_ops;
  1339. struct kernel_ethtool_ts_info info = {
  1340. .cmd = ETHTOOL_GET_TS_INFO,
  1341. .phc_index = -1,
  1342. };
  1343. if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
  1344. NL_SET_ERR_MSG(extack,
  1345. "The 'clockid' cannot be specified for full offload");
  1346. goto out;
  1347. }
  1348. if (ops && ops->get_ts_info)
  1349. err = ops->get_ts_info(dev, &info);
  1350. if (err || info.phc_index < 0) {
  1351. NL_SET_ERR_MSG(extack,
  1352. "Device does not have a PTP clock");
  1353. err = -ENOTSUPP;
  1354. goto out;
  1355. }
  1356. } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
  1357. int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
  1358. enum tk_offsets tk_offset;
  1359. /* We only support static clockids and we don't allow
  1360. * for it to be modified after the first init.
  1361. */
  1362. if (clockid < 0 ||
  1363. (q->clockid != -1 && q->clockid != clockid)) {
  1364. NL_SET_ERR_MSG(extack,
  1365. "Changing the 'clockid' of a running schedule is not supported");
  1366. err = -ENOTSUPP;
  1367. goto out;
  1368. }
  1369. switch (clockid) {
  1370. case CLOCK_REALTIME:
  1371. tk_offset = TK_OFFS_REAL;
  1372. break;
  1373. case CLOCK_MONOTONIC:
  1374. tk_offset = TK_OFFS_MAX;
  1375. break;
  1376. case CLOCK_BOOTTIME:
  1377. tk_offset = TK_OFFS_BOOT;
  1378. break;
  1379. case CLOCK_TAI:
  1380. tk_offset = TK_OFFS_TAI;
  1381. break;
  1382. default:
  1383. NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
  1384. err = -EINVAL;
  1385. goto out;
  1386. }
  1387. /* This pairs with READ_ONCE() in taprio_mono_to_any */
  1388. WRITE_ONCE(q->tk_offset, tk_offset);
  1389. q->clockid = clockid;
  1390. } else {
  1391. NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
  1392. goto out;
  1393. }
  1394. /* Everything went ok, return success. */
  1395. err = 0;
  1396. out:
  1397. return err;
  1398. }
  1399. static int taprio_parse_tc_entry(struct Qdisc *sch,
  1400. struct nlattr *opt,
  1401. u32 max_sdu[TC_QOPT_MAX_QUEUE],
  1402. u32 fp[TC_QOPT_MAX_QUEUE],
  1403. unsigned long *seen_tcs,
  1404. struct netlink_ext_ack *extack)
  1405. {
  1406. struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
  1407. struct net_device *dev = qdisc_dev(sch);
  1408. int err, tc;
  1409. u32 val;
  1410. err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
  1411. taprio_tc_policy, extack);
  1412. if (err < 0)
  1413. return err;
  1414. if (NL_REQ_ATTR_CHECK(extack, opt, tb, TCA_TAPRIO_TC_ENTRY_INDEX)) {
  1415. NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
  1416. return -EINVAL;
  1417. }
  1418. tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
  1419. if (*seen_tcs & BIT(tc)) {
  1420. NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_TC_ENTRY_INDEX],
  1421. "Duplicate tc entry");
  1422. return -EINVAL;
  1423. }
  1424. *seen_tcs |= BIT(tc);
  1425. if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
  1426. val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
  1427. if (val > dev->max_mtu) {
  1428. NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
  1429. return -ERANGE;
  1430. }
  1431. max_sdu[tc] = val;
  1432. }
  1433. if (tb[TCA_TAPRIO_TC_ENTRY_FP])
  1434. fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);
  1435. return 0;
  1436. }
  1437. static int taprio_parse_tc_entries(struct Qdisc *sch,
  1438. struct nlattr *opt,
  1439. struct netlink_ext_ack *extack)
  1440. {
  1441. struct taprio_sched *q = qdisc_priv(sch);
  1442. struct net_device *dev = qdisc_dev(sch);
  1443. u32 max_sdu[TC_QOPT_MAX_QUEUE];
  1444. bool have_preemption = false;
  1445. unsigned long seen_tcs = 0;
  1446. u32 fp[TC_QOPT_MAX_QUEUE];
  1447. struct nlattr *n;
  1448. int tc, rem;
  1449. int err = 0;
  1450. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
  1451. max_sdu[tc] = q->max_sdu[tc];
  1452. fp[tc] = q->fp[tc];
  1453. }
  1454. nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) {
  1455. err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
  1456. extack);
  1457. if (err)
  1458. return err;
  1459. }
  1460. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
  1461. q->max_sdu[tc] = max_sdu[tc];
  1462. q->fp[tc] = fp[tc];
  1463. if (fp[tc] != TC_FP_EXPRESS)
  1464. have_preemption = true;
  1465. }
  1466. if (have_preemption) {
  1467. if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1468. NL_SET_ERR_MSG(extack,
  1469. "Preemption only supported with full offload");
  1470. return -EOPNOTSUPP;
  1471. }
  1472. if (!ethtool_dev_mm_supported(dev)) {
  1473. NL_SET_ERR_MSG(extack,
  1474. "Device does not support preemption");
  1475. return -EOPNOTSUPP;
  1476. }
  1477. }
  1478. return err;
  1479. }
  1480. static int taprio_mqprio_cmp(const struct net_device *dev,
  1481. const struct tc_mqprio_qopt *mqprio)
  1482. {
  1483. int i;
  1484. if (!mqprio || mqprio->num_tc != dev->num_tc)
  1485. return -1;
  1486. for (i = 0; i < mqprio->num_tc; i++)
  1487. if (dev->tc_to_txq[i].count != mqprio->count[i] ||
  1488. dev->tc_to_txq[i].offset != mqprio->offset[i])
  1489. return -1;
  1490. for (i = 0; i <= TC_BITMASK; i++)
  1491. if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
  1492. return -1;
  1493. return 0;
  1494. }
  1495. static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
  1496. struct netlink_ext_ack *extack)
  1497. {
  1498. struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
  1499. struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
  1500. struct sched_gate_list *oper, *admin, *new_admin;
  1501. struct taprio_sched *q = qdisc_priv(sch);
  1502. struct net_device *dev = qdisc_dev(sch);
  1503. struct tc_mqprio_qopt *mqprio = NULL;
  1504. unsigned long flags;
  1505. u32 taprio_flags;
  1506. ktime_t start;
  1507. int i, err;
  1508. err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
  1509. taprio_policy, extack);
  1510. if (err < 0)
  1511. return err;
  1512. if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
  1513. mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
  1514. /* The semantics of the 'flags' argument in relation to 'change()'
  1515. * requests, are interpreted following two rules (which are applied in
  1516. * this order): (1) an omitted 'flags' argument is interpreted as
  1517. * zero; (2) the 'flags' of a "running" taprio instance cannot be
  1518. * changed.
  1519. */
  1520. taprio_flags = nla_get_u32_default(tb[TCA_TAPRIO_ATTR_FLAGS], 0);
  1521. /* txtime-assist and full offload are mutually exclusive */
  1522. if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
  1523. (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) {
  1524. NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS],
  1525. "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive");
  1526. return -EINVAL;
  1527. }
  1528. if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) {
  1529. NL_SET_ERR_MSG_MOD(extack,
  1530. "Changing 'flags' of a running schedule is not supported");
  1531. return -EOPNOTSUPP;
  1532. }
  1533. q->flags = taprio_flags;
  1534. /* Needed for length_to_duration() during netlink attribute parsing */
  1535. taprio_set_picos_per_byte(dev, q, extack);
  1536. err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
  1537. if (err < 0)
  1538. return err;
  1539. err = taprio_parse_tc_entries(sch, opt, extack);
  1540. if (err)
  1541. return err;
  1542. new_admin = kzalloc_obj(*new_admin);
  1543. if (!new_admin) {
  1544. NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
  1545. return -ENOMEM;
  1546. }
  1547. INIT_LIST_HEAD(&new_admin->entries);
  1548. oper = rtnl_dereference(q->oper_sched);
  1549. admin = rtnl_dereference(q->admin_sched);
  1550. /* no changes - no new mqprio settings */
  1551. if (!taprio_mqprio_cmp(dev, mqprio))
  1552. mqprio = NULL;
  1553. if (mqprio && (oper || admin)) {
  1554. NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
  1555. err = -ENOTSUPP;
  1556. goto free_sched;
  1557. }
  1558. if (mqprio) {
  1559. err = netdev_set_num_tc(dev, mqprio->num_tc);
  1560. if (err)
  1561. goto free_sched;
  1562. for (i = 0; i < mqprio->num_tc; i++) {
  1563. netdev_set_tc_queue(dev, i,
  1564. mqprio->count[i],
  1565. mqprio->offset[i]);
  1566. q->cur_txq[i] = mqprio->offset[i];
  1567. }
  1568. /* Always use supplied priority mappings */
  1569. for (i = 0; i <= TC_BITMASK; i++)
  1570. netdev_set_prio_tc_map(dev, i,
  1571. mqprio->prio_tc_map[i]);
  1572. }
  1573. err = parse_taprio_schedule(q, tb, new_admin, extack);
  1574. if (err < 0)
  1575. goto free_sched;
  1576. if (new_admin->num_entries == 0) {
  1577. NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
  1578. err = -EINVAL;
  1579. goto free_sched;
  1580. }
  1581. err = taprio_parse_clockid(sch, tb, extack);
  1582. if (err < 0)
  1583. goto free_sched;
  1584. taprio_update_queue_max_sdu(q, new_admin, stab);
  1585. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  1586. err = taprio_enable_offload(dev, q, new_admin, extack);
  1587. else
  1588. err = taprio_disable_offload(dev, q, extack);
  1589. if (err)
  1590. goto free_sched;
  1591. /* Protects against enqueue()/dequeue() */
  1592. spin_lock_bh(qdisc_lock(sch));
  1593. if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
  1594. if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  1595. NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
  1596. err = -EINVAL;
  1597. goto unlock;
  1598. }
  1599. q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
  1600. }
  1601. if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
  1602. !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
  1603. !hrtimer_active(&q->advance_timer)) {
  1604. hrtimer_setup(&q->advance_timer, advance_sched, q->clockid, HRTIMER_MODE_ABS);
  1605. }
  1606. err = taprio_get_start_time(sch, new_admin, &start);
  1607. if (err < 0) {
  1608. NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
  1609. goto unlock;
  1610. }
  1611. setup_txtime(q, new_admin, start);
  1612. if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  1613. if (!oper) {
  1614. rcu_assign_pointer(q->oper_sched, new_admin);
  1615. err = 0;
  1616. new_admin = NULL;
  1617. goto unlock;
  1618. }
  1619. /* Not going to race against advance_sched(), but still */
  1620. admin = rcu_replace_pointer(q->admin_sched, new_admin,
  1621. lockdep_rtnl_is_held());
  1622. if (admin)
  1623. call_rcu(&admin->rcu, taprio_free_sched_cb);
  1624. } else {
  1625. setup_first_end_time(q, new_admin, start);
  1626. /* Protects against advance_sched() */
  1627. spin_lock_irqsave(&q->current_entry_lock, flags);
  1628. taprio_start_sched(sch, start, new_admin);
  1629. admin = rcu_replace_pointer(q->admin_sched, new_admin,
  1630. lockdep_rtnl_is_held());
  1631. if (admin)
  1632. call_rcu(&admin->rcu, taprio_free_sched_cb);
  1633. spin_unlock_irqrestore(&q->current_entry_lock, flags);
  1634. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  1635. taprio_offload_config_changed(q);
  1636. }
  1637. new_admin = NULL;
  1638. err = 0;
  1639. if (!stab)
  1640. NL_SET_ERR_MSG_MOD(extack,
  1641. "Size table not specified, frame length estimations may be inaccurate");
  1642. unlock:
  1643. spin_unlock_bh(qdisc_lock(sch));
  1644. free_sched:
  1645. if (new_admin)
  1646. call_rcu(&new_admin->rcu, taprio_free_sched_cb);
  1647. return err;
  1648. }
  1649. static void taprio_reset(struct Qdisc *sch)
  1650. {
  1651. struct taprio_sched *q = qdisc_priv(sch);
  1652. struct net_device *dev = qdisc_dev(sch);
  1653. int i;
  1654. hrtimer_cancel(&q->advance_timer);
  1655. if (q->qdiscs) {
  1656. for (i = 0; i < dev->num_tx_queues; i++)
  1657. if (q->qdiscs[i])
  1658. qdisc_reset(q->qdiscs[i]);
  1659. }
  1660. }
  1661. static void taprio_destroy(struct Qdisc *sch)
  1662. {
  1663. struct taprio_sched *q = qdisc_priv(sch);
  1664. struct net_device *dev = qdisc_dev(sch);
  1665. struct sched_gate_list *oper, *admin;
  1666. unsigned int i;
  1667. list_del(&q->taprio_list);
  1668. /* Note that taprio_reset() might not be called if an error
  1669. * happens in qdisc_create(), after taprio_init() has been called.
  1670. */
  1671. hrtimer_cancel(&q->advance_timer);
  1672. qdisc_synchronize(sch);
  1673. taprio_disable_offload(dev, q, NULL);
  1674. if (q->qdiscs) {
  1675. for (i = 0; i < dev->num_tx_queues; i++)
  1676. qdisc_put(q->qdiscs[i]);
  1677. kfree(q->qdiscs);
  1678. }
  1679. q->qdiscs = NULL;
  1680. netdev_reset_tc(dev);
  1681. oper = rtnl_dereference(q->oper_sched);
  1682. admin = rtnl_dereference(q->admin_sched);
  1683. if (oper)
  1684. call_rcu(&oper->rcu, taprio_free_sched_cb);
  1685. if (admin)
  1686. call_rcu(&admin->rcu, taprio_free_sched_cb);
  1687. taprio_cleanup_broken_mqprio(q);
  1688. }
  1689. static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
  1690. struct netlink_ext_ack *extack)
  1691. {
  1692. struct taprio_sched *q = qdisc_priv(sch);
  1693. struct net_device *dev = qdisc_dev(sch);
  1694. int i, tc;
  1695. spin_lock_init(&q->current_entry_lock);
  1696. hrtimer_setup(&q->advance_timer, advance_sched, CLOCK_TAI, HRTIMER_MODE_ABS);
  1697. q->root = sch;
  1698. /* We only support static clockids. Use an invalid value as default
  1699. * and get the valid one on taprio_change().
  1700. */
  1701. q->clockid = -1;
  1702. q->flags = TAPRIO_FLAGS_INVALID;
  1703. list_add(&q->taprio_list, &taprio_list);
  1704. if (sch->parent != TC_H_ROOT) {
  1705. NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
  1706. return -EOPNOTSUPP;
  1707. }
  1708. if (!netif_is_multiqueue(dev)) {
  1709. NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
  1710. return -EOPNOTSUPP;
  1711. }
  1712. q->qdiscs = kzalloc_objs(q->qdiscs[0], dev->num_tx_queues);
  1713. if (!q->qdiscs)
  1714. return -ENOMEM;
  1715. if (!opt)
  1716. return -EINVAL;
  1717. for (i = 0; i < dev->num_tx_queues; i++) {
  1718. struct netdev_queue *dev_queue;
  1719. struct Qdisc *qdisc;
  1720. dev_queue = netdev_get_tx_queue(dev, i);
  1721. qdisc = qdisc_create_dflt(dev_queue,
  1722. &pfifo_qdisc_ops,
  1723. TC_H_MAKE(TC_H_MAJ(sch->handle),
  1724. TC_H_MIN(i + 1)),
  1725. extack);
  1726. if (!qdisc)
  1727. return -ENOMEM;
  1728. if (i < dev->real_num_tx_queues)
  1729. qdisc_hash_add(qdisc, false);
  1730. q->qdiscs[i] = qdisc;
  1731. }
  1732. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
  1733. q->fp[tc] = TC_FP_EXPRESS;
  1734. taprio_detect_broken_mqprio(q);
  1735. return taprio_change(sch, opt, extack);
  1736. }
  1737. static void taprio_attach(struct Qdisc *sch)
  1738. {
  1739. struct taprio_sched *q = qdisc_priv(sch);
  1740. struct net_device *dev = qdisc_dev(sch);
  1741. unsigned int ntx;
  1742. /* Attach underlying qdisc */
  1743. for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
  1744. struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
  1745. struct Qdisc *old, *dev_queue_qdisc;
  1746. if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1747. struct Qdisc *qdisc = q->qdiscs[ntx];
  1748. /* In offload mode, the root taprio qdisc is bypassed
  1749. * and the netdev TX queues see the children directly
  1750. */
  1751. qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
  1752. dev_queue_qdisc = qdisc;
  1753. } else {
  1754. /* In software mode, attach the root taprio qdisc
  1755. * to all netdev TX queues, so that dev_qdisc_enqueue()
  1756. * goes through taprio_enqueue().
  1757. */
  1758. dev_queue_qdisc = sch;
  1759. }
  1760. old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
  1761. /* The qdisc's refcount requires to be elevated once
  1762. * for each netdev TX queue it is grafted onto
  1763. */
  1764. qdisc_refcount_inc(dev_queue_qdisc);
  1765. if (old)
  1766. qdisc_put(old);
  1767. }
  1768. }
  1769. static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
  1770. unsigned long cl)
  1771. {
  1772. struct net_device *dev = qdisc_dev(sch);
  1773. unsigned long ntx = cl - 1;
  1774. if (ntx >= dev->num_tx_queues)
  1775. return NULL;
  1776. return netdev_get_tx_queue(dev, ntx);
  1777. }
  1778. static int taprio_graft(struct Qdisc *sch, unsigned long cl,
  1779. struct Qdisc *new, struct Qdisc **old,
  1780. struct netlink_ext_ack *extack)
  1781. {
  1782. struct taprio_sched *q = qdisc_priv(sch);
  1783. struct net_device *dev = qdisc_dev(sch);
  1784. struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
  1785. if (!dev_queue)
  1786. return -EINVAL;
  1787. if (dev->flags & IFF_UP)
  1788. dev_deactivate(dev);
  1789. /* In offload mode, the child Qdisc is directly attached to the netdev
  1790. * TX queue, and thus, we need to keep its refcount elevated in order
  1791. * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
  1792. * However, save the reference to the new qdisc in the private array in
  1793. * both software and offload cases, to have an up-to-date reference to
  1794. * our children.
  1795. */
  1796. *old = q->qdiscs[cl - 1];
  1797. if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1798. WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
  1799. if (new)
  1800. qdisc_refcount_inc(new);
  1801. if (*old)
  1802. qdisc_put(*old);
  1803. }
  1804. q->qdiscs[cl - 1] = new;
  1805. if (new)
  1806. new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
  1807. if (dev->flags & IFF_UP)
  1808. dev_activate(dev);
  1809. return 0;
  1810. }
  1811. static int dump_entry(struct sk_buff *msg,
  1812. const struct sched_entry *entry)
  1813. {
  1814. struct nlattr *item;
  1815. item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
  1816. if (!item)
  1817. return -ENOSPC;
  1818. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
  1819. goto nla_put_failure;
  1820. if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
  1821. goto nla_put_failure;
  1822. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
  1823. entry->gate_mask))
  1824. goto nla_put_failure;
  1825. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
  1826. entry->interval))
  1827. goto nla_put_failure;
  1828. return nla_nest_end(msg, item);
  1829. nla_put_failure:
  1830. nla_nest_cancel(msg, item);
  1831. return -1;
  1832. }
  1833. static int dump_schedule(struct sk_buff *msg,
  1834. const struct sched_gate_list *root)
  1835. {
  1836. struct nlattr *entry_list;
  1837. struct sched_entry *entry;
  1838. if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
  1839. root->base_time, TCA_TAPRIO_PAD))
  1840. return -1;
  1841. if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
  1842. root->cycle_time, TCA_TAPRIO_PAD))
  1843. return -1;
  1844. if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
  1845. root->cycle_time_extension, TCA_TAPRIO_PAD))
  1846. return -1;
  1847. entry_list = nla_nest_start_noflag(msg,
  1848. TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
  1849. if (!entry_list)
  1850. goto error_nest;
  1851. list_for_each_entry(entry, &root->entries, list) {
  1852. if (dump_entry(msg, entry) < 0)
  1853. goto error_nest;
  1854. }
  1855. nla_nest_end(msg, entry_list);
  1856. return 0;
  1857. error_nest:
  1858. nla_nest_cancel(msg, entry_list);
  1859. return -1;
  1860. }
  1861. static int taprio_dump_tc_entries(struct sk_buff *skb,
  1862. struct taprio_sched *q,
  1863. struct sched_gate_list *sched)
  1864. {
  1865. struct nlattr *n;
  1866. int tc;
  1867. for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
  1868. n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY);
  1869. if (!n)
  1870. return -EMSGSIZE;
  1871. if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc))
  1872. goto nla_put_failure;
  1873. if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
  1874. sched->max_sdu[tc]))
  1875. goto nla_put_failure;
  1876. if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
  1877. goto nla_put_failure;
  1878. nla_nest_end(skb, n);
  1879. }
  1880. return 0;
  1881. nla_put_failure:
  1882. nla_nest_cancel(skb, n);
  1883. return -EMSGSIZE;
  1884. }
  1885. static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
  1886. {
  1887. if (val == TAPRIO_STAT_NOT_SET)
  1888. return 0;
  1889. if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD))
  1890. return -EMSGSIZE;
  1891. return 0;
  1892. }
  1893. static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d,
  1894. struct tc_taprio_qopt_offload *offload,
  1895. struct tc_taprio_qopt_stats *stats)
  1896. {
  1897. struct net_device *dev = qdisc_dev(sch);
  1898. const struct net_device_ops *ops;
  1899. struct sk_buff *skb = d->skb;
  1900. struct nlattr *xstats;
  1901. int err;
  1902. ops = qdisc_dev(sch)->netdev_ops;
  1903. /* FIXME I could use qdisc_offload_dump_helper(), but that messes
  1904. * with sch->flags depending on whether the device reports taprio
  1905. * stats, and I'm not sure whether that's a good idea, considering
  1906. * that stats are optional to the offload itself
  1907. */
  1908. if (!ops->ndo_setup_tc)
  1909. return 0;
  1910. memset(stats, 0xff, sizeof(*stats));
  1911. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
  1912. if (err == -EOPNOTSUPP)
  1913. return 0;
  1914. if (err)
  1915. return err;
  1916. xstats = nla_nest_start(skb, TCA_STATS_APP);
  1917. if (!xstats)
  1918. goto err;
  1919. if (taprio_put_stat(skb, stats->window_drops,
  1920. TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) ||
  1921. taprio_put_stat(skb, stats->tx_overruns,
  1922. TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
  1923. goto err_cancel;
  1924. nla_nest_end(skb, xstats);
  1925. return 0;
  1926. err_cancel:
  1927. nla_nest_cancel(skb, xstats);
  1928. err:
  1929. return -EMSGSIZE;
  1930. }
  1931. static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
  1932. {
  1933. struct tc_taprio_qopt_offload offload = {
  1934. .cmd = TAPRIO_CMD_STATS,
  1935. };
  1936. return taprio_dump_xstats(sch, d, &offload, &offload.stats);
  1937. }
  1938. static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
  1939. {
  1940. struct taprio_sched *q = qdisc_priv(sch);
  1941. struct net_device *dev = qdisc_dev(sch);
  1942. struct sched_gate_list *oper, *admin;
  1943. struct tc_mqprio_qopt opt = { 0 };
  1944. struct nlattr *nest, *sched_nest;
  1945. mqprio_qopt_reconstruct(dev, &opt);
  1946. nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
  1947. if (!nest)
  1948. goto start_error;
  1949. if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
  1950. goto options_error;
  1951. if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
  1952. nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
  1953. goto options_error;
  1954. if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
  1955. goto options_error;
  1956. if (q->txtime_delay &&
  1957. nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
  1958. goto options_error;
  1959. rcu_read_lock();
  1960. oper = rtnl_dereference(q->oper_sched);
  1961. admin = rtnl_dereference(q->admin_sched);
  1962. if (oper && taprio_dump_tc_entries(skb, q, oper))
  1963. goto options_error_rcu;
  1964. if (oper && dump_schedule(skb, oper))
  1965. goto options_error_rcu;
  1966. if (!admin)
  1967. goto done;
  1968. sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
  1969. if (!sched_nest)
  1970. goto options_error_rcu;
  1971. if (dump_schedule(skb, admin))
  1972. goto admin_error;
  1973. nla_nest_end(skb, sched_nest);
  1974. done:
  1975. rcu_read_unlock();
  1976. return nla_nest_end(skb, nest);
  1977. admin_error:
  1978. nla_nest_cancel(skb, sched_nest);
  1979. options_error_rcu:
  1980. rcu_read_unlock();
  1981. options_error:
  1982. nla_nest_cancel(skb, nest);
  1983. start_error:
  1984. return -ENOSPC;
  1985. }
  1986. static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
  1987. {
  1988. struct taprio_sched *q = qdisc_priv(sch);
  1989. struct net_device *dev = qdisc_dev(sch);
  1990. unsigned int ntx = cl - 1;
  1991. if (ntx >= dev->num_tx_queues)
  1992. return NULL;
  1993. return q->qdiscs[ntx];
  1994. }
  1995. static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
  1996. {
  1997. unsigned int ntx = TC_H_MIN(classid);
  1998. if (!taprio_queue_get(sch, ntx))
  1999. return 0;
  2000. return ntx;
  2001. }
  2002. static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
  2003. struct sk_buff *skb, struct tcmsg *tcm)
  2004. {
  2005. struct Qdisc *child = taprio_leaf(sch, cl);
  2006. tcm->tcm_parent = TC_H_ROOT;
  2007. tcm->tcm_handle |= TC_H_MIN(cl);
  2008. tcm->tcm_info = child->handle;
  2009. return 0;
  2010. }
  2011. static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  2012. struct gnet_dump *d)
  2013. __releases(d->lock)
  2014. __acquires(d->lock)
  2015. {
  2016. struct Qdisc *child = taprio_leaf(sch, cl);
  2017. struct tc_taprio_qopt_offload offload = {
  2018. .cmd = TAPRIO_CMD_QUEUE_STATS,
  2019. .queue_stats = {
  2020. .queue = cl - 1,
  2021. },
  2022. };
  2023. if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
  2024. qdisc_qstats_copy(d, child) < 0)
  2025. return -1;
  2026. return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats);
  2027. }
  2028. static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  2029. {
  2030. struct net_device *dev = qdisc_dev(sch);
  2031. unsigned long ntx;
  2032. if (arg->stop)
  2033. return;
  2034. arg->count = arg->skip;
  2035. for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
  2036. if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
  2037. break;
  2038. }
  2039. }
  2040. static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
  2041. struct tcmsg *tcm)
  2042. {
  2043. return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
  2044. }
  2045. static const struct Qdisc_class_ops taprio_class_ops = {
  2046. .graft = taprio_graft,
  2047. .leaf = taprio_leaf,
  2048. .find = taprio_find,
  2049. .walk = taprio_walk,
  2050. .dump = taprio_dump_class,
  2051. .dump_stats = taprio_dump_class_stats,
  2052. .select_queue = taprio_select_queue,
  2053. };
  2054. static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
  2055. .cl_ops = &taprio_class_ops,
  2056. .id = "taprio",
  2057. .priv_size = sizeof(struct taprio_sched),
  2058. .init = taprio_init,
  2059. .change = taprio_change,
  2060. .destroy = taprio_destroy,
  2061. .reset = taprio_reset,
  2062. .attach = taprio_attach,
  2063. .peek = taprio_peek,
  2064. .dequeue = taprio_dequeue,
  2065. .enqueue = taprio_enqueue,
  2066. .dump = taprio_dump,
  2067. .dump_stats = taprio_dump_stats,
  2068. .owner = THIS_MODULE,
  2069. };
  2070. MODULE_ALIAS_NET_SCH("taprio");
  2071. static struct notifier_block taprio_device_notifier = {
  2072. .notifier_call = taprio_dev_notifier,
  2073. };
  2074. static int __init taprio_module_init(void)
  2075. {
  2076. int err = register_netdevice_notifier(&taprio_device_notifier);
  2077. if (err)
  2078. return err;
  2079. return register_qdisc(&taprio_qdisc_ops);
  2080. }
  2081. static void __exit taprio_module_exit(void)
  2082. {
  2083. unregister_qdisc(&taprio_qdisc_ops);
  2084. unregister_netdevice_notifier(&taprio_device_notifier);
  2085. }
  2086. module_init(taprio_module_init);
  2087. module_exit(taprio_module_exit);
  2088. MODULE_LICENSE("GPL");
  2089. MODULE_DESCRIPTION("Time Aware Priority qdisc");