h264.c 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * NVIDIA Tegra Video decoder driver
  4. *
  5. * Copyright (C) 2016-2022 Dmitry Osipenko <digetx@gmail.com>
  6. *
  7. */
  8. #include <linux/iopoll.h>
  9. #include <linux/pm_runtime.h>
  10. #include <linux/reset.h>
  11. #include <linux/slab.h>
  12. #include <media/v4l2-h264.h>
  13. #include "trace.h"
  14. #include "vde.h"
  15. #define FLAG_B_FRAME 0x1
  16. #define FLAG_REFERENCE 0x2
  17. struct tegra_vde_h264_decoder_ctx {
  18. unsigned int dpb_frames_nb;
  19. unsigned int dpb_ref_frames_with_earlier_poc_nb;
  20. unsigned int baseline_profile;
  21. unsigned int level_idc;
  22. unsigned int log2_max_pic_order_cnt_lsb;
  23. unsigned int log2_max_frame_num;
  24. unsigned int pic_order_cnt_type;
  25. unsigned int direct_8x8_inference_flag;
  26. unsigned int pic_width_in_mbs;
  27. unsigned int pic_height_in_mbs;
  28. unsigned int pic_init_qp;
  29. unsigned int deblocking_filter_control_present_flag;
  30. unsigned int constrained_intra_pred_flag;
  31. unsigned int chroma_qp_index_offset;
  32. unsigned int pic_order_present_flag;
  33. unsigned int num_ref_idx_l0_active_minus1;
  34. unsigned int num_ref_idx_l1_active_minus1;
  35. };
  36. struct h264_reflists {
  37. struct v4l2_h264_reference p[V4L2_H264_NUM_DPB_ENTRIES];
  38. struct v4l2_h264_reference b0[V4L2_H264_NUM_DPB_ENTRIES];
  39. struct v4l2_h264_reference b1[V4L2_H264_NUM_DPB_ENTRIES];
  40. };
  41. static int tegra_vde_wait_mbe(struct tegra_vde *vde)
  42. {
  43. u32 tmp;
  44. return readl_relaxed_poll_timeout(vde->mbe + 0x8C, tmp,
  45. tmp >= 0x10, 1, 100);
  46. }
  47. static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde *vde,
  48. unsigned int refs_nb,
  49. bool setup_refs)
  50. {
  51. u32 value, frame_idx_enb_mask = 0;
  52. unsigned int frame_idx;
  53. unsigned int idx;
  54. int err;
  55. tegra_vde_writel(vde, 0xD0000000 | (0 << 23), vde->mbe, 0x80);
  56. tegra_vde_writel(vde, 0xD0200000 | (0 << 23), vde->mbe, 0x80);
  57. err = tegra_vde_wait_mbe(vde);
  58. if (err)
  59. return err;
  60. if (!setup_refs)
  61. return 0;
  62. for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
  63. tegra_vde_writel(vde, 0xD0000000 | (frame_idx << 23),
  64. vde->mbe, 0x80);
  65. tegra_vde_writel(vde, 0xD0200000 | (frame_idx << 23),
  66. vde->mbe, 0x80);
  67. frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));
  68. if (idx % 4 == 3 || idx == refs_nb - 1) {
  69. value = 0xC0000000;
  70. value |= (idx >> 2) << 24;
  71. value |= frame_idx_enb_mask;
  72. tegra_vde_writel(vde, value, vde->mbe, 0x80);
  73. err = tegra_vde_wait_mbe(vde);
  74. if (err)
  75. return err;
  76. frame_idx_enb_mask = 0;
  77. }
  78. }
  79. return 0;
  80. }
  81. static void tegra_vde_mbe_set_0xa_reg(struct tegra_vde *vde, int reg, u32 val)
  82. {
  83. tegra_vde_writel(vde, 0xA0000000 | (reg << 24) | (val & 0xFFFF),
  84. vde->mbe, 0x80);
  85. tegra_vde_writel(vde, 0xA0000000 | ((reg + 1) << 24) | (val >> 16),
  86. vde->mbe, 0x80);
  87. }
  88. static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
  89. {
  90. struct device *dev = vde->dev;
  91. u32 value;
  92. int err;
  93. err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
  94. !(value & BIT(2)), 1, 100);
  95. if (err) {
  96. dev_err(dev, "BSEV unknown bit timeout\n");
  97. return err;
  98. }
  99. err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
  100. (value & BSE_ICMDQUE_EMPTY), 1, 100);
  101. if (err) {
  102. dev_err(dev, "BSEV ICMDQUE flush timeout\n");
  103. return err;
  104. }
  105. if (!wait_dma)
  106. return 0;
  107. err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
  108. !(value & BSE_DMA_BUSY), 1, 1000);
  109. if (err) {
  110. dev_err(dev, "BSEV DMA timeout\n");
  111. return err;
  112. }
  113. return 0;
  114. }
  115. static int tegra_vde_push_to_bsev_icmdqueue(struct tegra_vde *vde,
  116. u32 value, bool wait_dma)
  117. {
  118. tegra_vde_writel(vde, value, vde->bsev, ICMDQUE_WR);
  119. return tegra_vde_wait_bsev(vde, wait_dma);
  120. }
  121. static void tegra_vde_setup_frameid(struct tegra_vde *vde,
  122. struct tegra_video_frame *frame,
  123. unsigned int frameid,
  124. u32 mbs_width, u32 mbs_height)
  125. {
  126. u32 y_addr = frame ? frame->y_addr : 0x6CDEAD00;
  127. u32 cb_addr = frame ? frame->cb_addr : 0x6CDEAD00;
  128. u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
  129. u32 value1 = frame ? ((frame->luma_atoms_pitch << 16) | mbs_height) : 0;
  130. u32 value2 = frame ? ((frame->chroma_atoms_pitch << 6) | 1) : 0;
  131. tegra_vde_writel(vde, y_addr >> 8, vde->frameid, 0x000 + frameid * 4);
  132. tegra_vde_writel(vde, cb_addr >> 8, vde->frameid, 0x100 + frameid * 4);
  133. tegra_vde_writel(vde, cr_addr >> 8, vde->frameid, 0x180 + frameid * 4);
  134. tegra_vde_writel(vde, value1, vde->frameid, 0x080 + frameid * 4);
  135. tegra_vde_writel(vde, value2, vde->frameid, 0x280 + frameid * 4);
  136. }
  137. static void tegra_setup_frameidx(struct tegra_vde *vde,
  138. struct tegra_video_frame *frames,
  139. unsigned int frames_nb,
  140. u32 mbs_width, u32 mbs_height)
  141. {
  142. unsigned int idx;
  143. for (idx = 0; idx < frames_nb; idx++)
  144. tegra_vde_setup_frameid(vde, &frames[idx], idx,
  145. mbs_width, mbs_height);
  146. for (; idx < 17; idx++)
  147. tegra_vde_setup_frameid(vde, NULL, idx, 0, 0);
  148. }
  149. static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
  150. unsigned int table,
  151. unsigned int row,
  152. u32 value1, u32 value2)
  153. {
  154. u32 *iram_tables = vde->iram;
  155. trace_vde_setup_iram_entry(table, row, value1, value2);
  156. iram_tables[0x20 * table + row * 2 + 0] = value1;
  157. iram_tables[0x20 * table + row * 2 + 1] = value2;
  158. }
  159. static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
  160. struct tegra_video_frame *dpb_frames,
  161. unsigned int ref_frames_nb,
  162. unsigned int with_earlier_poc_nb)
  163. {
  164. struct tegra_video_frame *frame;
  165. int with_later_poc_nb;
  166. u32 value, aux_addr;
  167. unsigned int i, k;
  168. trace_vde_ref_l0(dpb_frames[0].frame_num);
  169. for (i = 0; i < 16; i++) {
  170. if (i < ref_frames_nb) {
  171. frame = &dpb_frames[i + 1];
  172. aux_addr = frame->aux_addr;
  173. value = (i + 1) << 26;
  174. value |= !(frame->flags & FLAG_B_FRAME) << 25;
  175. value |= 1 << 24;
  176. value |= frame->frame_num;
  177. } else {
  178. aux_addr = 0x6ADEAD00;
  179. value = 0x3f;
  180. }
  181. tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
  182. tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
  183. tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
  184. tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
  185. }
  186. if (!(dpb_frames[0].flags & FLAG_B_FRAME))
  187. return;
  188. if (with_earlier_poc_nb >= ref_frames_nb)
  189. return;
  190. with_later_poc_nb = ref_frames_nb - with_earlier_poc_nb;
  191. trace_vde_ref_l1(with_later_poc_nb, with_earlier_poc_nb);
  192. for (i = 0, k = with_earlier_poc_nb; i < with_later_poc_nb; i++, k++) {
  193. frame = &dpb_frames[k + 1];
  194. aux_addr = frame->aux_addr;
  195. value = (k + 1) << 26;
  196. value |= !(frame->flags & FLAG_B_FRAME) << 25;
  197. value |= 1 << 24;
  198. value |= frame->frame_num;
  199. tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
  200. }
  201. for (k = 0; i < ref_frames_nb; i++, k++) {
  202. frame = &dpb_frames[k + 1];
  203. aux_addr = frame->aux_addr;
  204. value = (k + 1) << 26;
  205. value |= !(frame->flags & FLAG_B_FRAME) << 25;
  206. value |= 1 << 24;
  207. value |= frame->frame_num;
  208. tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
  209. }
  210. }
  211. static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
  212. struct tegra_vde_h264_decoder_ctx *ctx,
  213. struct tegra_video_frame *dpb_frames,
  214. dma_addr_t bitstream_data_addr,
  215. size_t bitstream_data_size,
  216. unsigned int macroblocks_nb)
  217. {
  218. struct device *dev = vde->dev;
  219. u32 value;
  220. int err;
  221. tegra_vde_set_bits(vde, 0x000A, vde->sxe, 0xF0);
  222. tegra_vde_set_bits(vde, 0x000B, vde->bsev, CMDQUE_CONTROL);
  223. tegra_vde_set_bits(vde, 0x8002, vde->mbe, 0x50);
  224. tegra_vde_set_bits(vde, 0x000A, vde->mbe, 0xA0);
  225. tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x14);
  226. tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x28);
  227. tegra_vde_set_bits(vde, 0x0A00, vde->mce, 0x08);
  228. tegra_vde_set_bits(vde, 0x000A, vde->tfe, 0x00);
  229. tegra_vde_set_bits(vde, 0x0005, vde->vdma, 0x04);
  230. tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x1C);
  231. tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x00);
  232. tegra_vde_writel(vde, 0x00000007, vde->vdma, 0x04);
  233. tegra_vde_writel(vde, 0x00000007, vde->frameid, 0x200);
  234. tegra_vde_writel(vde, 0x00000005, vde->tfe, 0x04);
  235. tegra_vde_writel(vde, 0x00000000, vde->mbe, 0x84);
  236. tegra_vde_writel(vde, 0x00000010, vde->sxe, 0x08);
  237. tegra_vde_writel(vde, 0x00000150, vde->sxe, 0x54);
  238. tegra_vde_writel(vde, 0x0000054C, vde->sxe, 0x58);
  239. tegra_vde_writel(vde, 0x00000E34, vde->sxe, 0x5C);
  240. tegra_vde_writel(vde, 0x063C063C, vde->mce, 0x10);
  241. tegra_vde_writel(vde, 0x0003FC00, vde->bsev, INTR_STATUS);
  242. tegra_vde_writel(vde, 0x0000150D, vde->bsev, BSE_CONFIG);
  243. tegra_vde_writel(vde, 0x00000100, vde->bsev, BSE_INT_ENB);
  244. tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x98);
  245. tegra_vde_writel(vde, 0x00000060, vde->bsev, 0x9C);
  246. memset(vde->iram + 128, 0, macroblocks_nb / 2);
  247. tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
  248. ctx->pic_width_in_mbs, ctx->pic_height_in_mbs);
  249. tegra_vde_setup_iram_tables(vde, dpb_frames,
  250. ctx->dpb_frames_nb - 1,
  251. ctx->dpb_ref_frames_with_earlier_poc_nb);
  252. /*
  253. * The IRAM mapping is write-combine, ensure that CPU buffers have
  254. * been flushed at this point.
  255. */
  256. wmb();
  257. tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x8C);
  258. tegra_vde_writel(vde, bitstream_data_addr + bitstream_data_size,
  259. vde->bsev, 0x54);
  260. vde->bitstream_data_addr = bitstream_data_addr;
  261. value = ctx->pic_width_in_mbs << 11 | ctx->pic_height_in_mbs << 3;
  262. tegra_vde_writel(vde, value, vde->bsev, 0x88);
  263. err = tegra_vde_wait_bsev(vde, false);
  264. if (err)
  265. return err;
  266. err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x800003FC, false);
  267. if (err)
  268. return err;
  269. value = 0x01500000;
  270. value |= ((vde->iram_lists_addr + 512) >> 2) & 0xFFFF;
  271. err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
  272. if (err)
  273. return err;
  274. err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
  275. if (err)
  276. return err;
  277. err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x80000080, false);
  278. if (err)
  279. return err;
  280. value = 0x0E340000 | ((vde->iram_lists_addr >> 2) & 0xFFFF);
  281. err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
  282. if (err)
  283. return err;
  284. value = 0x00800005;
  285. value |= ctx->pic_width_in_mbs << 11;
  286. value |= ctx->pic_height_in_mbs << 3;
  287. tegra_vde_writel(vde, value, vde->sxe, 0x10);
  288. value = !ctx->baseline_profile << 17;
  289. value |= ctx->level_idc << 13;
  290. value |= ctx->log2_max_pic_order_cnt_lsb << 7;
  291. value |= ctx->pic_order_cnt_type << 5;
  292. value |= ctx->log2_max_frame_num;
  293. tegra_vde_writel(vde, value, vde->sxe, 0x40);
  294. value = ctx->pic_init_qp << 25;
  295. value |= !!(ctx->deblocking_filter_control_present_flag) << 2;
  296. value |= !!ctx->pic_order_present_flag;
  297. tegra_vde_writel(vde, value, vde->sxe, 0x44);
  298. value = ctx->chroma_qp_index_offset;
  299. value |= ctx->num_ref_idx_l0_active_minus1 << 5;
  300. value |= ctx->num_ref_idx_l1_active_minus1 << 10;
  301. value |= !!ctx->constrained_intra_pred_flag << 15;
  302. tegra_vde_writel(vde, value, vde->sxe, 0x48);
  303. value = 0x0C000000;
  304. value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 24;
  305. tegra_vde_writel(vde, value, vde->sxe, 0x4C);
  306. value = 0x03800000;
  307. value |= bitstream_data_size & GENMASK(19, 15);
  308. tegra_vde_writel(vde, value, vde->sxe, 0x68);
  309. tegra_vde_writel(vde, bitstream_data_addr, vde->sxe, 0x6C);
  310. if (vde->soc->supports_ref_pic_marking)
  311. tegra_vde_writel(vde, vde->secure_bo->dma_addr, vde->sxe, 0x7c);
  312. value = 0x10000005;
  313. value |= ctx->pic_width_in_mbs << 11;
  314. value |= ctx->pic_height_in_mbs << 3;
  315. tegra_vde_writel(vde, value, vde->mbe, 0x80);
  316. value = 0x26800000;
  317. value |= ctx->level_idc << 4;
  318. value |= !ctx->baseline_profile << 1;
  319. value |= !!ctx->direct_8x8_inference_flag;
  320. tegra_vde_writel(vde, value, vde->mbe, 0x80);
  321. tegra_vde_writel(vde, 0xF4000001, vde->mbe, 0x80);
  322. tegra_vde_writel(vde, 0x20000000, vde->mbe, 0x80);
  323. tegra_vde_writel(vde, 0xF4000101, vde->mbe, 0x80);
  324. value = 0x20000000;
  325. value |= ctx->chroma_qp_index_offset << 8;
  326. tegra_vde_writel(vde, value, vde->mbe, 0x80);
  327. err = tegra_vde_setup_mbe_frame_idx(vde,
  328. ctx->dpb_frames_nb - 1,
  329. ctx->pic_order_cnt_type == 0);
  330. if (err) {
  331. dev_err(dev, "MBE frames setup failed %d\n", err);
  332. return err;
  333. }
  334. tegra_vde_mbe_set_0xa_reg(vde, 0, 0x000009FC);
  335. tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
  336. tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
  337. tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
  338. tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);
  339. value = 0xFC000000;
  340. value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 2;
  341. if (!ctx->baseline_profile)
  342. value |= !!(dpb_frames[0].flags & FLAG_REFERENCE) << 1;
  343. tegra_vde_writel(vde, value, vde->mbe, 0x80);
  344. err = tegra_vde_wait_mbe(vde);
  345. if (err) {
  346. dev_err(dev, "MBE programming failed %d\n", err);
  347. return err;
  348. }
  349. return 0;
  350. }
  351. static void tegra_vde_decode_frame(struct tegra_vde *vde,
  352. unsigned int macroblocks_nb)
  353. {
  354. reinit_completion(&vde->decode_completion);
  355. tegra_vde_writel(vde, 0x00000001, vde->bsev, 0x8C);
  356. tegra_vde_writel(vde, 0x20000000 | (macroblocks_nb - 1),
  357. vde->sxe, 0x00);
  358. }
  359. static int tegra_vde_validate_h264_ctx(struct device *dev,
  360. struct tegra_vde_h264_decoder_ctx *ctx)
  361. {
  362. if (ctx->dpb_frames_nb == 0 || ctx->dpb_frames_nb > 17) {
  363. dev_err(dev, "Bad DPB size %u\n", ctx->dpb_frames_nb);
  364. return -EINVAL;
  365. }
  366. if (ctx->level_idc > 15) {
  367. dev_err(dev, "Bad level value %u\n", ctx->level_idc);
  368. return -EINVAL;
  369. }
  370. if (ctx->pic_init_qp > 52) {
  371. dev_err(dev, "Bad pic_init_qp value %u\n", ctx->pic_init_qp);
  372. return -EINVAL;
  373. }
  374. if (ctx->log2_max_pic_order_cnt_lsb > 16) {
  375. dev_err(dev, "Bad log2_max_pic_order_cnt_lsb value %u\n",
  376. ctx->log2_max_pic_order_cnt_lsb);
  377. return -EINVAL;
  378. }
  379. if (ctx->log2_max_frame_num > 16) {
  380. dev_err(dev, "Bad log2_max_frame_num value %u\n",
  381. ctx->log2_max_frame_num);
  382. return -EINVAL;
  383. }
  384. if (ctx->chroma_qp_index_offset > 31) {
  385. dev_err(dev, "Bad chroma_qp_index_offset value %u\n",
  386. ctx->chroma_qp_index_offset);
  387. return -EINVAL;
  388. }
  389. if (ctx->pic_order_cnt_type > 2) {
  390. dev_err(dev, "Bad pic_order_cnt_type value %u\n",
  391. ctx->pic_order_cnt_type);
  392. return -EINVAL;
  393. }
  394. if (ctx->num_ref_idx_l0_active_minus1 > 15) {
  395. dev_err(dev, "Bad num_ref_idx_l0_active_minus1 value %u\n",
  396. ctx->num_ref_idx_l0_active_minus1);
  397. return -EINVAL;
  398. }
  399. if (ctx->num_ref_idx_l1_active_minus1 > 15) {
  400. dev_err(dev, "Bad num_ref_idx_l1_active_minus1 value %u\n",
  401. ctx->num_ref_idx_l1_active_minus1);
  402. return -EINVAL;
  403. }
  404. if (!ctx->pic_width_in_mbs || ctx->pic_width_in_mbs > 127) {
  405. dev_err(dev, "Bad pic_width_in_mbs value %u\n",
  406. ctx->pic_width_in_mbs);
  407. return -EINVAL;
  408. }
  409. if (!ctx->pic_height_in_mbs || ctx->pic_height_in_mbs > 127) {
  410. dev_err(dev, "Bad pic_height_in_mbs value %u\n",
  411. ctx->pic_height_in_mbs);
  412. return -EINVAL;
  413. }
  414. return 0;
  415. }
  416. static int tegra_vde_decode_begin(struct tegra_vde *vde,
  417. struct tegra_vde_h264_decoder_ctx *ctx,
  418. struct tegra_video_frame *dpb_frames,
  419. dma_addr_t bitstream_data_addr,
  420. size_t bitstream_data_size)
  421. {
  422. struct device *dev = vde->dev;
  423. unsigned int macroblocks_nb;
  424. int err;
  425. err = mutex_lock_interruptible(&vde->lock);
  426. if (err)
  427. return err;
  428. err = pm_runtime_resume_and_get(dev);
  429. if (err < 0)
  430. goto unlock;
  431. /*
  432. * We rely on the VDE registers reset value, otherwise VDE
  433. * causes bus lockup.
  434. */
  435. err = reset_control_assert(vde->rst_mc);
  436. if (err) {
  437. dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
  438. err);
  439. goto put_runtime_pm;
  440. }
  441. err = reset_control_reset(vde->rst);
  442. if (err) {
  443. dev_err(dev, "DEC start: Failed to reset HW: %d\n", err);
  444. goto put_runtime_pm;
  445. }
  446. err = reset_control_deassert(vde->rst_mc);
  447. if (err) {
  448. dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
  449. err);
  450. goto put_runtime_pm;
  451. }
  452. macroblocks_nb = ctx->pic_width_in_mbs * ctx->pic_height_in_mbs;
  453. err = tegra_vde_setup_hw_context(vde, ctx, dpb_frames,
  454. bitstream_data_addr,
  455. bitstream_data_size,
  456. macroblocks_nb);
  457. if (err)
  458. goto put_runtime_pm;
  459. tegra_vde_decode_frame(vde, macroblocks_nb);
  460. return 0;
  461. put_runtime_pm:
  462. pm_runtime_put_autosuspend(dev);
  463. unlock:
  464. mutex_unlock(&vde->lock);
  465. return err;
  466. }
  467. static void tegra_vde_decode_abort(struct tegra_vde *vde)
  468. {
  469. struct device *dev = vde->dev;
  470. int err;
  471. /*
  472. * At first reset memory client to avoid resetting VDE HW in the
  473. * middle of DMA which could result into memory corruption or hang
  474. * the whole system.
  475. */
  476. err = reset_control_assert(vde->rst_mc);
  477. if (err)
  478. dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);
  479. err = reset_control_assert(vde->rst);
  480. if (err)
  481. dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err);
  482. pm_runtime_put_autosuspend(dev);
  483. mutex_unlock(&vde->lock);
  484. }
  485. static int tegra_vde_decode_end(struct tegra_vde *vde)
  486. {
  487. unsigned int read_bytes, macroblocks_nb;
  488. struct device *dev = vde->dev;
  489. dma_addr_t bsev_ptr;
  490. long time_left;
  491. int ret;
  492. time_left = wait_for_completion_interruptible_timeout(
  493. &vde->decode_completion, msecs_to_jiffies(1000));
  494. if (time_left < 0) {
  495. ret = time_left;
  496. } else if (time_left == 0) {
  497. bsev_ptr = tegra_vde_readl(vde, vde->bsev, 0x10);
  498. macroblocks_nb = tegra_vde_readl(vde, vde->sxe, 0xC8) & 0x1FFF;
  499. read_bytes = bsev_ptr ? bsev_ptr - vde->bitstream_data_addr : 0;
  500. dev_err(dev, "Decoding failed: read 0x%X bytes, %u macroblocks parsed\n",
  501. read_bytes, macroblocks_nb);
  502. ret = -EIO;
  503. } else {
  504. ret = 0;
  505. }
  506. tegra_vde_decode_abort(vde);
  507. return ret;
  508. }
  509. static struct vb2_buffer *get_ref_buf(struct tegra_ctx *ctx,
  510. struct vb2_v4l2_buffer *dst,
  511. unsigned int dpb_idx)
  512. {
  513. const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
  514. struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
  515. struct vb2_buffer *vb = NULL;
  516. if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
  517. vb = vb2_find_buffer(cap_q, dpb[dpb_idx].reference_ts);
  518. /*
  519. * If a DPB entry is unused or invalid, address of current destination
  520. * buffer is returned.
  521. */
  522. if (!vb)
  523. return &dst->vb2_buf;
  524. return vb;
  525. }
  526. static int tegra_vde_validate_vb_size(struct tegra_ctx *ctx,
  527. struct vb2_buffer *vb,
  528. unsigned int plane_id,
  529. size_t min_size)
  530. {
  531. u64 offset = vb->planes[plane_id].data_offset;
  532. struct device *dev = ctx->vde->dev;
  533. if (offset + min_size > vb2_plane_size(vb, plane_id)) {
  534. dev_err(dev, "Too small plane[%u] size %lu @0x%llX, should be at least %zu\n",
  535. plane_id, vb2_plane_size(vb, plane_id), offset, min_size);
  536. return -EINVAL;
  537. }
  538. return 0;
  539. }
  540. static int tegra_vde_h264_setup_frame(struct tegra_ctx *ctx,
  541. struct tegra_vde_h264_decoder_ctx *h264,
  542. struct v4l2_h264_reflist_builder *b,
  543. struct vb2_buffer *vb,
  544. unsigned int ref_id,
  545. unsigned int id)
  546. {
  547. struct v4l2_pix_format_mplane *pixfmt = &ctx->decoded_fmt.fmt.pix_mp;
  548. struct tegra_m2m_buffer *tb = vb_to_tegra_buf(vb);
  549. struct tegra_ctx_h264 *h = &ctx->h264;
  550. struct tegra_vde *vde = ctx->vde;
  551. struct device *dev = vde->dev;
  552. unsigned int cstride, lstride;
  553. unsigned int flags = 0;
  554. size_t lsize, csize;
  555. int err, frame_num;
  556. lsize = h264->pic_width_in_mbs * 16 * h264->pic_height_in_mbs * 16;
  557. csize = h264->pic_width_in_mbs * 8 * h264->pic_height_in_mbs * 8;
  558. lstride = pixfmt->plane_fmt[0].bytesperline;
  559. cstride = pixfmt->plane_fmt[1].bytesperline;
  560. err = tegra_vde_validate_vb_size(ctx, vb, 0, lsize);
  561. if (err)
  562. return err;
  563. err = tegra_vde_validate_vb_size(ctx, vb, 1, csize);
  564. if (err)
  565. return err;
  566. err = tegra_vde_validate_vb_size(ctx, vb, 2, csize);
  567. if (err)
  568. return err;
  569. if (!tb->aux || tb->aux->size < csize) {
  570. dev_err(dev, "Too small aux size %zd, should be at least %zu\n",
  571. tb->aux ? tb->aux->size : -1, csize);
  572. return -EINVAL;
  573. }
  574. if (id == 0) {
  575. frame_num = h->decode_params->frame_num;
  576. if (h->decode_params->nal_ref_idc)
  577. flags |= FLAG_REFERENCE;
  578. } else {
  579. frame_num = b->refs[ref_id].frame_num;
  580. }
  581. if (tb->b_frame)
  582. flags |= FLAG_B_FRAME;
  583. vde->frames[id].flags = flags;
  584. vde->frames[id].y_addr = tb->dma_addr[0];
  585. vde->frames[id].cb_addr = tb->dma_addr[1];
  586. vde->frames[id].cr_addr = tb->dma_addr[2];
  587. vde->frames[id].aux_addr = tb->aux->dma_addr;
  588. vde->frames[id].frame_num = frame_num & 0x7fffff;
  589. vde->frames[id].luma_atoms_pitch = lstride / VDE_ATOM;
  590. vde->frames[id].chroma_atoms_pitch = cstride / VDE_ATOM;
  591. return 0;
  592. }
  593. static int tegra_vde_h264_setup_frames(struct tegra_ctx *ctx,
  594. struct tegra_vde_h264_decoder_ctx *h264)
  595. {
  596. struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
  597. struct vb2_v4l2_buffer *dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
  598. const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
  599. struct tegra_m2m_buffer *tb = vb_to_tegra_buf(&dst->vb2_buf);
  600. struct tegra_ctx_h264 *h = &ctx->h264;
  601. struct v4l2_h264_reflist_builder b;
  602. struct v4l2_h264_reference *dpb_id;
  603. struct h264_reflists reflists;
  604. struct vb2_buffer *ref;
  605. unsigned int i;
  606. int err;
  607. /*
  608. * Tegra hardware requires information about frame's type, assuming
  609. * that frame consists of the same type slices. Userspace must tag
  610. * frame's type appropriately.
  611. *
  612. * Decoding of a non-uniform frames isn't supported by hardware and
  613. * require software preprocessing that we don't implement. Decoding
  614. * is expected to fail in this case. Such video streams are rare in
  615. * practice, so not a big deal.
  616. *
  617. * If userspace doesn't tell us frame's type, then we will try decode
  618. * as-is.
  619. */
  620. v4l2_m2m_buf_copy_metadata(src, dst);
  621. if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME)
  622. tb->b_frame = true;
  623. else
  624. tb->b_frame = false;
  625. err = tegra_vde_h264_setup_frame(ctx, h264, NULL, &dst->vb2_buf, 0,
  626. h264->dpb_frames_nb++);
  627. if (err)
  628. return err;
  629. if (!(h->decode_params->flags & (V4L2_H264_DECODE_PARAM_FLAG_PFRAME |
  630. V4L2_H264_DECODE_PARAM_FLAG_BFRAME)))
  631. return 0;
  632. v4l2_h264_init_reflist_builder(&b, h->decode_params, h->sps, dpb);
  633. if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME) {
  634. v4l2_h264_build_b_ref_lists(&b, reflists.b0, reflists.b1);
  635. dpb_id = reflists.b0;
  636. } else {
  637. v4l2_h264_build_p_ref_list(&b, reflists.p);
  638. dpb_id = reflists.p;
  639. }
  640. for (i = 0; i < b.num_valid; i++) {
  641. int dpb_idx = dpb_id[i].index;
  642. ref = get_ref_buf(ctx, dst, dpb_idx);
  643. err = tegra_vde_h264_setup_frame(ctx, h264, &b, ref, dpb_idx,
  644. h264->dpb_frames_nb++);
  645. if (err)
  646. return err;
  647. if (b.refs[dpb_idx].top_field_order_cnt < b.cur_pic_order_count)
  648. h264->dpb_ref_frames_with_earlier_poc_nb++;
  649. }
  650. return 0;
  651. }
  652. static unsigned int to_tegra_vde_h264_level_idc(unsigned int level_idc)
  653. {
  654. switch (level_idc) {
  655. case 11:
  656. return 2;
  657. case 12:
  658. return 3;
  659. case 13:
  660. return 4;
  661. case 20:
  662. return 5;
  663. case 21:
  664. return 6;
  665. case 22:
  666. return 7;
  667. case 30:
  668. return 8;
  669. case 31:
  670. return 9;
  671. case 32:
  672. return 10;
  673. case 40:
  674. return 11;
  675. case 41:
  676. return 12;
  677. case 42:
  678. return 13;
  679. case 50:
  680. return 14;
  681. default:
  682. break;
  683. }
  684. return 15;
  685. }
  686. static int tegra_vde_h264_setup_context(struct tegra_ctx *ctx,
  687. struct tegra_vde_h264_decoder_ctx *h264)
  688. {
  689. struct tegra_ctx_h264 *h = &ctx->h264;
  690. struct tegra_vde *vde = ctx->vde;
  691. struct device *dev = vde->dev;
  692. int err;
  693. memset(h264, 0, sizeof(*h264));
  694. memset(vde->frames, 0, sizeof(vde->frames));
  695. tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
  696. tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_SPS);
  697. tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_PPS);
  698. /* CABAC unsupported by hardware, requires software preprocessing */
  699. if (h->pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
  700. return -EOPNOTSUPP;
  701. if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
  702. return -EOPNOTSUPP;
  703. if (h->sps->profile_idc == 66)
  704. h264->baseline_profile = 1;
  705. if (h->sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
  706. h264->direct_8x8_inference_flag = 1;
  707. if (h->pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
  708. h264->constrained_intra_pred_flag = 1;
  709. if (h->pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)
  710. h264->deblocking_filter_control_present_flag = 1;
  711. if (h->pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT)
  712. h264->pic_order_present_flag = 1;
  713. h264->level_idc = to_tegra_vde_h264_level_idc(h->sps->level_idc);
  714. h264->log2_max_pic_order_cnt_lsb = h->sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
  715. h264->log2_max_frame_num = h->sps->log2_max_frame_num_minus4 + 4;
  716. h264->pic_order_cnt_type = h->sps->pic_order_cnt_type;
  717. h264->pic_width_in_mbs = h->sps->pic_width_in_mbs_minus1 + 1;
  718. h264->pic_height_in_mbs = h->sps->pic_height_in_map_units_minus1 + 1;
  719. h264->num_ref_idx_l0_active_minus1 = h->pps->num_ref_idx_l0_default_active_minus1;
  720. h264->num_ref_idx_l1_active_minus1 = h->pps->num_ref_idx_l1_default_active_minus1;
  721. h264->chroma_qp_index_offset = h->pps->chroma_qp_index_offset & 0x1f;
  722. h264->pic_init_qp = h->pps->pic_init_qp_minus26 + 26;
  723. err = tegra_vde_h264_setup_frames(ctx, h264);
  724. if (err)
  725. return err;
  726. err = tegra_vde_validate_h264_ctx(dev, h264);
  727. if (err)
  728. return err;
  729. return 0;
  730. }
  731. int tegra_vde_h264_decode_run(struct tegra_ctx *ctx)
  732. {
  733. struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
  734. struct tegra_m2m_buffer *bitstream = vb_to_tegra_buf(&src->vb2_buf);
  735. size_t bitstream_size = vb2_get_plane_payload(&src->vb2_buf, 0);
  736. struct tegra_vde_h264_decoder_ctx h264;
  737. struct tegra_vde *vde = ctx->vde;
  738. int err;
  739. err = tegra_vde_h264_setup_context(ctx, &h264);
  740. if (err)
  741. return err;
  742. err = tegra_vde_decode_begin(vde, &h264, vde->frames,
  743. bitstream->dma_addr[0],
  744. bitstream_size);
  745. if (err)
  746. return err;
  747. return 0;
  748. }
  749. int tegra_vde_h264_decode_wait(struct tegra_ctx *ctx)
  750. {
  751. return tegra_vde_decode_end(ctx->vde);
  752. }