xz_dec_bcj.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. // SPDX-License-Identifier: 0BSD
  2. /*
  3. * Branch/Call/Jump (BCJ) filter decoders
  4. *
  5. * Authors: Lasse Collin <lasse.collin@tukaani.org>
  6. * Igor Pavlov <https://7-zip.org/>
  7. */
  8. #include "xz_private.h"
  9. /*
  10. * The rest of the file is inside this ifdef. It makes things a little more
  11. * convenient when building without support for any BCJ filters.
  12. */
  13. #ifdef XZ_DEC_BCJ
  14. struct xz_dec_bcj {
  15. /* Type of the BCJ filter being used */
  16. enum {
  17. BCJ_X86 = 4, /* x86 or x86-64 */
  18. BCJ_POWERPC = 5, /* Big endian only */
  19. BCJ_ARM = 7, /* Little endian only */
  20. BCJ_ARMTHUMB = 8, /* Little endian only */
  21. BCJ_SPARC = 9, /* Big or little endian */
  22. BCJ_ARM64 = 10, /* AArch64 */
  23. BCJ_RISCV = 11 /* RV32GQC_Zfh, RV64GQC_Zfh */
  24. } type;
  25. /*
  26. * Return value of the next filter in the chain. We need to preserve
  27. * this information across calls, because we must not call the next
  28. * filter anymore once it has returned XZ_STREAM_END.
  29. */
  30. enum xz_ret ret;
  31. /* True if we are operating in single-call mode. */
  32. bool single_call;
  33. /*
  34. * Absolute position relative to the beginning of the uncompressed
  35. * data (in a single .xz Block). We care only about the lowest 32
  36. * bits so this doesn't need to be uint64_t even with big files.
  37. */
  38. uint32_t pos;
  39. /* x86 filter state */
  40. uint32_t x86_prev_mask;
  41. /* Temporary space to hold the variables from struct xz_buf */
  42. uint8_t *out;
  43. size_t out_pos;
  44. size_t out_size;
  45. struct {
  46. /* Amount of already filtered data in the beginning of buf */
  47. size_t filtered;
  48. /* Total amount of data currently stored in buf */
  49. size_t size;
  50. /*
  51. * Buffer to hold a mix of filtered and unfiltered data. This
  52. * needs to be big enough to hold Alignment + 2 * Look-ahead:
  53. *
  54. * Type Alignment Look-ahead
  55. * x86 1 4
  56. * PowerPC 4 0
  57. * IA-64 16 0
  58. * ARM 4 0
  59. * ARM-Thumb 2 2
  60. * SPARC 4 0
  61. */
  62. uint8_t buf[16];
  63. } temp;
  64. };
  65. #ifdef XZ_DEC_X86
  66. /*
  67. * This is used to test the most significant byte of a memory address
  68. * in an x86 instruction.
  69. */
  70. static inline int bcj_x86_test_msbyte(uint8_t b)
  71. {
  72. return b == 0x00 || b == 0xFF;
  73. }
  74. static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  75. {
  76. static const bool mask_to_allowed_status[8]
  77. = { true, true, true, false, true, false, false, false };
  78. static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
  79. size_t i;
  80. size_t prev_pos = (size_t)-1;
  81. uint32_t prev_mask = s->x86_prev_mask;
  82. uint32_t src;
  83. uint32_t dest;
  84. uint32_t j;
  85. uint8_t b;
  86. if (size <= 4)
  87. return 0;
  88. size -= 4;
  89. for (i = 0; i < size; ++i) {
  90. if ((buf[i] & 0xFE) != 0xE8)
  91. continue;
  92. prev_pos = i - prev_pos;
  93. if (prev_pos > 3) {
  94. prev_mask = 0;
  95. } else {
  96. prev_mask = (prev_mask << (prev_pos - 1)) & 7;
  97. if (prev_mask != 0) {
  98. b = buf[i + 4 - mask_to_bit_num[prev_mask]];
  99. if (!mask_to_allowed_status[prev_mask]
  100. || bcj_x86_test_msbyte(b)) {
  101. prev_pos = i;
  102. prev_mask = (prev_mask << 1) | 1;
  103. continue;
  104. }
  105. }
  106. }
  107. prev_pos = i;
  108. if (bcj_x86_test_msbyte(buf[i + 4])) {
  109. src = get_unaligned_le32(buf + i + 1);
  110. while (true) {
  111. dest = src - (s->pos + (uint32_t)i + 5);
  112. if (prev_mask == 0)
  113. break;
  114. j = mask_to_bit_num[prev_mask] * 8;
  115. b = (uint8_t)(dest >> (24 - j));
  116. if (!bcj_x86_test_msbyte(b))
  117. break;
  118. src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
  119. }
  120. dest &= 0x01FFFFFF;
  121. dest |= (uint32_t)0 - (dest & 0x01000000);
  122. put_unaligned_le32(dest, buf + i + 1);
  123. i += 4;
  124. } else {
  125. prev_mask = (prev_mask << 1) | 1;
  126. }
  127. }
  128. prev_pos = i - prev_pos;
  129. s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
  130. return i;
  131. }
  132. #endif
  133. #ifdef XZ_DEC_POWERPC
  134. static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  135. {
  136. size_t i;
  137. uint32_t instr;
  138. size &= ~(size_t)3;
  139. for (i = 0; i < size; i += 4) {
  140. instr = get_unaligned_be32(buf + i);
  141. if ((instr & 0xFC000003) == 0x48000001) {
  142. instr &= 0x03FFFFFC;
  143. instr -= s->pos + (uint32_t)i;
  144. instr &= 0x03FFFFFC;
  145. instr |= 0x48000001;
  146. put_unaligned_be32(instr, buf + i);
  147. }
  148. }
  149. return i;
  150. }
  151. #endif
  152. #ifdef XZ_DEC_ARM
  153. static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  154. {
  155. size_t i;
  156. uint32_t addr;
  157. size &= ~(size_t)3;
  158. for (i = 0; i < size; i += 4) {
  159. if (buf[i + 3] == 0xEB) {
  160. addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
  161. | ((uint32_t)buf[i + 2] << 16);
  162. addr <<= 2;
  163. addr -= s->pos + (uint32_t)i + 8;
  164. addr >>= 2;
  165. buf[i] = (uint8_t)addr;
  166. buf[i + 1] = (uint8_t)(addr >> 8);
  167. buf[i + 2] = (uint8_t)(addr >> 16);
  168. }
  169. }
  170. return i;
  171. }
  172. #endif
  173. #ifdef XZ_DEC_ARMTHUMB
  174. static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  175. {
  176. size_t i;
  177. uint32_t addr;
  178. if (size < 4)
  179. return 0;
  180. size -= 4;
  181. for (i = 0; i <= size; i += 2) {
  182. if ((buf[i + 1] & 0xF8) == 0xF0
  183. && (buf[i + 3] & 0xF8) == 0xF8) {
  184. addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
  185. | ((uint32_t)buf[i] << 11)
  186. | (((uint32_t)buf[i + 3] & 0x07) << 8)
  187. | (uint32_t)buf[i + 2];
  188. addr <<= 1;
  189. addr -= s->pos + (uint32_t)i + 4;
  190. addr >>= 1;
  191. buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
  192. buf[i] = (uint8_t)(addr >> 11);
  193. buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
  194. buf[i + 2] = (uint8_t)addr;
  195. i += 2;
  196. }
  197. }
  198. return i;
  199. }
  200. #endif
  201. #ifdef XZ_DEC_SPARC
  202. static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  203. {
  204. size_t i;
  205. uint32_t instr;
  206. size &= ~(size_t)3;
  207. for (i = 0; i < size; i += 4) {
  208. instr = get_unaligned_be32(buf + i);
  209. if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
  210. instr <<= 2;
  211. instr -= s->pos + (uint32_t)i;
  212. instr >>= 2;
  213. instr = ((uint32_t)0x40000000 - (instr & 0x400000))
  214. | 0x40000000 | (instr & 0x3FFFFF);
  215. put_unaligned_be32(instr, buf + i);
  216. }
  217. }
  218. return i;
  219. }
  220. #endif
  221. #ifdef XZ_DEC_ARM64
  222. static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  223. {
  224. size_t i;
  225. uint32_t instr;
  226. uint32_t addr;
  227. size &= ~(size_t)3;
  228. for (i = 0; i < size; i += 4) {
  229. instr = get_unaligned_le32(buf + i);
  230. if ((instr >> 26) == 0x25) {
  231. /* BL instruction */
  232. addr = instr - ((s->pos + (uint32_t)i) >> 2);
  233. instr = 0x94000000 | (addr & 0x03FFFFFF);
  234. put_unaligned_le32(instr, buf + i);
  235. } else if ((instr & 0x9F000000) == 0x90000000) {
  236. /* ADRP instruction */
  237. addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC);
  238. /* Only convert values in the range +/-512 MiB. */
  239. if ((addr + 0x020000) & 0x1C0000)
  240. continue;
  241. addr -= (s->pos + (uint32_t)i) >> 12;
  242. instr &= 0x9000001F;
  243. instr |= (addr & 3) << 29;
  244. instr |= (addr & 0x03FFFC) << 3;
  245. instr |= (0U - (addr & 0x020000)) & 0xE00000;
  246. put_unaligned_le32(instr, buf + i);
  247. }
  248. }
  249. return i;
  250. }
  251. #endif
  252. #ifdef XZ_DEC_RISCV
  253. static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  254. {
  255. size_t i;
  256. uint32_t b1;
  257. uint32_t b2;
  258. uint32_t b3;
  259. uint32_t instr;
  260. uint32_t instr2;
  261. uint32_t instr2_rs1;
  262. uint32_t addr;
  263. if (size < 8)
  264. return 0;
  265. size -= 8;
  266. for (i = 0; i <= size; i += 2) {
  267. instr = buf[i];
  268. if (instr == 0xEF) {
  269. /* JAL */
  270. b1 = buf[i + 1];
  271. if ((b1 & 0x0D) != 0)
  272. continue;
  273. b2 = buf[i + 2];
  274. b3 = buf[i + 3];
  275. addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1);
  276. addr -= s->pos + (uint32_t)i;
  277. buf[i + 1] = (uint8_t)((b1 & 0x0F)
  278. | ((addr >> 8) & 0xF0));
  279. buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F)
  280. | ((addr >> 7) & 0x10)
  281. | ((addr << 4) & 0xE0));
  282. buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F)
  283. | ((addr >> 13) & 0x80));
  284. i += 4 - 2;
  285. } else if ((instr & 0x7F) == 0x17) {
  286. /* AUIPC */
  287. instr |= (uint32_t)buf[i + 1] << 8;
  288. instr |= (uint32_t)buf[i + 2] << 16;
  289. instr |= (uint32_t)buf[i + 3] << 24;
  290. if (instr & 0xE80) {
  291. /* AUIPC's rd doesn't equal x0 or x2. */
  292. instr2 = get_unaligned_le32(buf + i + 4);
  293. if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) {
  294. i += 6 - 2;
  295. continue;
  296. }
  297. addr = (instr & 0xFFFFF000) + (instr2 >> 20);
  298. instr = 0x17 | (2 << 7) | (instr2 << 12);
  299. instr2 = addr;
  300. } else {
  301. /* AUIPC's rd equals x0 or x2. */
  302. instr2_rs1 = instr >> 27;
  303. if ((uint32_t)((instr - 0x3117) << 18)
  304. >= (instr2_rs1 & 0x1D)) {
  305. i += 4 - 2;
  306. continue;
  307. }
  308. addr = get_unaligned_be32(buf + i + 4);
  309. addr -= s->pos + (uint32_t)i;
  310. instr2 = (instr >> 12) | (addr << 20);
  311. instr = 0x17 | (instr2_rs1 << 7)
  312. | ((addr + 0x800) & 0xFFFFF000);
  313. }
  314. put_unaligned_le32(instr, buf + i);
  315. put_unaligned_le32(instr2, buf + i + 4);
  316. i += 8 - 2;
  317. }
  318. }
  319. return i;
  320. }
  321. #endif
  322. /*
  323. * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
  324. * of data that got filtered.
  325. *
  326. * NOTE: This is implemented as a switch statement to avoid using function
  327. * pointers, which could be problematic in the kernel boot code, which must
  328. * avoid pointers to static data (at least on x86).
  329. */
  330. static void bcj_apply(struct xz_dec_bcj *s,
  331. uint8_t *buf, size_t *pos, size_t size)
  332. {
  333. size_t filtered;
  334. buf += *pos;
  335. size -= *pos;
  336. switch (s->type) {
  337. #ifdef XZ_DEC_X86
  338. case BCJ_X86:
  339. filtered = bcj_x86(s, buf, size);
  340. break;
  341. #endif
  342. #ifdef XZ_DEC_POWERPC
  343. case BCJ_POWERPC:
  344. filtered = bcj_powerpc(s, buf, size);
  345. break;
  346. #endif
  347. #ifdef XZ_DEC_ARM
  348. case BCJ_ARM:
  349. filtered = bcj_arm(s, buf, size);
  350. break;
  351. #endif
  352. #ifdef XZ_DEC_ARMTHUMB
  353. case BCJ_ARMTHUMB:
  354. filtered = bcj_armthumb(s, buf, size);
  355. break;
  356. #endif
  357. #ifdef XZ_DEC_SPARC
  358. case BCJ_SPARC:
  359. filtered = bcj_sparc(s, buf, size);
  360. break;
  361. #endif
  362. #ifdef XZ_DEC_ARM64
  363. case BCJ_ARM64:
  364. filtered = bcj_arm64(s, buf, size);
  365. break;
  366. #endif
  367. #ifdef XZ_DEC_RISCV
  368. case BCJ_RISCV:
  369. filtered = bcj_riscv(s, buf, size);
  370. break;
  371. #endif
  372. default:
  373. /* Never reached but silence compiler warnings. */
  374. filtered = 0;
  375. break;
  376. }
  377. *pos += filtered;
  378. s->pos += filtered;
  379. }
  380. /*
  381. * Flush pending filtered data from temp to the output buffer.
  382. * Move the remaining mixture of possibly filtered and unfiltered
  383. * data to the beginning of temp.
  384. */
  385. static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
  386. {
  387. size_t copy_size;
  388. copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
  389. memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
  390. b->out_pos += copy_size;
  391. s->temp.filtered -= copy_size;
  392. s->temp.size -= copy_size;
  393. memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
  394. }
  395. /*
  396. * The BCJ filter functions are primitive in sense that they process the
  397. * data in chunks of 1-16 bytes. To hide this issue, this function does
  398. * some buffering.
  399. */
  400. enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2,
  401. struct xz_buf *b)
  402. {
  403. size_t out_start;
  404. /*
  405. * Flush pending already filtered data to the output buffer. Return
  406. * immediately if we couldn't flush everything, or if the next
  407. * filter in the chain had already returned XZ_STREAM_END.
  408. */
  409. if (s->temp.filtered > 0) {
  410. bcj_flush(s, b);
  411. if (s->temp.filtered > 0)
  412. return XZ_OK;
  413. if (s->ret == XZ_STREAM_END)
  414. return XZ_STREAM_END;
  415. }
  416. /*
  417. * If we have more output space than what is currently pending in
  418. * temp, copy the unfiltered data from temp to the output buffer
  419. * and try to fill the output buffer by decoding more data from the
  420. * next filter in the chain. Apply the BCJ filter on the new data
  421. * in the output buffer. If everything cannot be filtered, copy it
  422. * to temp and rewind the output buffer position accordingly.
  423. *
  424. * This needs to be always run when temp.size == 0 to handle a special
  425. * case where the output buffer is full and the next filter has no
  426. * more output coming but hasn't returned XZ_STREAM_END yet.
  427. */
  428. if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
  429. out_start = b->out_pos;
  430. memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
  431. b->out_pos += s->temp.size;
  432. s->ret = xz_dec_lzma2_run(lzma2, b);
  433. if (s->ret != XZ_STREAM_END
  434. && (s->ret != XZ_OK || s->single_call))
  435. return s->ret;
  436. bcj_apply(s, b->out, &out_start, b->out_pos);
  437. /*
  438. * As an exception, if the next filter returned XZ_STREAM_END,
  439. * we can do that too, since the last few bytes that remain
  440. * unfiltered are meant to remain unfiltered.
  441. */
  442. if (s->ret == XZ_STREAM_END)
  443. return XZ_STREAM_END;
  444. s->temp.size = b->out_pos - out_start;
  445. b->out_pos -= s->temp.size;
  446. memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
  447. /*
  448. * If there wasn't enough input to the next filter to fill
  449. * the output buffer with unfiltered data, there's no point
  450. * to try decoding more data to temp.
  451. */
  452. if (b->out_pos + s->temp.size < b->out_size)
  453. return XZ_OK;
  454. }
  455. /*
  456. * We have unfiltered data in temp. If the output buffer isn't full
  457. * yet, try to fill the temp buffer by decoding more data from the
  458. * next filter. Apply the BCJ filter on temp. Then we hopefully can
  459. * fill the actual output buffer by copying filtered data from temp.
  460. * A mix of filtered and unfiltered data may be left in temp; it will
  461. * be taken care on the next call to this function.
  462. */
  463. if (b->out_pos < b->out_size) {
  464. /* Make b->out{,_pos,_size} temporarily point to s->temp. */
  465. s->out = b->out;
  466. s->out_pos = b->out_pos;
  467. s->out_size = b->out_size;
  468. b->out = s->temp.buf;
  469. b->out_pos = s->temp.size;
  470. b->out_size = sizeof(s->temp.buf);
  471. s->ret = xz_dec_lzma2_run(lzma2, b);
  472. s->temp.size = b->out_pos;
  473. b->out = s->out;
  474. b->out_pos = s->out_pos;
  475. b->out_size = s->out_size;
  476. if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
  477. return s->ret;
  478. bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
  479. /*
  480. * If the next filter returned XZ_STREAM_END, we mark that
  481. * everything is filtered, since the last unfiltered bytes
  482. * of the stream are meant to be left as is.
  483. */
  484. if (s->ret == XZ_STREAM_END)
  485. s->temp.filtered = s->temp.size;
  486. bcj_flush(s, b);
  487. if (s->temp.filtered > 0)
  488. return XZ_OK;
  489. }
  490. return s->ret;
  491. }
  492. struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
  493. {
  494. struct xz_dec_bcj *s = kmalloc_obj(*s);
  495. if (s != NULL)
  496. s->single_call = single_call;
  497. return s;
  498. }
  499. enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
  500. {
  501. switch (id) {
  502. #ifdef XZ_DEC_X86
  503. case BCJ_X86:
  504. #endif
  505. #ifdef XZ_DEC_POWERPC
  506. case BCJ_POWERPC:
  507. #endif
  508. #ifdef XZ_DEC_ARM
  509. case BCJ_ARM:
  510. #endif
  511. #ifdef XZ_DEC_ARMTHUMB
  512. case BCJ_ARMTHUMB:
  513. #endif
  514. #ifdef XZ_DEC_SPARC
  515. case BCJ_SPARC:
  516. #endif
  517. #ifdef XZ_DEC_ARM64
  518. case BCJ_ARM64:
  519. #endif
  520. #ifdef XZ_DEC_RISCV
  521. case BCJ_RISCV:
  522. #endif
  523. break;
  524. default:
  525. /* Unsupported Filter ID */
  526. return XZ_OPTIONS_ERROR;
  527. }
  528. s->type = id;
  529. s->ret = XZ_OK;
  530. s->pos = 0;
  531. s->x86_prev_mask = 0;
  532. s->temp.filtered = 0;
  533. s->temp.size = 0;
  534. return XZ_OK;
  535. }
  536. #endif