unicode.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/hfsplus/unicode.c
 *
 * Copyright (C) 2001
 * Brad Boyer (flar@allandria.com)
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 *
 * Handler routines for unicode strings
 */
  11. #include <linux/types.h>
  12. #include <linux/nls.h>
  13. #include <kunit/visibility.h>
  14. #include "hfsplus_fs.h"
  15. #include "hfsplus_raw.h"
  16. /* Fold the case of a unicode char, given the 16 bit value */
  17. /* Returns folded char, or 0 if ignorable */
  18. static inline u16 case_fold(u16 c)
  19. {
  20. u16 tmp;
  21. tmp = hfsplus_case_fold_table[c >> 8];
  22. if (tmp)
  23. tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
  24. else
  25. tmp = c;
  26. return tmp;
  27. }
  28. /* Compare unicode strings, return values like normal strcmp */
  29. int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
  30. const struct hfsplus_unistr *s2)
  31. {
  32. u16 len1, len2, c1, c2;
  33. const hfsplus_unichr *p1, *p2;
  34. len1 = be16_to_cpu(s1->length);
  35. len2 = be16_to_cpu(s2->length);
  36. p1 = s1->unicode;
  37. p2 = s2->unicode;
  38. if (len1 > HFSPLUS_MAX_STRLEN) {
  39. len1 = HFSPLUS_MAX_STRLEN;
  40. pr_err("invalid length %u has been corrected to %d\n",
  41. be16_to_cpu(s1->length), len1);
  42. }
  43. if (len2 > HFSPLUS_MAX_STRLEN) {
  44. len2 = HFSPLUS_MAX_STRLEN;
  45. pr_err("invalid length %u has been corrected to %d\n",
  46. be16_to_cpu(s2->length), len2);
  47. }
  48. while (1) {
  49. c1 = c2 = 0;
  50. while (len1 && !c1) {
  51. c1 = case_fold(be16_to_cpu(*p1));
  52. p1++;
  53. len1--;
  54. }
  55. while (len2 && !c2) {
  56. c2 = case_fold(be16_to_cpu(*p2));
  57. p2++;
  58. len2--;
  59. }
  60. if (c1 != c2)
  61. return (c1 < c2) ? -1 : 1;
  62. if (!c1 && !c2)
  63. return 0;
  64. }
  65. }
  66. EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcasecmp);
  67. /* Compare names as a sequence of 16-bit unsigned integers */
  68. int hfsplus_strcmp(const struct hfsplus_unistr *s1,
  69. const struct hfsplus_unistr *s2)
  70. {
  71. u16 len1, len2, c1, c2;
  72. const hfsplus_unichr *p1, *p2;
  73. int len;
  74. len1 = be16_to_cpu(s1->length);
  75. len2 = be16_to_cpu(s2->length);
  76. p1 = s1->unicode;
  77. p2 = s2->unicode;
  78. if (len1 > HFSPLUS_MAX_STRLEN) {
  79. len1 = HFSPLUS_MAX_STRLEN;
  80. pr_err("invalid length %u has been corrected to %d\n",
  81. be16_to_cpu(s1->length), len1);
  82. }
  83. if (len2 > HFSPLUS_MAX_STRLEN) {
  84. len2 = HFSPLUS_MAX_STRLEN;
  85. pr_err("invalid length %u has been corrected to %d\n",
  86. be16_to_cpu(s2->length), len2);
  87. }
  88. for (len = min(len1, len2); len > 0; len--) {
  89. c1 = be16_to_cpu(*p1);
  90. c2 = be16_to_cpu(*p2);
  91. if (c1 != c2)
  92. return c1 < c2 ? -1 : 1;
  93. p1++;
  94. p2++;
  95. }
  96. return len1 < len2 ? -1 :
  97. len1 > len2 ? 1 : 0;
  98. }
  99. EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcmp);
  100. #define Hangul_SBase 0xac00
  101. #define Hangul_LBase 0x1100
  102. #define Hangul_VBase 0x1161
  103. #define Hangul_TBase 0x11a7
  104. #define Hangul_SCount 11172
  105. #define Hangul_LCount 19
  106. #define Hangul_VCount 21
  107. #define Hangul_TCount 28
  108. #define Hangul_NCount (Hangul_VCount * Hangul_TCount)
  109. static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
  110. {
  111. int i, s, e;
  112. s = 1;
  113. e = p[1];
  114. if (!e || cc < p[s * 2] || cc > p[e * 2])
  115. return NULL;
  116. do {
  117. i = (s + e) / 2;
  118. if (cc > p[i * 2])
  119. s = i + 1;
  120. else if (cc < p[i * 2])
  121. e = i - 1;
  122. else
  123. return hfsplus_compose_table + p[i * 2 + 1];
  124. } while (s <= e);
  125. return NULL;
  126. }
  127. static int hfsplus_uni2asc(struct super_block *sb,
  128. const struct hfsplus_unistr *ustr,
  129. int max_len, char *astr, int *len_p)
  130. {
  131. const hfsplus_unichr *ip;
  132. struct nls_table *nls = HFSPLUS_SB(sb)->nls;
  133. u8 *op;
  134. u16 cc, c0, c1;
  135. u16 *ce1, *ce2;
  136. int i, len, ustrlen, res, compose;
  137. op = astr;
  138. ip = ustr->unicode;
  139. ustrlen = be16_to_cpu(ustr->length);
  140. if (ustrlen > max_len) {
  141. ustrlen = max_len;
  142. pr_err("invalid length %u has been corrected to %d\n",
  143. be16_to_cpu(ustr->length), ustrlen);
  144. }
  145. len = *len_p;
  146. ce1 = NULL;
  147. compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  148. while (ustrlen > 0) {
  149. c0 = be16_to_cpu(*ip++);
  150. ustrlen--;
  151. /* search for single decomposed char */
  152. if (likely(compose))
  153. ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
  154. if (ce1)
  155. cc = ce1[0];
  156. else
  157. cc = 0;
  158. if (cc) {
  159. /* start of a possibly decomposed Hangul char */
  160. if (cc != 0xffff)
  161. goto done;
  162. if (!ustrlen)
  163. goto same;
  164. c1 = be16_to_cpu(*ip) - Hangul_VBase;
  165. if (c1 < Hangul_VCount) {
  166. /* compose the Hangul char */
  167. cc = (c0 - Hangul_LBase) * Hangul_VCount;
  168. cc = (cc + c1) * Hangul_TCount;
  169. cc += Hangul_SBase;
  170. ip++;
  171. ustrlen--;
  172. if (!ustrlen)
  173. goto done;
  174. c1 = be16_to_cpu(*ip) - Hangul_TBase;
  175. if (c1 > 0 && c1 < Hangul_TCount) {
  176. cc += c1;
  177. ip++;
  178. ustrlen--;
  179. }
  180. goto done;
  181. }
  182. }
  183. while (1) {
  184. /* main loop for common case of not composed chars */
  185. if (!ustrlen)
  186. goto same;
  187. c1 = be16_to_cpu(*ip);
  188. if (likely(compose))
  189. ce1 = hfsplus_compose_lookup(
  190. hfsplus_compose_table, c1);
  191. if (ce1)
  192. break;
  193. switch (c0) {
  194. case 0:
  195. c0 = 0x2400;
  196. break;
  197. case '/':
  198. c0 = ':';
  199. break;
  200. }
  201. res = nls->uni2char(c0, op, len);
  202. if (res < 0) {
  203. if (res == -ENAMETOOLONG)
  204. goto out;
  205. *op = '?';
  206. res = 1;
  207. }
  208. op += res;
  209. len -= res;
  210. c0 = c1;
  211. ip++;
  212. ustrlen--;
  213. }
  214. ce2 = hfsplus_compose_lookup(ce1, c0);
  215. if (ce2) {
  216. i = 1;
  217. while (i < ustrlen) {
  218. ce1 = hfsplus_compose_lookup(ce2,
  219. be16_to_cpu(ip[i]));
  220. if (!ce1)
  221. break;
  222. i++;
  223. ce2 = ce1;
  224. }
  225. cc = ce2[0];
  226. if (cc) {
  227. ip += i;
  228. ustrlen -= i;
  229. goto done;
  230. }
  231. }
  232. same:
  233. switch (c0) {
  234. case 0:
  235. cc = 0x2400;
  236. break;
  237. case '/':
  238. cc = ':';
  239. break;
  240. default:
  241. cc = c0;
  242. }
  243. done:
  244. res = nls->uni2char(cc, op, len);
  245. if (res < 0) {
  246. if (res == -ENAMETOOLONG)
  247. goto out;
  248. *op = '?';
  249. res = 1;
  250. }
  251. op += res;
  252. len -= res;
  253. }
  254. res = 0;
  255. out:
  256. *len_p = (char *)op - astr;
  257. return res;
  258. }
  259. inline int hfsplus_uni2asc_str(struct super_block *sb,
  260. const struct hfsplus_unistr *ustr, char *astr,
  261. int *len_p)
  262. {
  263. return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p);
  264. }
  265. EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_str);
  266. inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
  267. const struct hfsplus_attr_unistr *ustr,
  268. char *astr, int *len_p)
  269. {
  270. return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
  271. HFSPLUS_ATTR_MAX_STRLEN, astr, len_p);
  272. }
  273. EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_xattr_str);
  274. /*
  275. * Convert one or more ASCII characters into a single unicode character.
  276. * Returns the number of ASCII characters corresponding to the unicode char.
  277. */
  278. static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
  279. wchar_t *uc)
  280. {
  281. int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
  282. if (size <= 0) {
  283. *uc = '?';
  284. size = 1;
  285. }
  286. switch (*uc) {
  287. case 0x2400:
  288. *uc = 0;
  289. break;
  290. case ':':
  291. *uc = '/';
  292. break;
  293. }
  294. return size;
  295. }
  296. /* Decomposes a non-Hangul unicode character. */
  297. static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
  298. {
  299. int off;
  300. off = hfsplus_decompose_table[(uc >> 12) & 0xf];
  301. if (off == 0 || off == 0xffff)
  302. return NULL;
  303. off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
  304. if (!off)
  305. return NULL;
  306. off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
  307. if (!off)
  308. return NULL;
  309. off = hfsplus_decompose_table[off + (uc & 0xf)];
  310. *size = off & 3;
  311. if (*size == 0)
  312. return NULL;
  313. return hfsplus_decompose_table + (off / 4);
  314. }
  315. /*
  316. * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
  317. * precomposed Hangul, otherwise return the length of the decomposition.
  318. *
  319. * This function was adapted from sample code from the Unicode Standard
  320. * Annex #15: Unicode Normalization Forms, version 3.2.0.
  321. *
  322. * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
  323. * under the Terms of Use in http://www.unicode.org/copyright.html.
  324. */
  325. static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
  326. {
  327. int index;
  328. int l, v, t;
  329. index = uc - Hangul_SBase;
  330. if (index < 0 || index >= Hangul_SCount)
  331. return 0;
  332. l = Hangul_LBase + index / Hangul_NCount;
  333. v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
  334. t = Hangul_TBase + index % Hangul_TCount;
  335. result[0] = l;
  336. result[1] = v;
  337. if (t != Hangul_TBase) {
  338. result[2] = t;
  339. return 3;
  340. }
  341. return 2;
  342. }
  343. /* Decomposes a single unicode character. */
  344. static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
  345. {
  346. u16 *result;
  347. /* Hangul is handled separately */
  348. result = hangul_buffer;
  349. *size = hfsplus_try_decompose_hangul(uc, result);
  350. if (*size == 0)
  351. result = hfsplus_decompose_nonhangul(uc, size);
  352. return result;
  353. }
  354. int hfsplus_asc2uni(struct super_block *sb,
  355. struct hfsplus_unistr *ustr, int max_unistr_len,
  356. const char *astr, int len)
  357. {
  358. int size, dsize, decompose;
  359. u16 *dstr, outlen = 0;
  360. wchar_t c;
  361. u16 dhangul[3];
  362. decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  363. while (outlen < max_unistr_len && len > 0) {
  364. size = asc2unichar(sb, astr, len, &c);
  365. if (decompose)
  366. dstr = decompose_unichar(c, &dsize, dhangul);
  367. else
  368. dstr = NULL;
  369. if (dstr) {
  370. if (outlen + dsize > max_unistr_len)
  371. break;
  372. do {
  373. ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
  374. } while (--dsize > 0);
  375. } else
  376. ustr->unicode[outlen++] = cpu_to_be16(c);
  377. astr += size;
  378. len -= size;
  379. }
  380. ustr->length = cpu_to_be16(outlen);
  381. if (len > 0)
  382. return -ENAMETOOLONG;
  383. return 0;
  384. }
  385. EXPORT_SYMBOL_IF_KUNIT(hfsplus_asc2uni);
  386. /*
  387. * Hash a string to an integer as appropriate for the HFS+ filesystem.
  388. * Composed unicode characters are decomposed and case-folding is performed
  389. * if the appropriate bits are (un)set on the superblock.
  390. */
  391. int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
  392. {
  393. struct super_block *sb = dentry->d_sb;
  394. const char *astr;
  395. const u16 *dstr;
  396. int casefold, decompose, size, len;
  397. unsigned long hash;
  398. wchar_t c;
  399. u16 c2;
  400. u16 dhangul[3];
  401. casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
  402. decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  403. hash = init_name_hash(dentry);
  404. astr = str->name;
  405. len = str->len;
  406. while (len > 0) {
  407. int dsize;
  408. size = asc2unichar(sb, astr, len, &c);
  409. astr += size;
  410. len -= size;
  411. if (decompose)
  412. dstr = decompose_unichar(c, &dsize, dhangul);
  413. else
  414. dstr = NULL;
  415. if (dstr) {
  416. do {
  417. c2 = *dstr++;
  418. if (casefold)
  419. c2 = case_fold(c2);
  420. if (!casefold || c2)
  421. hash = partial_name_hash(c2, hash);
  422. } while (--dsize > 0);
  423. } else {
  424. c2 = c;
  425. if (casefold)
  426. c2 = case_fold(c2);
  427. if (!casefold || c2)
  428. hash = partial_name_hash(c2, hash);
  429. }
  430. }
  431. str->hash = end_name_hash(hash);
  432. return 0;
  433. }
  434. EXPORT_SYMBOL_IF_KUNIT(hfsplus_hash_dentry);
  435. /*
  436. * Compare strings with HFS+ filename ordering.
  437. * Composed unicode characters are decomposed and case-folding is performed
  438. * if the appropriate bits are (un)set on the superblock.
  439. */
  440. int hfsplus_compare_dentry(const struct dentry *dentry,
  441. unsigned int len, const char *str, const struct qstr *name)
  442. {
  443. struct super_block *sb = dentry->d_sb;
  444. int casefold, decompose, size;
  445. int dsize1, dsize2, len1, len2;
  446. const u16 *dstr1, *dstr2;
  447. const char *astr1, *astr2;
  448. u16 c1, c2;
  449. wchar_t c;
  450. u16 dhangul_1[3], dhangul_2[3];
  451. casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
  452. decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  453. astr1 = str;
  454. len1 = len;
  455. astr2 = name->name;
  456. len2 = name->len;
  457. dsize1 = dsize2 = 0;
  458. dstr1 = dstr2 = NULL;
  459. while (len1 > 0 && len2 > 0) {
  460. if (!dsize1) {
  461. size = asc2unichar(sb, astr1, len1, &c);
  462. astr1 += size;
  463. len1 -= size;
  464. if (decompose)
  465. dstr1 = decompose_unichar(c, &dsize1,
  466. dhangul_1);
  467. if (!decompose || !dstr1) {
  468. c1 = c;
  469. dstr1 = &c1;
  470. dsize1 = 1;
  471. }
  472. }
  473. if (!dsize2) {
  474. size = asc2unichar(sb, astr2, len2, &c);
  475. astr2 += size;
  476. len2 -= size;
  477. if (decompose)
  478. dstr2 = decompose_unichar(c, &dsize2,
  479. dhangul_2);
  480. if (!decompose || !dstr2) {
  481. c2 = c;
  482. dstr2 = &c2;
  483. dsize2 = 1;
  484. }
  485. }
  486. c1 = *dstr1;
  487. c2 = *dstr2;
  488. if (casefold) {
  489. c1 = case_fold(c1);
  490. if (!c1) {
  491. dstr1++;
  492. dsize1--;
  493. continue;
  494. }
  495. c2 = case_fold(c2);
  496. if (!c2) {
  497. dstr2++;
  498. dsize2--;
  499. continue;
  500. }
  501. }
  502. if (c1 < c2)
  503. return -1;
  504. else if (c1 > c2)
  505. return 1;
  506. dstr1++;
  507. dsize1--;
  508. dstr2++;
  509. dsize2--;
  510. }
  511. if (len1 < len2)
  512. return -1;
  513. if (len1 > len2)
  514. return 1;
  515. return 0;
  516. }
  517. EXPORT_SYMBOL_IF_KUNIT(hfsplus_compare_dentry);