fcntl.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * linux/fs/fcntl.c
  4. *
  5. * Copyright (C) 1991, 1992 Linus Torvalds
  6. */
  7. #include <linux/syscalls.h>
  8. #include <linux/init.h>
  9. #include <linux/mm.h>
  10. #include <linux/sched/task.h>
  11. #include <linux/fs.h>
  12. #include <linux/filelock.h>
  13. #include <linux/file.h>
  14. #include <linux/capability.h>
  15. #include <linux/dnotify.h>
  16. #include <linux/slab.h>
  17. #include <linux/module.h>
  18. #include <linux/pipe_fs_i.h>
  19. #include <linux/security.h>
  20. #include <linux/ptrace.h>
  21. #include <linux/signal.h>
  22. #include <linux/rcupdate.h>
  23. #include <linux/pid_namespace.h>
  24. #include <linux/user_namespace.h>
  25. #include <linux/memfd.h>
  26. #include <linux/compat.h>
  27. #include <linux/mount.h>
  28. #include <linux/rw_hint.h>
  29. #include <linux/poll.h>
  30. #include <asm/siginfo.h>
  31. #include <linux/uaccess.h>
  32. #include "internal.h"
  33. #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
  34. static int setfl(int fd, struct file * filp, unsigned int arg)
  35. {
  36. struct inode * inode = file_inode(filp);
  37. int error = 0;
  38. /*
  39. * O_APPEND cannot be cleared if the file is marked as append-only
  40. * and the file is open for write.
  41. */
  42. if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
  43. return -EPERM;
  44. /* O_NOATIME can only be set by the owner or superuser */
  45. if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
  46. if (!inode_owner_or_capable(file_mnt_idmap(filp), inode))
  47. return -EPERM;
  48. /* required for strict SunOS emulation */
  49. if (O_NONBLOCK != O_NDELAY)
  50. if (arg & O_NDELAY)
  51. arg |= O_NONBLOCK;
  52. /* Pipe packetized mode is controlled by O_DIRECT flag */
  53. if (!S_ISFIFO(inode->i_mode) &&
  54. (arg & O_DIRECT) &&
  55. !(filp->f_mode & FMODE_CAN_ODIRECT))
  56. return -EINVAL;
  57. if (filp->f_op->check_flags)
  58. error = filp->f_op->check_flags(arg);
  59. if (error)
  60. return error;
  61. /*
  62. * ->fasync() is responsible for setting the FASYNC bit.
  63. */
  64. if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
  65. error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
  66. if (error < 0)
  67. goto out;
  68. if (error > 0)
  69. error = 0;
  70. }
  71. spin_lock(&filp->f_lock);
  72. filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
  73. filp->f_iocb_flags = iocb_flags(filp);
  74. spin_unlock(&filp->f_lock);
  75. out:
  76. return error;
  77. }
  78. /*
  79. * Allocate an file->f_owner struct if it doesn't exist, handling racing
  80. * allocations correctly.
  81. */
  82. int file_f_owner_allocate(struct file *file)
  83. {
  84. struct fown_struct *f_owner;
  85. f_owner = file_f_owner(file);
  86. if (f_owner)
  87. return 0;
  88. f_owner = kzalloc_obj(struct fown_struct);
  89. if (!f_owner)
  90. return -ENOMEM;
  91. rwlock_init(&f_owner->lock);
  92. f_owner->file = file;
  93. /* If someone else raced us, drop our allocation. */
  94. if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner)))
  95. kfree(f_owner);
  96. return 0;
  97. }
  98. EXPORT_SYMBOL(file_f_owner_allocate);
  99. void file_f_owner_release(struct file *file)
  100. {
  101. struct fown_struct *f_owner;
  102. f_owner = file_f_owner(file);
  103. if (f_owner) {
  104. put_pid(f_owner->pid);
  105. kfree(f_owner);
  106. }
  107. }
  108. void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
  109. int force)
  110. {
  111. struct fown_struct *f_owner;
  112. f_owner = file_f_owner(filp);
  113. if (WARN_ON_ONCE(!f_owner))
  114. return;
  115. write_lock_irq(&f_owner->lock);
  116. if (force || !f_owner->pid) {
  117. put_pid(f_owner->pid);
  118. f_owner->pid = get_pid(pid);
  119. f_owner->pid_type = type;
  120. if (pid) {
  121. const struct cred *cred = current_cred();
  122. security_file_set_fowner(filp);
  123. f_owner->uid = cred->uid;
  124. f_owner->euid = cred->euid;
  125. }
  126. }
  127. write_unlock_irq(&f_owner->lock);
  128. }
  129. EXPORT_SYMBOL(__f_setown);
  130. int f_setown(struct file *filp, int who, int force)
  131. {
  132. enum pid_type type;
  133. struct pid *pid = NULL;
  134. int ret = 0;
  135. might_sleep();
  136. type = PIDTYPE_TGID;
  137. if (who < 0) {
  138. /* avoid overflow below */
  139. if (who == INT_MIN)
  140. return -EINVAL;
  141. type = PIDTYPE_PGID;
  142. who = -who;
  143. }
  144. ret = file_f_owner_allocate(filp);
  145. if (ret)
  146. return ret;
  147. rcu_read_lock();
  148. if (who) {
  149. pid = find_vpid(who);
  150. if (!pid)
  151. ret = -ESRCH;
  152. }
  153. if (!ret)
  154. __f_setown(filp, pid, type, force);
  155. rcu_read_unlock();
  156. return ret;
  157. }
  158. EXPORT_SYMBOL(f_setown);
  159. void f_delown(struct file *filp)
  160. {
  161. __f_setown(filp, NULL, PIDTYPE_TGID, 1);
  162. }
  163. pid_t f_getown(struct file *filp)
  164. {
  165. pid_t pid = 0;
  166. struct fown_struct *f_owner;
  167. f_owner = file_f_owner(filp);
  168. if (!f_owner)
  169. return pid;
  170. read_lock_irq(&f_owner->lock);
  171. rcu_read_lock();
  172. if (pid_task(f_owner->pid, f_owner->pid_type)) {
  173. pid = pid_vnr(f_owner->pid);
  174. if (f_owner->pid_type == PIDTYPE_PGID)
  175. pid = -pid;
  176. }
  177. rcu_read_unlock();
  178. read_unlock_irq(&f_owner->lock);
  179. return pid;
  180. }
  181. static int f_setown_ex(struct file *filp, unsigned long arg)
  182. {
  183. struct f_owner_ex __user *owner_p = (void __user *)arg;
  184. struct f_owner_ex owner;
  185. struct pid *pid;
  186. int type;
  187. int ret;
  188. ret = copy_from_user(&owner, owner_p, sizeof(owner));
  189. if (ret)
  190. return -EFAULT;
  191. switch (owner.type) {
  192. case F_OWNER_TID:
  193. type = PIDTYPE_PID;
  194. break;
  195. case F_OWNER_PID:
  196. type = PIDTYPE_TGID;
  197. break;
  198. case F_OWNER_PGRP:
  199. type = PIDTYPE_PGID;
  200. break;
  201. default:
  202. return -EINVAL;
  203. }
  204. ret = file_f_owner_allocate(filp);
  205. if (ret)
  206. return ret;
  207. rcu_read_lock();
  208. pid = find_vpid(owner.pid);
  209. if (owner.pid && !pid)
  210. ret = -ESRCH;
  211. else
  212. __f_setown(filp, pid, type, 1);
  213. rcu_read_unlock();
  214. return ret;
  215. }
  216. static int f_getown_ex(struct file *filp, unsigned long arg)
  217. {
  218. struct f_owner_ex __user *owner_p = (void __user *)arg;
  219. struct f_owner_ex owner = {};
  220. int ret = 0;
  221. struct fown_struct *f_owner;
  222. enum pid_type pid_type = PIDTYPE_PID;
  223. f_owner = file_f_owner(filp);
  224. if (f_owner) {
  225. read_lock_irq(&f_owner->lock);
  226. rcu_read_lock();
  227. if (pid_task(f_owner->pid, f_owner->pid_type))
  228. owner.pid = pid_vnr(f_owner->pid);
  229. rcu_read_unlock();
  230. pid_type = f_owner->pid_type;
  231. }
  232. switch (pid_type) {
  233. case PIDTYPE_PID:
  234. owner.type = F_OWNER_TID;
  235. break;
  236. case PIDTYPE_TGID:
  237. owner.type = F_OWNER_PID;
  238. break;
  239. case PIDTYPE_PGID:
  240. owner.type = F_OWNER_PGRP;
  241. break;
  242. default:
  243. WARN_ON(1);
  244. ret = -EINVAL;
  245. break;
  246. }
  247. if (f_owner)
  248. read_unlock_irq(&f_owner->lock);
  249. if (!ret) {
  250. ret = copy_to_user(owner_p, &owner, sizeof(owner));
  251. if (ret)
  252. ret = -EFAULT;
  253. }
  254. return ret;
  255. }
  256. #ifdef CONFIG_CHECKPOINT_RESTORE
  257. static int f_getowner_uids(struct file *filp, unsigned long arg)
  258. {
  259. struct user_namespace *user_ns = current_user_ns();
  260. struct fown_struct *f_owner;
  261. uid_t __user *dst = (void __user *)arg;
  262. uid_t src[2] = {0, 0};
  263. int err;
  264. f_owner = file_f_owner(filp);
  265. if (f_owner) {
  266. read_lock_irq(&f_owner->lock);
  267. src[0] = from_kuid(user_ns, f_owner->uid);
  268. src[1] = from_kuid(user_ns, f_owner->euid);
  269. read_unlock_irq(&f_owner->lock);
  270. }
  271. err = put_user(src[0], &dst[0]);
  272. err |= put_user(src[1], &dst[1]);
  273. return err;
  274. }
  275. #else
  276. static int f_getowner_uids(struct file *filp, unsigned long arg)
  277. {
  278. return -EINVAL;
  279. }
  280. #endif
  281. static bool rw_hint_valid(u64 hint)
  282. {
  283. BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
  284. BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
  285. BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
  286. BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
  287. BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
  288. BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
  289. switch (hint) {
  290. case RWH_WRITE_LIFE_NOT_SET:
  291. case RWH_WRITE_LIFE_NONE:
  292. case RWH_WRITE_LIFE_SHORT:
  293. case RWH_WRITE_LIFE_MEDIUM:
  294. case RWH_WRITE_LIFE_LONG:
  295. case RWH_WRITE_LIFE_EXTREME:
  296. return true;
  297. default:
  298. return false;
  299. }
  300. }
  301. static long fcntl_get_rw_hint(struct file *file, unsigned long arg)
  302. {
  303. struct inode *inode = file_inode(file);
  304. u64 __user *argp = (u64 __user *)arg;
  305. u64 hint = READ_ONCE(inode->i_write_hint);
  306. if (copy_to_user(argp, &hint, sizeof(*argp)))
  307. return -EFAULT;
  308. return 0;
  309. }
  310. static long fcntl_set_rw_hint(struct file *file, unsigned long arg)
  311. {
  312. struct inode *inode = file_inode(file);
  313. u64 __user *argp = (u64 __user *)arg;
  314. u64 hint;
  315. if (!inode_owner_or_capable(file_mnt_idmap(file), inode))
  316. return -EPERM;
  317. if (copy_from_user(&hint, argp, sizeof(hint)))
  318. return -EFAULT;
  319. if (!rw_hint_valid(hint))
  320. return -EINVAL;
  321. WRITE_ONCE(inode->i_write_hint, hint);
  322. /*
  323. * file->f_mapping->host may differ from inode. As an example,
  324. * blkdev_open() modifies file->f_mapping.
  325. */
  326. if (file->f_mapping->host != inode)
  327. WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
  328. return 0;
  329. }
  330. /* Is the file descriptor a dup of the file? */
  331. static long f_dupfd_query(int fd, struct file *filp)
  332. {
  333. CLASS(fd_raw, f)(fd);
  334. if (fd_empty(f))
  335. return -EBADF;
  336. /*
  337. * We can do the 'fdput()' immediately, as the only thing that
  338. * matters is the pointer value which isn't changed by the fdput.
  339. *
  340. * Technically we didn't need a ref at all, and 'fdget()' was
  341. * overkill, but given our lockless file pointer lookup, the
  342. * alternatives are complicated.
  343. */
  344. return fd_file(f) == filp;
  345. }
  346. /* Let the caller figure out whether a given file was just created. */
  347. static long f_created_query(const struct file *filp)
  348. {
  349. return !!(filp->f_mode & FMODE_CREATED);
  350. }
  351. static int f_owner_sig(struct file *filp, int signum, bool setsig)
  352. {
  353. int ret = 0;
  354. struct fown_struct *f_owner;
  355. might_sleep();
  356. if (setsig) {
  357. if (!valid_signal(signum))
  358. return -EINVAL;
  359. ret = file_f_owner_allocate(filp);
  360. if (ret)
  361. return ret;
  362. }
  363. f_owner = file_f_owner(filp);
  364. if (setsig)
  365. f_owner->signum = signum;
  366. else if (f_owner)
  367. ret = f_owner->signum;
  368. return ret;
  369. }
  370. static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
  371. struct file *filp)
  372. {
  373. void __user *argp = (void __user *)arg;
  374. struct delegation deleg;
  375. int argi = (int)arg;
  376. struct flock flock;
  377. long err = -EINVAL;
  378. switch (cmd) {
  379. case F_CREATED_QUERY:
  380. err = f_created_query(filp);
  381. break;
  382. case F_DUPFD:
  383. err = f_dupfd(argi, filp, 0);
  384. break;
  385. case F_DUPFD_CLOEXEC:
  386. err = f_dupfd(argi, filp, O_CLOEXEC);
  387. break;
  388. case F_DUPFD_QUERY:
  389. err = f_dupfd_query(argi, filp);
  390. break;
  391. case F_GETFD:
  392. err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
  393. break;
  394. case F_SETFD:
  395. err = 0;
  396. set_close_on_exec(fd, argi & FD_CLOEXEC);
  397. break;
  398. case F_GETFL:
  399. err = filp->f_flags;
  400. break;
  401. case F_SETFL:
  402. err = setfl(fd, filp, argi);
  403. break;
  404. #if BITS_PER_LONG != 32
  405. /* 32-bit arches must use fcntl64() */
  406. case F_OFD_GETLK:
  407. #endif
  408. case F_GETLK:
  409. if (copy_from_user(&flock, argp, sizeof(flock)))
  410. return -EFAULT;
  411. err = fcntl_getlk(filp, cmd, &flock);
  412. if (!err && copy_to_user(argp, &flock, sizeof(flock)))
  413. return -EFAULT;
  414. break;
  415. #if BITS_PER_LONG != 32
  416. /* 32-bit arches must use fcntl64() */
  417. case F_OFD_SETLK:
  418. case F_OFD_SETLKW:
  419. fallthrough;
  420. #endif
  421. case F_SETLK:
  422. case F_SETLKW:
  423. if (copy_from_user(&flock, argp, sizeof(flock)))
  424. return -EFAULT;
  425. err = fcntl_setlk(fd, filp, cmd, &flock);
  426. break;
  427. case F_GETOWN:
  428. /*
  429. * XXX If f_owner is a process group, the
  430. * negative return value will get converted
  431. * into an error. Oops. If we keep the
  432. * current syscall conventions, the only way
  433. * to fix this will be in libc.
  434. */
  435. err = f_getown(filp);
  436. force_successful_syscall_return();
  437. break;
  438. case F_SETOWN:
  439. err = f_setown(filp, argi, 1);
  440. break;
  441. case F_GETOWN_EX:
  442. err = f_getown_ex(filp, arg);
  443. break;
  444. case F_SETOWN_EX:
  445. err = f_setown_ex(filp, arg);
  446. break;
  447. case F_GETOWNER_UIDS:
  448. err = f_getowner_uids(filp, arg);
  449. break;
  450. case F_GETSIG:
  451. err = f_owner_sig(filp, 0, false);
  452. break;
  453. case F_SETSIG:
  454. err = f_owner_sig(filp, argi, true);
  455. break;
  456. case F_GETLEASE:
  457. err = fcntl_getlease(filp);
  458. break;
  459. case F_SETLEASE:
  460. err = fcntl_setlease(fd, filp, argi);
  461. break;
  462. case F_NOTIFY:
  463. err = fcntl_dirnotify(fd, filp, argi);
  464. break;
  465. case F_SETPIPE_SZ:
  466. case F_GETPIPE_SZ:
  467. err = pipe_fcntl(filp, cmd, argi);
  468. break;
  469. case F_ADD_SEALS:
  470. case F_GET_SEALS:
  471. err = memfd_fcntl(filp, cmd, argi);
  472. break;
  473. case F_GET_RW_HINT:
  474. err = fcntl_get_rw_hint(filp, arg);
  475. break;
  476. case F_SET_RW_HINT:
  477. err = fcntl_set_rw_hint(filp, arg);
  478. break;
  479. case F_GETDELEG:
  480. if (copy_from_user(&deleg, argp, sizeof(deleg)))
  481. return -EFAULT;
  482. err = fcntl_getdeleg(filp, &deleg);
  483. if (!err && copy_to_user(argp, &deleg, sizeof(deleg)))
  484. return -EFAULT;
  485. break;
  486. case F_SETDELEG:
  487. if (copy_from_user(&deleg, argp, sizeof(deleg)))
  488. return -EFAULT;
  489. err = fcntl_setdeleg(fd, filp, &deleg);
  490. break;
  491. default:
  492. break;
  493. }
  494. return err;
  495. }
  496. static int check_fcntl_cmd(unsigned cmd)
  497. {
  498. switch (cmd) {
  499. case F_CREATED_QUERY:
  500. case F_DUPFD:
  501. case F_DUPFD_CLOEXEC:
  502. case F_DUPFD_QUERY:
  503. case F_GETFD:
  504. case F_SETFD:
  505. case F_GETFL:
  506. return 1;
  507. }
  508. return 0;
  509. }
  510. SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
  511. {
  512. CLASS(fd_raw, f)(fd);
  513. long err;
  514. if (fd_empty(f))
  515. return -EBADF;
  516. if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
  517. if (!check_fcntl_cmd(cmd))
  518. return -EBADF;
  519. }
  520. err = security_file_fcntl(fd_file(f), cmd, arg);
  521. if (!err)
  522. err = do_fcntl(fd, cmd, arg, fd_file(f));
  523. return err;
  524. }
  525. #if BITS_PER_LONG == 32
  526. SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
  527. unsigned long, arg)
  528. {
  529. void __user *argp = (void __user *)arg;
  530. CLASS(fd_raw, f)(fd);
  531. struct flock64 flock;
  532. long err;
  533. if (fd_empty(f))
  534. return -EBADF;
  535. if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
  536. if (!check_fcntl_cmd(cmd))
  537. return -EBADF;
  538. }
  539. err = security_file_fcntl(fd_file(f), cmd, arg);
  540. if (err)
  541. return err;
  542. switch (cmd) {
  543. case F_GETLK64:
  544. case F_OFD_GETLK:
  545. err = -EFAULT;
  546. if (copy_from_user(&flock, argp, sizeof(flock)))
  547. break;
  548. err = fcntl_getlk64(fd_file(f), cmd, &flock);
  549. if (!err && copy_to_user(argp, &flock, sizeof(flock)))
  550. err = -EFAULT;
  551. break;
  552. case F_SETLK64:
  553. case F_SETLKW64:
  554. case F_OFD_SETLK:
  555. case F_OFD_SETLKW:
  556. err = -EFAULT;
  557. if (copy_from_user(&flock, argp, sizeof(flock)))
  558. break;
  559. err = fcntl_setlk64(fd, fd_file(f), cmd, &flock);
  560. break;
  561. default:
  562. err = do_fcntl(fd, cmd, arg, fd_file(f));
  563. break;
  564. }
  565. return err;
  566. }
  567. #endif
  568. #ifdef CONFIG_COMPAT
  569. /* careful - don't use anywhere else */
  570. #define copy_flock_fields(dst, src) \
  571. (dst)->l_type = (src)->l_type; \
  572. (dst)->l_whence = (src)->l_whence; \
  573. (dst)->l_start = (src)->l_start; \
  574. (dst)->l_len = (src)->l_len; \
  575. (dst)->l_pid = (src)->l_pid;
  576. static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
  577. {
  578. struct compat_flock fl;
  579. if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
  580. return -EFAULT;
  581. copy_flock_fields(kfl, &fl);
  582. return 0;
  583. }
  584. static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
  585. {
  586. struct compat_flock64 fl;
  587. if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
  588. return -EFAULT;
  589. copy_flock_fields(kfl, &fl);
  590. return 0;
  591. }
  592. static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
  593. {
  594. struct compat_flock fl;
  595. memset(&fl, 0, sizeof(struct compat_flock));
  596. copy_flock_fields(&fl, kfl);
  597. if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
  598. return -EFAULT;
  599. return 0;
  600. }
  601. static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
  602. {
  603. struct compat_flock64 fl;
  604. BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
  605. BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
  606. memset(&fl, 0, sizeof(struct compat_flock64));
  607. copy_flock_fields(&fl, kfl);
  608. if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
  609. return -EFAULT;
  610. return 0;
  611. }
  612. #undef copy_flock_fields
  613. static unsigned int
  614. convert_fcntl_cmd(unsigned int cmd)
  615. {
  616. switch (cmd) {
  617. case F_GETLK64:
  618. return F_GETLK;
  619. case F_SETLK64:
  620. return F_SETLK;
  621. case F_SETLKW64:
  622. return F_SETLKW;
  623. }
  624. return cmd;
  625. }
  626. /*
  627. * GETLK was successful and we need to return the data, but it needs to fit in
  628. * the compat structure.
  629. * l_start shouldn't be too big, unless the original start + end is greater than
  630. * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
  631. * -EOVERFLOW in that case. l_len could be too big, in which case we just
  632. * truncate it, and only allow the app to see that part of the conflicting lock
  633. * that might make sense to it anyway
  634. */
  635. static int fixup_compat_flock(struct flock *flock)
  636. {
  637. if (flock->l_start > COMPAT_OFF_T_MAX)
  638. return -EOVERFLOW;
  639. if (flock->l_len > COMPAT_OFF_T_MAX)
  640. flock->l_len = COMPAT_OFF_T_MAX;
  641. return 0;
  642. }
  643. static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
  644. compat_ulong_t arg)
  645. {
  646. CLASS(fd_raw, f)(fd);
  647. struct flock flock;
  648. long err;
  649. if (fd_empty(f))
  650. return -EBADF;
  651. if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) {
  652. if (!check_fcntl_cmd(cmd))
  653. return -EBADF;
  654. }
  655. err = security_file_fcntl(fd_file(f), cmd, arg);
  656. if (err)
  657. return err;
  658. switch (cmd) {
  659. case F_GETLK:
  660. err = get_compat_flock(&flock, compat_ptr(arg));
  661. if (err)
  662. break;
  663. err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock);
  664. if (err)
  665. break;
  666. err = fixup_compat_flock(&flock);
  667. if (!err)
  668. err = put_compat_flock(&flock, compat_ptr(arg));
  669. break;
  670. case F_GETLK64:
  671. case F_OFD_GETLK:
  672. err = get_compat_flock64(&flock, compat_ptr(arg));
  673. if (err)
  674. break;
  675. err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock);
  676. if (!err)
  677. err = put_compat_flock64(&flock, compat_ptr(arg));
  678. break;
  679. case F_SETLK:
  680. case F_SETLKW:
  681. err = get_compat_flock(&flock, compat_ptr(arg));
  682. if (err)
  683. break;
  684. err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock);
  685. break;
  686. case F_SETLK64:
  687. case F_SETLKW64:
  688. case F_OFD_SETLK:
  689. case F_OFD_SETLKW:
  690. err = get_compat_flock64(&flock, compat_ptr(arg));
  691. if (err)
  692. break;
  693. err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock);
  694. break;
  695. default:
  696. err = do_fcntl(fd, cmd, arg, fd_file(f));
  697. break;
  698. }
  699. return err;
  700. }
  701. COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
  702. compat_ulong_t, arg)
  703. {
  704. return do_compat_fcntl64(fd, cmd, arg);
  705. }
  706. COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
  707. compat_ulong_t, arg)
  708. {
  709. switch (cmd) {
  710. case F_GETLK64:
  711. case F_SETLK64:
  712. case F_SETLKW64:
  713. case F_OFD_GETLK:
  714. case F_OFD_SETLK:
  715. case F_OFD_SETLKW:
  716. return -EINVAL;
  717. }
  718. return do_compat_fcntl64(fd, cmd, arg);
  719. }
  720. #endif
  721. /* Table to convert sigio signal codes into poll band bitmaps */
  722. static const __poll_t band_table[NSIGPOLL] = {
  723. EPOLLIN | EPOLLRDNORM, /* POLL_IN */
  724. EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND, /* POLL_OUT */
  725. EPOLLIN | EPOLLRDNORM | EPOLLMSG, /* POLL_MSG */
  726. EPOLLERR, /* POLL_ERR */
  727. EPOLLPRI | EPOLLRDBAND, /* POLL_PRI */
  728. EPOLLHUP | EPOLLERR /* POLL_HUP */
  729. };
  730. static inline int sigio_perm(struct task_struct *p,
  731. struct fown_struct *fown, int sig)
  732. {
  733. const struct cred *cred;
  734. int ret;
  735. rcu_read_lock();
  736. cred = __task_cred(p);
  737. ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
  738. uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
  739. uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) &&
  740. !security_file_send_sigiotask(p, fown, sig));
  741. rcu_read_unlock();
  742. return ret;
  743. }
  744. static void send_sigio_to_task(struct task_struct *p,
  745. struct fown_struct *fown,
  746. int fd, int reason, enum pid_type type)
  747. {
  748. /*
  749. * F_SETSIG can change ->signum lockless in parallel, make
  750. * sure we read it once and use the same value throughout.
  751. */
  752. int signum = READ_ONCE(fown->signum);
  753. if (!sigio_perm(p, fown, signum))
  754. return;
  755. switch (signum) {
  756. default: {
  757. kernel_siginfo_t si;
  758. /* Queue a rt signal with the appropriate fd as its
  759. value. We use SI_SIGIO as the source, not
  760. SI_KERNEL, since kernel signals always get
  761. delivered even if we can't queue. Failure to
  762. queue in this case _should_ be reported; we fall
  763. back to SIGIO in that case. --sct */
  764. clear_siginfo(&si);
  765. si.si_signo = signum;
  766. si.si_errno = 0;
  767. si.si_code = reason;
  768. /*
  769. * Posix definies POLL_IN and friends to be signal
  770. * specific si_codes for SIG_POLL. Linux extended
  771. * these si_codes to other signals in a way that is
  772. * ambiguous if other signals also have signal
  773. * specific si_codes. In that case use SI_SIGIO instead
  774. * to remove the ambiguity.
  775. */
  776. if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
  777. si.si_code = SI_SIGIO;
  778. /* Make sure we are called with one of the POLL_*
  779. reasons, otherwise we could leak kernel stack into
  780. userspace. */
  781. BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
  782. if (reason - POLL_IN >= NSIGPOLL)
  783. si.si_band = ~0L;
  784. else
  785. si.si_band = mangle_poll(band_table[reason - POLL_IN]);
  786. si.si_fd = fd;
  787. if (!do_send_sig_info(signum, &si, p, type))
  788. break;
  789. }
  790. fallthrough; /* fall back on the old plain SIGIO signal */
  791. case 0:
  792. do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
  793. }
  794. }
  795. void send_sigio(struct fown_struct *fown, int fd, int band)
  796. {
  797. struct task_struct *p;
  798. enum pid_type type;
  799. unsigned long flags;
  800. struct pid *pid;
  801. read_lock_irqsave(&fown->lock, flags);
  802. type = fown->pid_type;
  803. pid = fown->pid;
  804. if (!pid)
  805. goto out_unlock_fown;
  806. if (type <= PIDTYPE_TGID) {
  807. rcu_read_lock();
  808. p = pid_task(pid, PIDTYPE_PID);
  809. if (p)
  810. send_sigio_to_task(p, fown, fd, band, type);
  811. rcu_read_unlock();
  812. } else {
  813. read_lock(&tasklist_lock);
  814. do_each_pid_task(pid, type, p) {
  815. send_sigio_to_task(p, fown, fd, band, type);
  816. } while_each_pid_task(pid, type, p);
  817. read_unlock(&tasklist_lock);
  818. }
  819. out_unlock_fown:
  820. read_unlock_irqrestore(&fown->lock, flags);
  821. }
  822. static void send_sigurg_to_task(struct task_struct *p,
  823. struct fown_struct *fown, enum pid_type type)
  824. {
  825. if (sigio_perm(p, fown, SIGURG))
  826. do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
  827. }
  828. int send_sigurg(struct file *file)
  829. {
  830. struct fown_struct *fown;
  831. struct task_struct *p;
  832. enum pid_type type;
  833. struct pid *pid;
  834. unsigned long flags;
  835. int ret = 0;
  836. fown = file_f_owner(file);
  837. if (!fown)
  838. return 0;
  839. read_lock_irqsave(&fown->lock, flags);
  840. type = fown->pid_type;
  841. pid = fown->pid;
  842. if (!pid)
  843. goto out_unlock_fown;
  844. ret = 1;
  845. if (type <= PIDTYPE_TGID) {
  846. rcu_read_lock();
  847. p = pid_task(pid, PIDTYPE_PID);
  848. if (p)
  849. send_sigurg_to_task(p, fown, type);
  850. rcu_read_unlock();
  851. } else {
  852. read_lock(&tasklist_lock);
  853. do_each_pid_task(pid, type, p) {
  854. send_sigurg_to_task(p, fown, type);
  855. } while_each_pid_task(pid, type, p);
  856. read_unlock(&tasklist_lock);
  857. }
  858. out_unlock_fown:
  859. read_unlock_irqrestore(&fown->lock, flags);
  860. return ret;
  861. }
  862. static DEFINE_SPINLOCK(fasync_lock);
  863. static struct kmem_cache *fasync_cache __ro_after_init;
  864. /*
  865. * Remove a fasync entry. If successfully removed, return
  866. * positive and clear the FASYNC flag. If no entry exists,
  867. * do nothing and return 0.
  868. *
  869. * NOTE! It is very important that the FASYNC flag always
  870. * match the state "is the filp on a fasync list".
  871. *
  872. */
  873. int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
  874. {
  875. struct fasync_struct *fa, **fp;
  876. int result = 0;
  877. spin_lock(&filp->f_lock);
  878. spin_lock(&fasync_lock);
  879. for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
  880. if (fa->fa_file != filp)
  881. continue;
  882. write_lock_irq(&fa->fa_lock);
  883. fa->fa_file = NULL;
  884. write_unlock_irq(&fa->fa_lock);
  885. *fp = fa->fa_next;
  886. kfree_rcu(fa, fa_rcu);
  887. filp->f_flags &= ~FASYNC;
  888. result = 1;
  889. break;
  890. }
  891. spin_unlock(&fasync_lock);
  892. spin_unlock(&filp->f_lock);
  893. return result;
  894. }
  895. struct fasync_struct *fasync_alloc(void)
  896. {
  897. return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
  898. }
  899. /*
  900. * NOTE! This can be used only for unused fasync entries:
  901. * entries that actually got inserted on the fasync list
  902. * need to be released by rcu - see fasync_remove_entry.
  903. */
  904. void fasync_free(struct fasync_struct *new)
  905. {
  906. kmem_cache_free(fasync_cache, new);
  907. }
  908. /*
  909. * Insert a new entry into the fasync list. Return the pointer to the
  910. * old one if we didn't use the new one.
  911. *
  912. * NOTE! It is very important that the FASYNC flag always
  913. * match the state "is the filp on a fasync list".
  914. */
  915. struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
  916. {
  917. struct fasync_struct *fa, **fp;
  918. spin_lock(&filp->f_lock);
  919. spin_lock(&fasync_lock);
  920. for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
  921. if (fa->fa_file != filp)
  922. continue;
  923. write_lock_irq(&fa->fa_lock);
  924. fa->fa_fd = fd;
  925. write_unlock_irq(&fa->fa_lock);
  926. goto out;
  927. }
  928. rwlock_init(&new->fa_lock);
  929. new->magic = FASYNC_MAGIC;
  930. new->fa_file = filp;
  931. new->fa_fd = fd;
  932. new->fa_next = *fapp;
  933. rcu_assign_pointer(*fapp, new);
  934. filp->f_flags |= FASYNC;
  935. out:
  936. spin_unlock(&fasync_lock);
  937. spin_unlock(&filp->f_lock);
  938. return fa;
  939. }
  940. /*
  941. * Add a fasync entry. Return negative on error, positive if
  942. * added, and zero if did nothing but change an existing one.
  943. */
  944. static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
  945. {
  946. struct fasync_struct *new;
  947. new = fasync_alloc();
  948. if (!new)
  949. return -ENOMEM;
  950. /*
  951. * fasync_insert_entry() returns the old (update) entry if
  952. * it existed.
  953. *
  954. * So free the (unused) new entry and return 0 to let the
  955. * caller know that we didn't add any new fasync entries.
  956. */
  957. if (fasync_insert_entry(fd, filp, fapp, new)) {
  958. fasync_free(new);
  959. return 0;
  960. }
  961. return 1;
  962. }
  /*
   * fasync_helper() is the common entry point used by almost all character
   * device drivers to maintain their fasync queue, and by the file lease
   * code for regular files.
   *
   * A non-zero @on adds (or updates) the entry for @filp; a zero @on
   * removes it.  Returns negative on error, 0 if nothing was added or
   * removed, and positive if an entry was added or deleted.
   */
  int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
  {
  	return on ? fasync_add_entry(fd, filp, fapp)
  		  : fasync_remove_entry(filp, fapp);
  }
  EXPORT_SYMBOL(fasync_helper);
  976. /*
  977. * rcu_read_lock() is held
  978. */
  979. static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
  980. {
  981. while (fa) {
  982. struct fown_struct *fown;
  983. unsigned long flags;
  984. if (fa->magic != FASYNC_MAGIC) {
  985. printk(KERN_ERR "kill_fasync: bad magic number in "
  986. "fasync_struct!\n");
  987. return;
  988. }
  989. read_lock_irqsave(&fa->fa_lock, flags);
  990. if (fa->fa_file) {
  991. fown = file_f_owner(fa->fa_file);
  992. if (!fown)
  993. goto next;
  994. /* Don't send SIGURG to processes which have not set a
  995. queued signum: SIGURG has its own default signalling
  996. mechanism. */
  997. if (!(sig == SIGURG && fown->signum == 0))
  998. send_sigio(fown, fa->fa_fd, band);
  999. }
  1000. next:
  1001. read_unlock_irqrestore(&fa->fa_lock, flags);
  1002. fa = rcu_dereference(fa->fa_next);
  1003. }
  1004. }
  /*
   * Signal every owner registered on the fasync list at @fp.
   * @sig and @band are passed through unchanged to kill_fasync_rcu().
   */
  void kill_fasync(struct fasync_struct **fp, int sig, int band)
  {
  	/* Quick unlocked test first: usually the list is empty. */
  	if (!*fp)
  		return;

  	rcu_read_lock();
  	kill_fasync_rcu(rcu_dereference(*fp), sig, band);
  	rcu_read_unlock();
  }
  EXPORT_SYMBOL(kill_fasync);
  1017. static int __init fcntl_init(void)
  1018. {
  1019. /*
  1020. * Please add new bits here to ensure allocation uniqueness.
  1021. * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
  1022. * is defined as O_NONBLOCK on some platforms and not on others.
  1023. */
  1024. BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
  1025. HWEIGHT32(
  1026. (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
  1027. __FMODE_EXEC));
  1028. fasync_cache = kmem_cache_create("fasync_cache",
  1029. sizeof(struct fasync_struct), 0,
  1030. SLAB_PANIC | SLAB_ACCOUNT, NULL);
  1031. return 0;
  1032. }
  1033. module_init(fcntl_init)