coredump.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/slab.h>
  3. #include <linux/file.h>
  4. #include <linux/fdtable.h>
  5. #include <linux/freezer.h>
  6. #include <linux/mm.h>
  7. #include <linux/stat.h>
  8. #include <linux/fcntl.h>
  9. #include <linux/swap.h>
  10. #include <linux/ctype.h>
  11. #include <linux/string.h>
  12. #include <linux/init.h>
  13. #include <linux/pagemap.h>
  14. #include <linux/perf_event.h>
  15. #include <linux/highmem.h>
  16. #include <linux/spinlock.h>
  17. #include <linux/key.h>
  18. #include <linux/personality.h>
  19. #include <linux/binfmts.h>
  20. #include <linux/coredump.h>
  21. #include <linux/sort.h>
  22. #include <linux/sched/coredump.h>
  23. #include <linux/sched/signal.h>
  24. #include <linux/sched/task_stack.h>
  25. #include <linux/utsname.h>
  26. #include <linux/pid_namespace.h>
  27. #include <linux/module.h>
  28. #include <linux/namei.h>
  29. #include <linux/mount.h>
  30. #include <linux/security.h>
  31. #include <linux/syscalls.h>
  32. #include <linux/tsacct_kern.h>
  33. #include <linux/cn_proc.h>
  34. #include <linux/audit.h>
  35. #include <linux/kmod.h>
  36. #include <linux/fsnotify.h>
  37. #include <linux/fs_struct.h>
  38. #include <linux/pipe_fs_i.h>
  39. #include <linux/oom.h>
  40. #include <linux/compat.h>
  41. #include <linux/fs.h>
  42. #include <linux/path.h>
  43. #include <linux/timekeeping.h>
  44. #include <linux/sysctl.h>
  45. #include <linux/elf.h>
  46. #include <linux/pidfs.h>
  47. #include <linux/net.h>
  48. #include <linux/socket.h>
  49. #include <net/af_unix.h>
  50. #include <net/net_namespace.h>
  51. #include <net/sock.h>
  52. #include <uapi/linux/pidfd.h>
  53. #include <uapi/linux/un.h>
  54. #include <uapi/linux/coredump.h>
  55. #include <linux/uaccess.h>
  56. #include <asm/mmu_context.h>
  57. #include <asm/tlb.h>
  58. #include <asm/exec.h>
  59. #include <trace/events/task.h>
  60. #include "internal.h"
  61. #include <trace/events/sched.h>
/* Forward declarations; the definitions are not in this part of the file. */
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);

/* Initial value of core_file_note_size_limit (4 MiB). */
#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
/* Define a reasonable max cap */
#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
/*
 * File descriptor number for the pidfd for the thread-group leader of
 * the coredumping task installed into the usermode helper's file
 * descriptor table.
 */
#define COREDUMP_PIDFD_NUMBER 3

/*
 * When non-zero, coredump_parse() appends ".<pid>" to file-type core
 * names whose pattern does not contain %p.
 */
static int core_uses_pid;
/* When non-zero, coredump_parse() sets COREDUMP_WAIT in the core name mask. */
static unsigned int core_pipe_limit;
/* NOTE(review): not referenced in this part of the file; presumably selects VMA ordering - confirm. */
static unsigned int core_sort_vma;
/*
 * Pattern expanded by coredump_parse(). A leading '|' selects a pipe
 * helper, a leading '@' a socket; anything else is a file name template.
 */
static char core_pattern[CORENAME_MAX_SIZE] = "core";
/*
 * Size of the first core name allocation made by coredump_parse();
 * expand_corename() raises it whenever a larger buffer had to be used.
 */
static int core_name_size = CORENAME_MAX_SIZE;
/* Not static. NOTE(review): readers are outside this part of the file. */
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
/* NOTE(review): presumably counts in-flight pipe dumps; its users are not visible here. */
static atomic_t core_pipe_count = ATOMIC_INIT(0);
/* Kind of coredump target, chosen by coredump_parse() from core_pattern. */
enum coredump_type_t {
	/* Pattern starts with neither '|' nor '@'. */
	COREDUMP_FILE = 1,
	/* Pattern starts with '|'. */
	COREDUMP_PIPE = 2,
	/* Pattern starts with a single '@'. */
	COREDUMP_SOCK = 3,
	/* Pattern starts with "@@". */
	COREDUMP_SOCK_REQ = 4,
};
/* State filled in by coredump_parse() while expanding core_pattern. */
struct core_name {
	/* Heap buffer with the expanded name; (re)allocated by expand_corename(). */
	char *corename __counted_by_ptr(size);
	/* Bytes of corename written so far, and its allocated size. */
	int used, size;
	/* Reset to 0 by coredump_parse(). NOTE(review): later writers are not visible here. */
	unsigned int core_pipe_limit;
	/* Reset to false by coredump_parse(). NOTE(review): presumably set once a dump was written - confirm. */
	bool core_dumped;
	/* Which kind of target the pattern selected. */
	enum coredump_type_t core_type;
	/*
	 * COREDUMP_* flags. Initialised by coredump_parse();
	 * coredump_sock_request() replaces it with the mask acked by userspace.
	 */
	u64 mask;
};
  94. static int expand_corename(struct core_name *cn, int size)
  95. {
  96. char *corename;
  97. size = kmalloc_size_roundup(size);
  98. corename = krealloc(cn->corename, size, GFP_KERNEL);
  99. if (!corename)
  100. return -ENOMEM;
  101. cn->corename = corename;
  102. cn->size = size;
  103. if (size > core_name_size) /* racy but harmless */
  104. core_name_size = size;
  105. return 0;
  106. }
  107. static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
  108. va_list arg)
  109. {
  110. int free, need;
  111. va_list arg_copy;
  112. again:
  113. free = cn->size - cn->used;
  114. va_copy(arg_copy, arg);
  115. need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
  116. va_end(arg_copy);
  117. if (need < free) {
  118. cn->used += need;
  119. return 0;
  120. }
  121. if (!expand_corename(cn, cn->size + need - free + 1))
  122. goto again;
  123. return -ENOMEM;
  124. }
  125. static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
  126. {
  127. va_list arg;
  128. int ret;
  129. va_start(arg, fmt);
  130. ret = cn_vprintf(cn, fmt, arg);
  131. va_end(arg);
  132. return ret;
  133. }
  134. static __printf(2, 3)
  135. int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
  136. {
  137. int cur = cn->used;
  138. va_list arg;
  139. int ret;
  140. va_start(arg, fmt);
  141. ret = cn_vprintf(cn, fmt, arg);
  142. va_end(arg);
  143. if (ret == 0) {
  144. /*
  145. * Ensure that this coredump name component can't cause the
  146. * resulting corefile path to consist of a ".." or ".".
  147. */
  148. if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
  149. (cn->used - cur == 2 && cn->corename[cur] == '.'
  150. && cn->corename[cur+1] == '.'))
  151. cn->corename[cur] = '!';
  152. /*
  153. * Empty names are fishy and could be used to create a "//" in a
  154. * corefile name, causing the coredump to happen one directory
  155. * level too high. Enforce that all components of the core
  156. * pattern are at least one character long.
  157. */
  158. if (cn->used == cur)
  159. ret = cn_printf(cn, "!");
  160. }
  161. for (; cur < cn->used; ++cur) {
  162. if (cn->corename[cur] == '/')
  163. cn->corename[cur] = '!';
  164. }
  165. return ret;
  166. }
  167. static int cn_print_exe_file(struct core_name *cn, bool name_only)
  168. {
  169. struct file *exe_file;
  170. char *pathbuf, *path, *ptr;
  171. int ret;
  172. exe_file = get_mm_exe_file(current->mm);
  173. if (!exe_file)
  174. return cn_esc_printf(cn, "%s (path unknown)", current->comm);
  175. pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
  176. if (!pathbuf) {
  177. ret = -ENOMEM;
  178. goto put_exe_file;
  179. }
  180. path = file_path(exe_file, pathbuf, PATH_MAX);
  181. if (IS_ERR(path)) {
  182. ret = PTR_ERR(path);
  183. goto free_buf;
  184. }
  185. if (name_only) {
  186. ptr = strrchr(path, '/');
  187. if (ptr)
  188. path = ptr + 1;
  189. }
  190. ret = cn_esc_printf(cn, "%s", path);
  191. free_buf:
  192. kfree(pathbuf);
  193. put_exe_file:
  194. fput(exe_file);
  195. return ret;
  196. }
  197. /*
  198. * coredump_parse will inspect the pattern parameter, and output a name
  199. * into corename, which must have space for at least CORENAME_MAX_SIZE
  200. * bytes plus one byte for the zero terminator.
  201. */
  202. static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm,
  203. size_t **argv, int *argc)
  204. {
  205. const struct cred *cred = current_cred();
  206. const char *pat_ptr = core_pattern;
  207. bool was_space = false;
  208. int pid_in_pattern = 0;
  209. int err = 0;
  210. cn->mask = COREDUMP_KERNEL;
  211. if (core_pipe_limit)
  212. cn->mask |= COREDUMP_WAIT;
  213. cn->used = 0;
  214. cn->corename = NULL;
  215. cn->core_pipe_limit = 0;
  216. cn->core_dumped = false;
  217. if (*pat_ptr == '|')
  218. cn->core_type = COREDUMP_PIPE;
  219. else if (*pat_ptr == '@')
  220. cn->core_type = COREDUMP_SOCK;
  221. else
  222. cn->core_type = COREDUMP_FILE;
  223. if (expand_corename(cn, core_name_size))
  224. return false;
  225. cn->corename[0] = '\0';
  226. switch (cn->core_type) {
  227. case COREDUMP_PIPE: {
  228. int argvs = sizeof(core_pattern) / 2;
  229. (*argv) = kmalloc_objs(**argv, argvs);
  230. if (!(*argv))
  231. return false;
  232. (*argv)[(*argc)++] = 0;
  233. ++pat_ptr;
  234. if (!(*pat_ptr))
  235. return false;
  236. break;
  237. }
  238. case COREDUMP_SOCK: {
  239. /* skip the @ */
  240. pat_ptr++;
  241. if (!(*pat_ptr))
  242. return false;
  243. if (*pat_ptr == '@') {
  244. pat_ptr++;
  245. if (!(*pat_ptr))
  246. return false;
  247. cn->core_type = COREDUMP_SOCK_REQ;
  248. }
  249. err = cn_printf(cn, "%s", pat_ptr);
  250. if (err)
  251. return false;
  252. /* Require absolute paths. */
  253. if (cn->corename[0] != '/')
  254. return false;
  255. /*
  256. * Ensure we can uses spaces to indicate additional
  257. * parameters in the future.
  258. */
  259. if (strchr(cn->corename, ' ')) {
  260. coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename);
  261. return false;
  262. }
  263. /* Must not contain ".." in the path. */
  264. if (name_contains_dotdot(cn->corename)) {
  265. coredump_report_failure("Coredump socket may not %s contain '..' spaces", cn->corename);
  266. return false;
  267. }
  268. if (strlen(cn->corename) >= UNIX_PATH_MAX) {
  269. coredump_report_failure("Coredump socket path %s too long", cn->corename);
  270. return false;
  271. }
  272. /*
  273. * Currently no need to parse any other options.
  274. * Relevant information can be retrieved from the peer
  275. * pidfd retrievable via SO_PEERPIDFD by the receiver or
  276. * via /proc/<pid>, using the SO_PEERPIDFD to guard
  277. * against pid recycling when opening /proc/<pid>.
  278. */
  279. return true;
  280. }
  281. case COREDUMP_FILE:
  282. break;
  283. default:
  284. WARN_ON_ONCE(true);
  285. return false;
  286. }
  287. /* Repeat as long as we have more pattern to process and more output
  288. space */
  289. while (*pat_ptr) {
  290. /*
  291. * Split on spaces before doing template expansion so that
  292. * %e and %E don't get split if they have spaces in them
  293. */
  294. if (cn->core_type == COREDUMP_PIPE) {
  295. if (isspace(*pat_ptr)) {
  296. if (cn->used != 0)
  297. was_space = true;
  298. pat_ptr++;
  299. continue;
  300. } else if (was_space) {
  301. was_space = false;
  302. err = cn_printf(cn, "%c", '\0');
  303. if (err)
  304. return false;
  305. (*argv)[(*argc)++] = cn->used;
  306. }
  307. }
  308. if (*pat_ptr != '%') {
  309. err = cn_printf(cn, "%c", *pat_ptr++);
  310. } else {
  311. switch (*++pat_ptr) {
  312. /* single % at the end, drop that */
  313. case 0:
  314. goto out;
  315. /* Double percent, output one percent */
  316. case '%':
  317. err = cn_printf(cn, "%c", '%');
  318. break;
  319. /* pid */
  320. case 'p':
  321. pid_in_pattern = 1;
  322. err = cn_printf(cn, "%d",
  323. task_tgid_vnr(current));
  324. break;
  325. /* global pid */
  326. case 'P':
  327. err = cn_printf(cn, "%d",
  328. task_tgid_nr(current));
  329. break;
  330. case 'i':
  331. err = cn_printf(cn, "%d",
  332. task_pid_vnr(current));
  333. break;
  334. case 'I':
  335. err = cn_printf(cn, "%d",
  336. task_pid_nr(current));
  337. break;
  338. /* uid */
  339. case 'u':
  340. err = cn_printf(cn, "%u",
  341. from_kuid(&init_user_ns,
  342. cred->uid));
  343. break;
  344. /* gid */
  345. case 'g':
  346. err = cn_printf(cn, "%u",
  347. from_kgid(&init_user_ns,
  348. cred->gid));
  349. break;
  350. case 'd':
  351. err = cn_printf(cn, "%d",
  352. __get_dumpable(cprm->mm_flags));
  353. break;
  354. /* signal that caused the coredump */
  355. case 's':
  356. err = cn_printf(cn, "%d",
  357. cprm->siginfo->si_signo);
  358. break;
  359. /* UNIX time of coredump */
  360. case 't': {
  361. time64_t time;
  362. time = ktime_get_real_seconds();
  363. err = cn_printf(cn, "%lld", time);
  364. break;
  365. }
  366. /* hostname */
  367. case 'h':
  368. down_read(&uts_sem);
  369. err = cn_esc_printf(cn, "%s",
  370. utsname()->nodename);
  371. up_read(&uts_sem);
  372. break;
  373. /* executable, could be changed by prctl PR_SET_NAME etc */
  374. case 'e':
  375. err = cn_esc_printf(cn, "%s", current->comm);
  376. break;
  377. /* file name of executable */
  378. case 'f':
  379. err = cn_print_exe_file(cn, true);
  380. break;
  381. case 'E':
  382. err = cn_print_exe_file(cn, false);
  383. break;
  384. /* core limit size */
  385. case 'c':
  386. err = cn_printf(cn, "%lu",
  387. rlimit(RLIMIT_CORE));
  388. break;
  389. /* CPU the task ran on */
  390. case 'C':
  391. err = cn_printf(cn, "%d", cprm->cpu);
  392. break;
  393. /* pidfd number */
  394. case 'F': {
  395. /*
  396. * Installing a pidfd only makes sense if
  397. * we actually spawn a usermode helper.
  398. */
  399. if (cn->core_type != COREDUMP_PIPE)
  400. break;
  401. /*
  402. * Note that we'll install a pidfd for the
  403. * thread-group leader. We know that task
  404. * linkage hasn't been removed yet and even if
  405. * this @current isn't the actual thread-group
  406. * leader we know that the thread-group leader
  407. * cannot be reaped until @current has exited.
  408. */
  409. cprm->pid = task_tgid(current);
  410. err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
  411. break;
  412. }
  413. default:
  414. break;
  415. }
  416. ++pat_ptr;
  417. }
  418. if (err)
  419. return false;
  420. }
  421. out:
  422. /* Backward compatibility with core_uses_pid:
  423. *
  424. * If core_pattern does not include a %p (as is the default)
  425. * and core_uses_pid is set, then .%pid will be appended to
  426. * the filename. Do not do this for piped commands. */
  427. if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid)
  428. return cn_printf(cn, ".%d", task_tgid_vnr(current)) == 0;
  429. return true;
  430. }
  431. static int zap_process(struct signal_struct *signal, int exit_code)
  432. {
  433. struct task_struct *t;
  434. int nr = 0;
  435. signal->flags = SIGNAL_GROUP_EXIT;
  436. signal->group_exit_code = exit_code;
  437. signal->group_stop_count = 0;
  438. __for_each_thread(signal, t) {
  439. task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
  440. if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
  441. sigaddset(&t->pending.signal, SIGKILL);
  442. signal_wake_up(t, 1);
  443. nr++;
  444. }
  445. }
  446. return nr;
  447. }
  448. static int zap_threads(struct task_struct *tsk,
  449. struct core_state *core_state, int exit_code)
  450. {
  451. struct signal_struct *signal = tsk->signal;
  452. int nr = -EAGAIN;
  453. spin_lock_irq(&tsk->sighand->siglock);
  454. if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
  455. /* Allow SIGKILL, see prepare_signal() */
  456. signal->core_state = core_state;
  457. nr = zap_process(signal, exit_code);
  458. clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
  459. tsk->flags |= PF_DUMPCORE;
  460. atomic_set(&core_state->nr_threads, nr);
  461. }
  462. spin_unlock_irq(&tsk->sighand->siglock);
  463. return nr;
  464. }
  465. static int coredump_wait(int exit_code, struct core_state *core_state)
  466. {
  467. struct task_struct *tsk = current;
  468. int core_waiters = -EBUSY;
  469. init_completion(&core_state->startup);
  470. core_state->dumper.task = tsk;
  471. core_state->dumper.next = NULL;
  472. core_waiters = zap_threads(tsk, core_state, exit_code);
  473. if (core_waiters > 0) {
  474. struct core_thread *ptr;
  475. wait_for_completion_state(&core_state->startup,
  476. TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
  477. /*
  478. * Wait for all the threads to become inactive, so that
  479. * all the thread context (extended register state, like
  480. * fpu etc) gets copied to the memory.
  481. */
  482. ptr = core_state->dumper.next;
  483. while (ptr != NULL) {
  484. wait_task_inactive(ptr->task, TASK_ANY);
  485. ptr = ptr->next;
  486. }
  487. }
  488. return core_waiters;
  489. }
  490. static void coredump_finish(bool core_dumped)
  491. {
  492. struct core_thread *curr, *next;
  493. struct task_struct *task;
  494. spin_lock_irq(&current->sighand->siglock);
  495. if (core_dumped && !__fatal_signal_pending(current))
  496. current->signal->group_exit_code |= 0x80;
  497. next = current->signal->core_state->dumper.next;
  498. current->signal->core_state = NULL;
  499. spin_unlock_irq(&current->sighand->siglock);
  500. while ((curr = next) != NULL) {
  501. next = curr->next;
  502. task = curr->task;
  503. /*
  504. * see coredump_task_exit(), curr->task must not see
  505. * ->task == NULL before we read ->next.
  506. */
  507. smp_mb();
  508. curr->task = NULL;
  509. wake_up_process(task);
  510. }
  511. }
  512. static bool dump_interrupted(void)
  513. {
  514. /*
  515. * SIGKILL or freezing() interrupt the coredumping. Perhaps we
  516. * can do try_to_freeze() and check __fatal_signal_pending(),
  517. * but then we need to teach dump_write() to restart and clear
  518. * TIF_SIGPENDING.
  519. */
  520. return fatal_signal_pending(current) || freezing(current);
  521. }
  522. static void wait_for_dump_helpers(struct file *file)
  523. {
  524. struct pipe_inode_info *pipe = file->private_data;
  525. pipe_lock(pipe);
  526. pipe->readers++;
  527. pipe->writers--;
  528. wake_up_interruptible_sync(&pipe->rd_wait);
  529. kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  530. pipe_unlock(pipe);
  531. /*
  532. * We actually want wait_event_freezable() but then we need
  533. * to clear TIF_SIGPENDING and improve dump_interrupted().
  534. */
  535. wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);
  536. pipe_lock(pipe);
  537. pipe->readers--;
  538. pipe->writers++;
  539. pipe_unlock(pipe);
  540. }
  541. /*
  542. * umh_coredump_setup
  543. * helper function to customize the process used
  544. * to collect the core in userspace. Specifically
  545. * it sets up a pipe and installs it as fd 0 (stdin)
  546. * for the process. Returns 0 on success, or
  547. * PTR_ERR on failure.
  548. * Note that it also sets the core limit to 1. This
  549. * is a special value that we use to trap recursive
  550. * core dumps
  551. */
  552. static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
  553. {
  554. struct file *files[2];
  555. struct coredump_params *cp = (struct coredump_params *)info->data;
  556. int err;
  557. if (cp->pid) {
  558. struct file *pidfs_file __free(fput) = NULL;
  559. pidfs_file = pidfs_alloc_file(cp->pid, 0);
  560. if (IS_ERR(pidfs_file))
  561. return PTR_ERR(pidfs_file);
  562. pidfs_coredump(cp);
  563. /*
  564. * Usermode helpers are childen of either
  565. * system_dfl_wq or of kthreadd. So we know that
  566. * we're starting off with a clean file descriptor
  567. * table. So we should always be able to use
  568. * COREDUMP_PIDFD_NUMBER as our file descriptor value.
  569. */
  570. err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
  571. if (err < 0)
  572. return err;
  573. }
  574. err = create_pipe_files(files, 0);
  575. if (err)
  576. return err;
  577. cp->file = files[1];
  578. err = replace_fd(0, files[0], 0);
  579. fput(files[0]);
  580. if (err < 0)
  581. return err;
  582. /* and disallow core files too */
  583. current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
  584. return 0;
  585. }
  586. #ifdef CONFIG_UNIX
  587. static bool coredump_sock_connect(struct core_name *cn, struct coredump_params *cprm)
  588. {
  589. struct file *file __free(fput) = NULL;
  590. struct sockaddr_un addr = {
  591. .sun_family = AF_UNIX,
  592. };
  593. ssize_t addr_len;
  594. int retval;
  595. struct socket *socket;
  596. addr_len = strscpy(addr.sun_path, cn->corename);
  597. if (addr_len < 0)
  598. return false;
  599. addr_len += offsetof(struct sockaddr_un, sun_path) + 1;
  600. /*
  601. * It is possible that the userspace process which is supposed
  602. * to handle the coredump and is listening on the AF_UNIX socket
  603. * coredumps. Userspace should just mark itself non dumpable.
  604. */
  605. retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket);
  606. if (retval < 0)
  607. return false;
  608. file = sock_alloc_file(socket, 0, NULL);
  609. if (IS_ERR(file))
  610. return false;
  611. /*
  612. * Set the thread-group leader pid which is used for the peer
  613. * credentials during connect() below. Then immediately register
  614. * it in pidfs...
  615. */
  616. cprm->pid = task_tgid(current);
  617. retval = pidfs_register_pid(cprm->pid);
  618. if (retval)
  619. return false;
  620. /*
  621. * ... and set the coredump information so userspace has it
  622. * available after connect()...
  623. */
  624. pidfs_coredump(cprm);
  625. retval = kernel_connect(socket, (struct sockaddr_unsized *)(&addr), addr_len,
  626. O_NONBLOCK | SOCK_COREDUMP);
  627. if (retval) {
  628. if (retval == -EAGAIN)
  629. coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
  630. else
  631. coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
  632. return false;
  633. }
  634. /* ... and validate that @sk_peer_pid matches @cprm.pid. */
  635. if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm->pid))
  636. return false;
  637. cprm->limit = RLIM_INFINITY;
  638. cprm->file = no_free_ptr(file);
  639. return true;
  640. }
  641. static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags)
  642. {
  643. struct msghdr msg = {};
  644. struct kvec iov = { .iov_base = ack, .iov_len = size };
  645. ssize_t ret;
  646. memset(ack, 0, size);
  647. ret = kernel_recvmsg(sock_from_file(file), &msg, &iov, 1, size, flags);
  648. return ret == size;
  649. }
  650. static inline bool coredump_sock_send(struct file *file, struct coredump_req *req)
  651. {
  652. struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
  653. struct kvec iov = { .iov_base = req, .iov_len = sizeof(*req) };
  654. ssize_t ret;
  655. ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(*req));
  656. return ret == sizeof(*req);
  657. }
  658. static_assert(sizeof(enum coredump_mark) == sizeof(__u32));
  659. static inline bool coredump_sock_mark(struct file *file, enum coredump_mark mark)
  660. {
  661. struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
  662. struct kvec iov = { .iov_base = &mark, .iov_len = sizeof(mark) };
  663. ssize_t ret;
  664. ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(mark));
  665. return ret == sizeof(mark);
  666. }
  667. static inline void coredump_sock_wait(struct file *file)
  668. {
  669. ssize_t n;
  670. /*
  671. * We use a simple read to wait for the coredump processing to
  672. * finish. Either the socket is closed or we get sent unexpected
  673. * data. In both cases, we're done.
  674. */
  675. n = __kernel_read(file, &(char){ 0 }, 1, NULL);
  676. if (n > 0)
  677. coredump_report_failure("Coredump socket had unexpected data");
  678. else if (n < 0)
  679. coredump_report_failure("Coredump socket failed");
  680. }
  681. static inline void coredump_sock_shutdown(struct file *file)
  682. {
  683. struct socket *socket;
  684. socket = sock_from_file(file);
  685. if (!socket)
  686. return;
  687. /* Let userspace know we're done processing the coredump. */
  688. kernel_sock_shutdown(socket, SHUT_WR);
  689. }
  690. static bool coredump_sock_request(struct core_name *cn, struct coredump_params *cprm)
  691. {
  692. struct coredump_req req = {
  693. .size = sizeof(struct coredump_req),
  694. .mask = COREDUMP_KERNEL | COREDUMP_USERSPACE |
  695. COREDUMP_REJECT | COREDUMP_WAIT,
  696. .size_ack = sizeof(struct coredump_ack),
  697. };
  698. struct coredump_ack ack = {};
  699. ssize_t usize;
  700. if (cn->core_type != COREDUMP_SOCK_REQ)
  701. return true;
  702. /* Let userspace know what we support. */
  703. if (!coredump_sock_send(cprm->file, &req))
  704. return false;
  705. /* Peek the size of the coredump_ack. */
  706. if (!coredump_sock_recv(cprm->file, &ack, sizeof(ack.size),
  707. MSG_PEEK | MSG_WAITALL))
  708. return false;
  709. /* Refuse unknown coredump_ack sizes. */
  710. usize = ack.size;
  711. if (usize < COREDUMP_ACK_SIZE_VER0) {
  712. coredump_sock_mark(cprm->file, COREDUMP_MARK_MINSIZE);
  713. return false;
  714. }
  715. if (usize > sizeof(ack)) {
  716. coredump_sock_mark(cprm->file, COREDUMP_MARK_MAXSIZE);
  717. return false;
  718. }
  719. /* Now retrieve the coredump_ack. */
  720. if (!coredump_sock_recv(cprm->file, &ack, usize, MSG_WAITALL))
  721. return false;
  722. if (ack.size != usize)
  723. return false;
  724. /* Refuse unknown coredump_ack flags. */
  725. if (ack.mask & ~req.mask) {
  726. coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
  727. return false;
  728. }
  729. /* Refuse mutually exclusive options. */
  730. if (hweight64(ack.mask & (COREDUMP_USERSPACE | COREDUMP_KERNEL |
  731. COREDUMP_REJECT)) != 1) {
  732. coredump_sock_mark(cprm->file, COREDUMP_MARK_CONFLICTING);
  733. return false;
  734. }
  735. if (ack.spare) {
  736. coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
  737. return false;
  738. }
  739. cn->mask = ack.mask;
  740. return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK);
  741. }
  742. static bool coredump_socket(struct core_name *cn, struct coredump_params *cprm)
  743. {
  744. if (!coredump_sock_connect(cn, cprm))
  745. return false;
  746. return coredump_sock_request(cn, cprm);
  747. }
  748. #else
  749. static inline void coredump_sock_wait(struct file *file) { }
  750. static inline void coredump_sock_shutdown(struct file *file) { }
  751. static inline bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { return false; }
  752. #endif
  753. /* cprm->mm_flags contains a stable snapshot of dumpability flags. */
  754. static inline bool coredump_force_suid_safe(const struct coredump_params *cprm)
  755. {
  756. /* Require nonrelative corefile path and be extra careful. */
  757. return __get_dumpable(cprm->mm_flags) == SUID_DUMP_ROOT;
  758. }
  759. static bool coredump_file(struct core_name *cn, struct coredump_params *cprm,
  760. const struct linux_binfmt *binfmt)
  761. {
  762. struct mnt_idmap *idmap;
  763. struct inode *inode;
  764. struct file *file __free(fput) = NULL;
  765. int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL;
  766. if (cprm->limit < binfmt->min_coredump)
  767. return false;
  768. if (coredump_force_suid_safe(cprm) && cn->corename[0] != '/') {
  769. coredump_report_failure("this process can only dump core to a fully qualified path, skipping core dump");
  770. return false;
  771. }
  772. /*
  773. * Unlink the file if it exists unless this is a SUID
  774. * binary - in that case, we're running around with root
  775. * privs and don't want to unlink another user's coredump.
  776. */
  777. if (!coredump_force_suid_safe(cprm)) {
  778. CLASS(filename_kernel, name)(cn->corename);
  779. /*
  780. * If it doesn't exist, that's fine. If there's some
  781. * other problem, we'll catch it at the filp_open().
  782. */
  783. filename_unlinkat(AT_FDCWD, name);
  784. }
  785. /*
  786. * There is a race between unlinking and creating the
  787. * file, but if that causes an EEXIST here, that's
  788. * fine - another process raced with us while creating
  789. * the corefile, and the other process won. To userspace,
  790. * what matters is that at least one of the two processes
  791. * writes its coredump successfully, not which one.
  792. */
  793. if (coredump_force_suid_safe(cprm)) {
  794. /*
  795. * Using user namespaces, normal user tasks can change
  796. * their current->fs->root to point to arbitrary
  797. * directories. Since the intention of the "only dump
  798. * with a fully qualified path" rule is to control where
  799. * coredumps may be placed using root privileges,
  800. * current->fs->root must not be used. Instead, use the
  801. * root directory of init_task.
  802. */
  803. struct path root;
  804. task_lock(&init_task);
  805. get_fs_root(init_task.fs, &root);
  806. task_unlock(&init_task);
  807. file = file_open_root(&root, cn->corename, open_flags, 0600);
  808. path_put(&root);
  809. } else {
  810. file = filp_open(cn->corename, open_flags, 0600);
  811. }
  812. if (IS_ERR(file))
  813. return false;
  814. inode = file_inode(file);
  815. if (inode->i_nlink > 1)
  816. return false;
  817. if (d_unhashed(file->f_path.dentry))
  818. return false;
  819. /*
  820. * AK: actually i see no reason to not allow this for named
  821. * pipes etc, but keep the previous behaviour for now.
  822. */
  823. if (!S_ISREG(inode->i_mode))
  824. return false;
  825. /*
  826. * Don't dump core if the filesystem changed owner or mode
  827. * of the file during file creation. This is an issue when
  828. * a process dumps core while its cwd is e.g. on a vfat
  829. * filesystem.
  830. */
  831. idmap = file_mnt_idmap(file);
  832. if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) {
  833. coredump_report_failure("Core dump to %s aborted: cannot preserve file owner", cn->corename);
  834. return false;
  835. }
  836. if ((inode->i_mode & 0677) != 0600) {
  837. coredump_report_failure("Core dump to %s aborted: cannot preserve file permissions", cn->corename);
  838. return false;
  839. }
  840. if (!(file->f_mode & FMODE_CAN_WRITE))
  841. return false;
  842. if (do_truncate(idmap, file->f_path.dentry, 0, 0, file))
  843. return false;
  844. cprm->file = no_free_ptr(file);
  845. return true;
  846. }
  847. static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm,
  848. size_t *argv, int argc)
  849. {
  850. int argi;
  851. char **helper_argv __free(kfree) = NULL;
  852. struct subprocess_info *sub_info;
  853. if (cprm->limit == 1) {
  854. /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
  855. *
  856. * Normally core limits are irrelevant to pipes, since
  857. * we're not writing to the file system, but we use
  858. * cprm.limit of 1 here as a special value, this is a
  859. * consistent way to catch recursive crashes.
  860. * We can still crash if the core_pattern binary sets
  861. * RLIM_CORE = !1, but it runs as root, and can do
  862. * lots of stupid things.
  863. *
  864. * Note that we use task_tgid_vnr here to grab the pid
  865. * of the process group leader. That way we get the
  866. * right pid if a thread in a multi-threaded
  867. * core_pattern process dies.
  868. */
  869. coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
  870. return false;
  871. }
  872. cprm->limit = RLIM_INFINITY;
  873. cn->core_pipe_limit = atomic_inc_return(&core_pipe_count);
  874. if (core_pipe_limit && (core_pipe_limit < cn->core_pipe_limit)) {
  875. coredump_report_failure("over core_pipe_limit, skipping core dump");
  876. return false;
  877. }
  878. helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL);
  879. if (!helper_argv) {
  880. coredump_report_failure("%s failed to allocate memory", __func__);
  881. return false;
  882. }
  883. for (argi = 0; argi < argc; argi++)
  884. helper_argv[argi] = cn->corename + argv[argi];
  885. helper_argv[argi] = NULL;
  886. sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL,
  887. GFP_KERNEL, umh_coredump_setup,
  888. NULL, cprm);
  889. if (!sub_info)
  890. return false;
  891. if (call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC)) {
  892. coredump_report_failure("|%s pipe failed", cn->corename);
  893. return false;
  894. }
  895. /*
  896. * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
  897. * have this set to NULL.
  898. */
  899. if (!cprm->file) {
  900. coredump_report_failure("Core dump to |%s disabled", cn->corename);
  901. return false;
  902. }
  903. return true;
  904. }
  905. static bool coredump_write(struct core_name *cn,
  906. struct coredump_params *cprm,
  907. const struct linux_binfmt *binfmt)
  908. {
  909. if (dump_interrupted())
  910. return true;
  911. if (!dump_vma_snapshot(cprm))
  912. return false;
  913. file_start_write(cprm->file);
  914. cn->core_dumped = binfmt->core_dump(cprm);
  915. /*
  916. * Ensures that file size is big enough to contain the current
  917. * file postion. This prevents gdb from complaining about
  918. * a truncated file if the last "write" to the file was
  919. * dump_skip.
  920. */
  921. if (cprm->to_skip) {
  922. cprm->to_skip--;
  923. dump_emit(cprm, "", 1);
  924. }
  925. file_end_write(cprm->file);
  926. free_vma_snapshot(cprm);
  927. return true;
  928. }
  929. static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm)
  930. {
  931. if (cprm->file)
  932. filp_close(cprm->file, NULL);
  933. if (cn->core_pipe_limit) {
  934. VFS_WARN_ON_ONCE(cn->core_type != COREDUMP_PIPE);
  935. atomic_dec(&core_pipe_count);
  936. }
  937. kfree(cn->corename);
  938. coredump_finish(cn->core_dumped);
  939. }
  940. static inline bool coredump_skip(const struct coredump_params *cprm,
  941. const struct linux_binfmt *binfmt)
  942. {
  943. if (!binfmt)
  944. return true;
  945. if (!binfmt->core_dump)
  946. return true;
  947. if (!__get_dumpable(cprm->mm_flags))
  948. return true;
  949. return false;
  950. }
  951. static void do_coredump(struct core_name *cn, struct coredump_params *cprm,
  952. size_t **argv, int *argc, const struct linux_binfmt *binfmt)
  953. {
  954. if (!coredump_parse(cn, cprm, argv, argc)) {
  955. coredump_report_failure("format_corename failed, aborting core");
  956. return;
  957. }
  958. switch (cn->core_type) {
  959. case COREDUMP_FILE:
  960. if (!coredump_file(cn, cprm, binfmt))
  961. return;
  962. break;
  963. case COREDUMP_PIPE:
  964. if (!coredump_pipe(cn, cprm, *argv, *argc))
  965. return;
  966. break;
  967. case COREDUMP_SOCK_REQ:
  968. fallthrough;
  969. case COREDUMP_SOCK:
  970. if (!coredump_socket(cn, cprm))
  971. return;
  972. break;
  973. default:
  974. WARN_ON_ONCE(true);
  975. return;
  976. }
  977. /* Don't even generate the coredump. */
  978. if (cn->mask & COREDUMP_REJECT)
  979. return;
  980. /* get us an unshared descriptor table; almost always a no-op */
  981. /* The cell spufs coredump code reads the file descriptor tables */
  982. if (unshare_files())
  983. return;
  984. if ((cn->mask & COREDUMP_KERNEL) && !coredump_write(cn, cprm, binfmt))
  985. return;
  986. coredump_sock_shutdown(cprm->file);
  987. /* Let the parent know that a coredump was generated. */
  988. if (cn->mask & COREDUMP_USERSPACE)
  989. cn->core_dumped = true;
  990. /*
  991. * When core_pipe_limit is set we wait for the coredump server
  992. * or usermodehelper to finish before exiting so it can e.g.,
  993. * inspect /proc/<pid>.
  994. */
  995. if (cn->mask & COREDUMP_WAIT) {
  996. switch (cn->core_type) {
  997. case COREDUMP_PIPE:
  998. wait_for_dump_helpers(cprm->file);
  999. break;
  1000. case COREDUMP_SOCK_REQ:
  1001. fallthrough;
  1002. case COREDUMP_SOCK:
  1003. coredump_sock_wait(cprm->file);
  1004. break;
  1005. default:
  1006. break;
  1007. }
  1008. }
  1009. }
/*
 * vfs_coredump - dump core for the current task
 * @siginfo: signal information; si_signo is audited and passed to
 *           coredump_wait()
 *
 * Builds the dump parameters from current->mm, prepares a private set of
 * credentials, and runs do_coredump() under those credentials, followed by
 * coredump_cleanup(). Returns early, without dumping, when coredump_skip()
 * says so, when credentials cannot be prepared, or when coredump_wait()
 * fails.
 */
void vfs_coredump(const kernel_siginfo_t *siginfo)
{
	/* Freed automatically on return via the __free(kfree) annotation. */
	size_t *argv __free(kfree) = NULL;
	struct core_state core_state;
	struct core_name cn;
	const struct mm_struct *mm = current->mm;
	const struct linux_binfmt *binfmt = mm->binfmt;
	int argc = 0;
	struct coredump_params cprm = {
		.siginfo = siginfo,
		.limit = rlimit(RLIMIT_CORE),
		/*
		 * We must use the same mm->flags while dumping core to avoid
		 * inconsistency of bit flags, since this flag is not protected
		 * by any locks.
		 *
		 * Note that we only care about MMF_DUMP* flags.
		 */
		.mm_flags = __mm_flags_get_dumpable(mm),
		.vma_meta = NULL,
		.cpu = raw_smp_processor_id(),
	};

	audit_core_dumps(siginfo->si_signo);

	/* No binary format, no core_dump hook, or not dumpable: nothing to do. */
	if (coredump_skip(&cprm, binfmt))
		return;

	CLASS(prepare_creds, cred)();
	if (!cred)
		return;
	/*
	 * We cannot trust fsuid as being the "true" uid of the process
	 * nor do we know its entire history. We only know it was tainted
	 * so we dump it as root in mode 2, and only into a controlled
	 * environment (pipe handler or fully qualified path).
	 */
	if (coredump_force_suid_safe(&cprm))
		cred->fsuid = GLOBAL_ROOT_UID;

	if (coredump_wait(siginfo->si_signo, &core_state) < 0)
		return;

	/* Only do_coredump() runs inside the scoped credential override. */
	scoped_with_creds(cred)
		do_coredump(&cn, &cprm, &argv, &argc, binfmt);
	coredump_cleanup(&cn, &cprm);
	return;
}
  1053. /*
  1054. * Core dumping helper functions. These are the only things you should
  1055. * do on a core-file: use only these functions to write out all the
  1056. * necessary info.
  1057. */
  1058. static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
  1059. {
  1060. struct file *file = cprm->file;
  1061. loff_t pos = file->f_pos;
  1062. ssize_t n;
  1063. if (cprm->written + nr > cprm->limit)
  1064. return 0;
  1065. if (dump_interrupted())
  1066. return 0;
  1067. n = __kernel_write(file, addr, nr, &pos);
  1068. if (n != nr)
  1069. return 0;
  1070. file->f_pos = pos;
  1071. cprm->written += n;
  1072. cprm->pos += n;
  1073. return 1;
  1074. }
  1075. static int __dump_skip(struct coredump_params *cprm, size_t nr)
  1076. {
  1077. static char zeroes[PAGE_SIZE];
  1078. struct file *file = cprm->file;
  1079. if (file->f_mode & FMODE_LSEEK) {
  1080. if (dump_interrupted() || vfs_llseek(file, nr, SEEK_CUR) < 0)
  1081. return 0;
  1082. cprm->pos += nr;
  1083. return 1;
  1084. }
  1085. while (nr > PAGE_SIZE) {
  1086. if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
  1087. return 0;
  1088. nr -= PAGE_SIZE;
  1089. }
  1090. return __dump_emit(cprm, zeroes, nr);
  1091. }
  1092. int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
  1093. {
  1094. if (cprm->to_skip) {
  1095. if (!__dump_skip(cprm, cprm->to_skip))
  1096. return 0;
  1097. cprm->to_skip = 0;
  1098. }
  1099. return __dump_emit(cprm, addr, nr);
  1100. }
  1101. EXPORT_SYMBOL(dump_emit);
/*
 * dump_skip_to - arrange for the next write to land at offset @pos
 * @cprm: dump parameters
 * @pos:  target position in the core file
 *
 * Only records the distance from the current position (cprm->pos) in
 * cprm->to_skip, replacing any previously pending skip. Nothing is written
 * here; dump_emit() carries out the skip before its next write.
 */
void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
{
	cprm->to_skip = pos - cprm->pos;
}
EXPORT_SYMBOL(dump_skip_to);
  1107. void dump_skip(struct coredump_params *cprm, size_t nr)
  1108. {
  1109. cprm->to_skip += nr;
  1110. }
  1111. EXPORT_SYMBOL(dump_skip);
  1112. #ifdef CONFIG_ELF_CORE
  1113. static int dump_emit_page(struct coredump_params *cprm, struct page *page)
  1114. {
  1115. struct bio_vec bvec;
  1116. struct iov_iter iter;
  1117. struct file *file = cprm->file;
  1118. loff_t pos;
  1119. ssize_t n;
  1120. if (!page)
  1121. return 0;
  1122. if (cprm->to_skip) {
  1123. if (!__dump_skip(cprm, cprm->to_skip))
  1124. return 0;
  1125. cprm->to_skip = 0;
  1126. }
  1127. if (cprm->written + PAGE_SIZE > cprm->limit)
  1128. return 0;
  1129. if (dump_interrupted())
  1130. return 0;
  1131. pos = file->f_pos;
  1132. bvec_set_page(&bvec, page, PAGE_SIZE, 0);
  1133. iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
  1134. n = __kernel_write_iter(cprm->file, &iter, &pos);
  1135. if (n != PAGE_SIZE)
  1136. return 0;
  1137. file->f_pos = pos;
  1138. cprm->written += PAGE_SIZE;
  1139. cprm->pos += PAGE_SIZE;
  1140. return 1;
  1141. }
  1142. /*
  1143. * If we might get machine checks from kernel accesses during the
  1144. * core dump, let's get those errors early rather than during the
  1145. * IO. This is not performance-critical enough to warrant having
  1146. * all the machine check logic in the iovec paths.
  1147. */
  1148. #ifdef copy_mc_to_kernel
  1149. #define dump_page_alloc() alloc_page(GFP_KERNEL)
  1150. #define dump_page_free(x) __free_page(x)
  1151. static struct page *dump_page_copy(struct page *src, struct page *dst)
  1152. {
  1153. void *buf = kmap_local_page(src);
  1154. size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE);
  1155. kunmap_local(buf);
  1156. return left ? NULL : dst;
  1157. }
  1158. #else
  1159. /* We just want to return non-NULL; it's never used. */
  1160. #define dump_page_alloc() ERR_PTR(-EINVAL)
  1161. #define dump_page_free(x) ((void)(x))
  1162. static inline struct page *dump_page_copy(struct page *src, struct page *dst)
  1163. {
  1164. return src;
  1165. }
  1166. #endif
/*
 * dump_user_range - write a range of the current task's memory to the core
 * @cprm:  dump parameters
 * @start: first user address to dump
 * @len:   length of the range in bytes
 *
 * Walks the range one page at a time. Pages returned by get_dump_page() are
 * emitted (through dump_page_copy()); addresses for which it returns NULL
 * are turned into a skip of PAGE_SIZE.
 *
 * Returns 1 when the whole range was processed and 0 when allocation of the
 * bounce page failed, the mmap lock could not be taken, emitting a page
 * failed, or the dump was interrupted.
 */
int dump_user_range(struct coredump_params *cprm, unsigned long start,
		    unsigned long len)
{
	unsigned long addr;
	struct page *dump_page;
	/* locked tracks whether this function currently holds the mmap read lock. */
	int locked, ret;

	dump_page = dump_page_alloc();
	if (!dump_page)
		return 0;

	ret = 0;
	locked = 0;
	for (addr = start; addr < start + len; addr += PAGE_SIZE) {
		struct page *page;

		if (!locked) {
			if (mmap_read_lock_killable(current->mm))
				goto out;
			locked = 1;
		}

		/*
		 * To avoid having to allocate page tables for virtual address
		 * ranges that have never been used yet, and also to make it
		 * easy to generate sparse core files, use a helper that returns
		 * NULL when encountering an empty page table entry that would
		 * otherwise have been filled with the zero page.
		 */
		/* get_dump_page() receives &locked and may update it. */
		page = get_dump_page(addr, &locked);
		if (page) {
			/* Drop the mmap lock before writing the page out. */
			if (locked) {
				mmap_read_unlock(current->mm);
				locked = 0;
			}
			int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
			put_page(page);
			if (stop)
				goto out;
		} else {
			dump_skip(cprm, PAGE_SIZE);
		}

		if (dump_interrupted())
			goto out;

		if (!need_resched())
			continue;
		/* Release the lock, if still held, before rescheduling. */
		if (locked) {
			mmap_read_unlock(current->mm);
			locked = 0;
		}
		cond_resched();
	}
	ret = 1;
out:
	if (locked)
		mmap_read_unlock(current->mm);

	dump_page_free(dump_page);
	return ret;
}
  1222. #endif
  1223. int dump_align(struct coredump_params *cprm, int align)
  1224. {
  1225. unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1);
  1226. if (align & (align - 1))
  1227. return 0;
  1228. if (mod)
  1229. cprm->to_skip += align - mod;
  1230. return 1;
  1231. }
  1232. EXPORT_SYMBOL(dump_align);
  1233. #ifdef CONFIG_SYSCTL
  1234. void validate_coredump_safety(void)
  1235. {
  1236. if (suid_dumpable == SUID_DUMP_ROOT &&
  1237. core_pattern[0] != '/' && core_pattern[0] != '|' && core_pattern[0] != '@') {
  1238. coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: "
  1239. "pipe handler or fully qualified core dump path required. "
  1240. "Set kernel.core_pattern before fs.suid_dumpable.");
  1241. }
  1242. }
  1243. static inline bool check_coredump_socket(void)
  1244. {
  1245. const char *p;
  1246. if (core_pattern[0] != '@')
  1247. return true;
  1248. /*
  1249. * Coredump socket must be located in the initial mount
  1250. * namespace. Don't give the impression that anything else is
  1251. * supported right now.
  1252. */
  1253. if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns)
  1254. return false;
  1255. /* Must be an absolute path... */
  1256. if (core_pattern[1] != '/') {
  1257. /* ... or the socket request protocol... */
  1258. if (core_pattern[1] != '@')
  1259. return false;
  1260. /* ... and if so must be an absolute path. */
  1261. if (core_pattern[2] != '/')
  1262. return false;
  1263. p = &core_pattern[2];
  1264. } else {
  1265. p = &core_pattern[1];
  1266. }
  1267. /* The path obviously cannot exceed UNIX_PATH_MAX. */
  1268. if (strlen(p) >= UNIX_PATH_MAX)
  1269. return false;
  1270. /* Must not contain ".." in the path. */
  1271. if (name_contains_dotdot(core_pattern))
  1272. return false;
  1273. return true;
  1274. }
  1275. static int proc_dostring_coredump(const struct ctl_table *table, int write,
  1276. void *buffer, size_t *lenp, loff_t *ppos)
  1277. {
  1278. int error;
  1279. ssize_t retval;
  1280. char old_core_pattern[CORENAME_MAX_SIZE];
  1281. if (!write)
  1282. return proc_dostring(table, write, buffer, lenp, ppos);
  1283. retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
  1284. error = proc_dostring(table, write, buffer, lenp, ppos);
  1285. if (error)
  1286. return error;
  1287. if (!check_coredump_socket()) {
  1288. strscpy(core_pattern, old_core_pattern, retval + 1);
  1289. return -EINVAL;
  1290. }
  1291. validate_coredump_safety();
  1292. return error;
  1293. }
/* Bounds for the core_file_note_size_limit sysctl (used as extra1/extra2). */
static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX;

/*
 * Newline-separated list exposed read-only through the core_modes sysctl.
 * "socket" is listed only when CONFIG_UNIX is enabled.
 */
static char core_modes[] = {
	"file\npipe"
#ifdef CONFIG_UNIX
	"\nsocket"
#endif
};
/*
 * Coredump sysctl table, registered under "kernel" by
 * init_fs_coredump_sysctls().
 */
static const struct ctl_table coredump_sysctls[] = {
	{
		.procname	= "core_uses_pid",
		.data		= &core_uses_pid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Writes are checked by proc_dostring_coredump(). */
		.procname	= "core_pattern",
		.data		= core_pattern,
		.maxlen		= CORENAME_MAX_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring_coredump,
	},
	{
		/* Restricted to the range [0, INT_MAX]. */
		.procname	= "core_pipe_limit",
		.data		= &core_pipe_limit,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_INT_MAX,
	},
	{
		/* Bounded by core_file_note_size_min/max. */
		.procname       = "core_file_note_size_limit",
		.data           = &core_file_note_size_limit,
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= (unsigned int *)&core_file_note_size_min,
		.extra2		= (unsigned int *)&core_file_note_size_max,
	},
	{
		/* Boolean switch: only 0 and 1 are accepted. */
		.procname	= "core_sort_vma",
		.data		= &core_sort_vma,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		/* Read-only (0444); maxlen leaves out the terminating NUL. */
		.procname	= "core_modes",
		.data		= core_modes,
		.maxlen		= sizeof(core_modes) - 1,
		.mode		= 0444,
		.proc_handler	= proc_dostring,
	},
};
/*
 * Register the coredump sysctl table under "kernel". Runs as an
 * fs_initcall and always returns 0.
 */
static int __init init_fs_coredump_sysctls(void)
{
	register_sysctl_init("kernel", coredump_sysctls);
	return 0;
}
fs_initcall(init_fs_coredump_sysctls);
  1358. #endif /* CONFIG_SYSCTL */
  1359. /*
  1360. * The purpose of always_dump_vma() is to make sure that special kernel mappings
  1361. * that are useful for post-mortem analysis are included in every core dump.
  1362. * In that way we ensure that the core dump is fully interpretable later
  1363. * without matching up the same kernel and hardware config to see what PC values
  1364. * meant. These special mappings include - vDSO, vsyscall, and other
  1365. * architecture specific mappings
  1366. */
  1367. static bool always_dump_vma(struct vm_area_struct *vma)
  1368. {
  1369. /* Any vsyscall mappings? */
  1370. if (vma == get_gate_vma(vma->vm_mm))
  1371. return true;
  1372. /*
  1373. * Assume that all vmas with a .name op should always be dumped.
  1374. * If this changes, a new vm_ops field can easily be added.
  1375. */
  1376. if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
  1377. return true;
  1378. /*
  1379. * arch_vma_name() returns non-NULL for special architecture mappings,
  1380. * such as vDSO sections.
  1381. */
  1382. if (arch_vma_name(vma))
  1383. return true;
  1384. return false;
  1385. }
  1386. #define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
  1387. /*
  1388. * Decide how much of @vma's contents should be included in a core dump.
  1389. */
  1390. static unsigned long vma_dump_size(struct vm_area_struct *vma,
  1391. unsigned long mm_flags)
  1392. {
  1393. #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
  1394. /* always dump the vdso and vsyscall sections */
  1395. if (always_dump_vma(vma))
  1396. goto whole;
  1397. if (vma->vm_flags & VM_DONTDUMP)
  1398. return 0;
  1399. /* support for DAX */
  1400. if (vma_is_dax(vma)) {
  1401. if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
  1402. goto whole;
  1403. if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
  1404. goto whole;
  1405. return 0;
  1406. }
  1407. /* Hugetlb memory check */
  1408. if (is_vm_hugetlb_page(vma)) {
  1409. if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
  1410. goto whole;
  1411. if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
  1412. goto whole;
  1413. return 0;
  1414. }
  1415. /* Do not dump I/O mapped devices or special mappings */
  1416. if (vma->vm_flags & VM_IO)
  1417. return 0;
  1418. /* By default, dump shared memory if mapped from an anonymous file. */
  1419. if (vma->vm_flags & VM_SHARED) {
  1420. if (file_inode(vma->vm_file)->i_nlink == 0 ?
  1421. FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
  1422. goto whole;
  1423. return 0;
  1424. }
  1425. /* Dump segments that have been written to. */
  1426. if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
  1427. goto whole;
  1428. if (vma->vm_file == NULL)
  1429. return 0;
  1430. if (FILTER(MAPPED_PRIVATE))
  1431. goto whole;
  1432. /*
  1433. * If this is the beginning of an executable file mapping,
  1434. * dump the first page to aid in determining what was mapped here.
  1435. */
  1436. if (FILTER(ELF_HEADERS) &&
  1437. vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
  1438. if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
  1439. return PAGE_SIZE;
  1440. /*
  1441. * ELF libraries aren't always executable.
  1442. * We'll want to check whether the mapping starts with the ELF
  1443. * magic, but not now - we're holding the mmap lock,
  1444. * so copy_from_user() doesn't work here.
  1445. * Use a placeholder instead, and fix it up later in
  1446. * dump_vma_snapshot().
  1447. */
  1448. return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
  1449. }
  1450. #undef FILTER
  1451. return 0;
  1452. whole:
  1453. return vma->vm_end - vma->vm_start;
  1454. }
  1455. /*
  1456. * Helper function for iterating across a vma list. It ensures that the caller
  1457. * will visit `gate_vma' prior to terminating the search.
  1458. */
  1459. static struct vm_area_struct *coredump_next_vma(struct vma_iterator *vmi,
  1460. struct vm_area_struct *vma,
  1461. struct vm_area_struct *gate_vma)
  1462. {
  1463. if (gate_vma && (vma == gate_vma))
  1464. return NULL;
  1465. vma = vma_next(vmi);
  1466. if (vma)
  1467. return vma;
  1468. return gate_vma;
  1469. }
  1470. static void free_vma_snapshot(struct coredump_params *cprm)
  1471. {
  1472. if (cprm->vma_meta) {
  1473. int i;
  1474. for (i = 0; i < cprm->vma_count; i++) {
  1475. struct file *file = cprm->vma_meta[i].file;
  1476. if (file)
  1477. fput(file);
  1478. }
  1479. kvfree(cprm->vma_meta);
  1480. cprm->vma_meta = NULL;
  1481. }
  1482. }
  1483. static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr)
  1484. {
  1485. const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr;
  1486. const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr;
  1487. if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
  1488. return -1;
  1489. if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
  1490. return 1;
  1491. return 0;
  1492. }
  1493. /*
  1494. * Under the mmap_lock, take a snapshot of relevant information about the task's
  1495. * VMAs.
  1496. */
/*
 * Fill cprm->vma_meta with one entry per VMA of the current task (plus the
 * gate VMA, if any), and compute cprm->vma_count and cprm->vma_data_size.
 *
 * Returns false when the mmap lock cannot be taken or the array cannot be
 * allocated, true otherwise. Each entry with a backing file holds a
 * reference on it; free_vma_snapshot() releases them.
 */
static bool dump_vma_snapshot(struct coredump_params *cprm)
{
	struct vm_area_struct *gate_vma, *vma = NULL;
	struct mm_struct *mm = current->mm;
	VMA_ITERATOR(vmi, mm, 0);
	int i = 0;

	/*
	 * Once the stack expansion code is fixed to not change VMA bounds
	 * under mmap_lock in read mode, this can be changed to take the
	 * mmap_lock in read mode.
	 */
	if (mmap_write_lock_killable(mm))
		return false;

	cprm->vma_data_size = 0;
	gate_vma = get_gate_vma(mm);
	cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);

	cprm->vma_meta = kvmalloc_objs(*cprm->vma_meta, cprm->vma_count);
	if (!cprm->vma_meta) {
		mmap_write_unlock(mm);
		return false;
	}

	/* First pass, under the lock: copy what is needed out of each VMA. */
	while ((vma = coredump_next_vma(&vmi, vma, gate_vma)) != NULL) {
		struct core_vma_metadata *m = cprm->vma_meta + i;

		m->start = vma->vm_start;
		m->end = vma->vm_end;
		m->flags = vma->vm_flags;
		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
		m->pgoff = vma->vm_pgoff;
		m->file = vma->vm_file;
		if (m->file)
			get_file(m->file);
		i++;
	}

	mmap_write_unlock(mm);

	/*
	 * Second pass, lock dropped: resolve the ELF header placeholders
	 * (this needs copy_from_user()) and total up the dump sizes.
	 */
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *m = cprm->vma_meta + i;

		if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
			char elfmag[SELFMAG];

			/* Dump the first page only if it starts with the ELF magic. */
			if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) ||
					memcmp(elfmag, ELFMAG, SELFMAG) != 0) {
				m->dump_size = 0;
			} else {
				m->dump_size = PAGE_SIZE;
			}
		}

		cprm->vma_data_size += m->dump_size;
	}

	/* Optionally order the entries by ascending dump size. */
	if (core_sort_vma)
		sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta),
		     cmp_vma_size, NULL);

	return true;
}