devcoredump.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright(c) 2014 Intel Mobile Communications GmbH
  4. * Copyright(c) 2015 Intel Deutschland GmbH
  5. *
  6. * Author: Johannes Berg <johannes@sipsolutions.net>
  7. */
  8. #include <linux/module.h>
  9. #include <linux/device.h>
  10. #include <linux/devcoredump.h>
  11. #include <linux/list.h>
  12. #include <linux/slab.h>
  13. #include <linux/fs.h>
  14. #include <linux/workqueue.h>
  15. static struct class devcd_class;
  16. /* global disable flag, for security purposes */
  17. static bool devcd_disabled;
  18. struct devcd_entry {
  19. struct device devcd_dev;
  20. void *data;
  21. size_t datalen;
  22. /*
  23. * There are 2 races for which mutex is required.
  24. *
  25. * The first race is between device creation and userspace writing to
  26. * schedule immediately destruction.
  27. *
  28. * This race is handled by arming the timer before device creation, but
  29. * when device creation fails the timer still exists.
  30. *
  31. * To solve this, hold the mutex during device_add(), and set
  32. * init_completed on success before releasing the mutex.
  33. *
  34. * That way the timer will never fire until device_add() is called,
  35. * it will do nothing if init_completed is not set. The timer is also
  36. * cancelled in that case.
  37. *
  38. * The second race involves multiple parallel invocations of devcd_free(),
  39. * add a deleted flag so only 1 can call the destructor.
  40. */
  41. struct mutex mutex;
  42. bool init_completed, deleted;
  43. struct module *owner;
  44. ssize_t (*read)(char *buffer, loff_t offset, size_t count,
  45. void *data, size_t datalen);
  46. void (*free)(void *data);
  47. /*
  48. * If nothing interferes and device_add() was returns success,
  49. * del_wk will destroy the device after the timer fires.
  50. *
  51. * Multiple userspace processes can interfere in the working of the timer:
  52. * - Writing to the coredump will reschedule the timer to run immediately,
  53. * if still armed.
  54. *
  55. * This is handled by using "if (cancel_delayed_work()) {
  56. * schedule_delayed_work() }", to prevent re-arming after having
  57. * been previously fired.
  58. * - Writing to /sys/class/devcoredump/disabled will destroy the
  59. * coredump synchronously.
  60. * This is handled by using disable_delayed_work_sync(), and then
  61. * checking if deleted flag is set with &devcd->mutex held.
  62. */
  63. struct delayed_work del_wk;
  64. struct device *failing_dev;
  65. };
  66. static struct devcd_entry *dev_to_devcd(struct device *dev)
  67. {
  68. return container_of(dev, struct devcd_entry, devcd_dev);
  69. }
  70. static void devcd_dev_release(struct device *dev)
  71. {
  72. struct devcd_entry *devcd = dev_to_devcd(dev);
  73. devcd->free(devcd->data);
  74. module_put(devcd->owner);
  75. /*
  76. * this seems racy, but I don't see a notifier or such on
  77. * a struct device to know when it goes away?
  78. */
  79. if (devcd->failing_dev->kobj.sd)
  80. sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj,
  81. "devcoredump");
  82. put_device(devcd->failing_dev);
  83. kfree(devcd);
  84. }
  85. static void __devcd_del(struct devcd_entry *devcd)
  86. {
  87. devcd->deleted = true;
  88. device_del(&devcd->devcd_dev);
  89. put_device(&devcd->devcd_dev);
  90. }
  91. static void devcd_del(struct work_struct *wk)
  92. {
  93. struct devcd_entry *devcd;
  94. bool init_completed;
  95. devcd = container_of(wk, struct devcd_entry, del_wk.work);
  96. /* devcd->mutex serializes against dev_coredumpm_timeout */
  97. mutex_lock(&devcd->mutex);
  98. init_completed = devcd->init_completed;
  99. mutex_unlock(&devcd->mutex);
  100. if (init_completed)
  101. __devcd_del(devcd);
  102. }
  103. static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
  104. const struct bin_attribute *bin_attr,
  105. char *buffer, loff_t offset, size_t count)
  106. {
  107. struct device *dev = kobj_to_dev(kobj);
  108. struct devcd_entry *devcd = dev_to_devcd(dev);
  109. return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
  110. }
  111. static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
  112. const struct bin_attribute *bin_attr,
  113. char *buffer, loff_t offset, size_t count)
  114. {
  115. struct device *dev = kobj_to_dev(kobj);
  116. struct devcd_entry *devcd = dev_to_devcd(dev);
  117. /*
  118. * Although it's tempting to use mod_delayed work here,
  119. * that will cause a reschedule if the timer already fired.
  120. */
  121. if (cancel_delayed_work(&devcd->del_wk))
  122. schedule_delayed_work(&devcd->del_wk, 0);
  123. return count;
  124. }
  125. static const struct bin_attribute devcd_attr_data =
  126. __BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0);
  127. static const struct bin_attribute *const devcd_dev_bin_attrs[] = {
  128. &devcd_attr_data, NULL,
  129. };
  130. static const struct attribute_group devcd_dev_group = {
  131. .bin_attrs = devcd_dev_bin_attrs,
  132. };
  133. static const struct attribute_group *devcd_dev_groups[] = {
  134. &devcd_dev_group, NULL,
  135. };
  136. static int devcd_free(struct device *dev, void *data)
  137. {
  138. struct devcd_entry *devcd = dev_to_devcd(dev);
  139. /*
  140. * To prevent a race with devcd_data_write(), disable work and
  141. * complete manually instead.
  142. *
  143. * We cannot rely on the return value of
  144. * disable_delayed_work_sync() here, because it might be in the
  145. * middle of a cancel_delayed_work + schedule_delayed_work pair.
  146. *
  147. * devcd->mutex here guards against multiple parallel invocations
  148. * of devcd_free().
  149. */
  150. disable_delayed_work_sync(&devcd->del_wk);
  151. mutex_lock(&devcd->mutex);
  152. if (!devcd->deleted)
  153. __devcd_del(devcd);
  154. mutex_unlock(&devcd->mutex);
  155. return 0;
  156. }
  157. static ssize_t disabled_show(const struct class *class, const struct class_attribute *attr,
  158. char *buf)
  159. {
  160. return sysfs_emit(buf, "%d\n", devcd_disabled);
  161. }
  162. /*
  163. *
  164. * disabled_store() worker()
  165. * class_for_each_device(&devcd_class,
  166. * NULL, NULL, devcd_free)
  167. * ...
  168. * ...
  169. * while ((dev = class_dev_iter_next(&iter))
  170. * devcd_del()
  171. * device_del()
  172. * put_device() <- last reference
  173. * error = fn(dev, data) devcd_dev_release()
  174. * devcd_free(dev, data) kfree(devcd)
  175. *
  176. *
   * In the above diagram, it looks as though disabled_store() could race with a
   * concurrently running devcd_del() and hit a memory abort once devcd_del() has
   * dropped the last reference with put_device(). However, this cannot happen:
   * fn(dev, data) runs with its own reference to the device, held via the
   * klist_node, so the put_device() in devcd_del() is not the last reference
   * and the above situation does not occur.
  182. */
  183. static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr,
  184. const char *buf, size_t count)
  185. {
  186. long tmp = simple_strtol(buf, NULL, 10);
  187. /*
  188. * This essentially makes the attribute write-once, since you can't
  189. * go back to not having it disabled. This is intentional, it serves
  190. * as a system lockdown feature.
  191. */
  192. if (tmp != 1)
  193. return -EINVAL;
  194. devcd_disabled = true;
  195. class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
  196. return count;
  197. }
  198. static CLASS_ATTR_RW(disabled);
  199. static struct attribute *devcd_class_attrs[] = {
  200. &class_attr_disabled.attr,
  201. NULL,
  202. };
  203. ATTRIBUTE_GROUPS(devcd_class);
  204. static struct class devcd_class = {
  205. .name = "devcoredump",
  206. .dev_release = devcd_dev_release,
  207. .dev_groups = devcd_dev_groups,
  208. .class_groups = devcd_class_groups,
  209. };
  210. static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
  211. void *data, size_t datalen)
  212. {
  213. return memory_read_from_buffer(buffer, count, &offset, data, datalen);
  214. }
  /* Free callback used by dev_coredumpv(): release the data with vfree(). */
  static void devcd_freev(void *data)
  {
  	void *dump = data;

  	vfree(dump);
  }
  219. /**
  220. * dev_coredumpv - create device coredump with vmalloc data
  221. * @dev: the struct device for the crashed device
  222. * @data: vmalloc data containing the device coredump
  223. * @datalen: length of the data
  224. * @gfp: allocation flags
  225. *
  226. * This function takes ownership of the vmalloc'ed data and will free
  227. * it when it is no longer used. See dev_coredumpm() for more information.
  228. */
  229. void dev_coredumpv(struct device *dev, void *data, size_t datalen,
  230. gfp_t gfp)
  231. {
  232. dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev);
  233. }
  234. EXPORT_SYMBOL_GPL(dev_coredumpv);
  235. static int devcd_match_failing(struct device *dev, const void *failing)
  236. {
  237. struct devcd_entry *devcd = dev_to_devcd(dev);
  238. return devcd->failing_dev == failing;
  239. }
  /**
   * devcd_free_sgtable - free all the memory of the given scatterlist table
   * (i.e. both pages and scatterlist instances)
   * @data: pointer to sg_table to free
   *
   * NOTE: if two tables were allocated with devcd_alloc_sgtable and then
   * chained using the sg_chain function, this function should be called
   * only once, on the chained table.
   */
  static void devcd_free_sgtable(void *data)
  {
  	void *table = data;

  	_devcd_free_sgtable(table);
  }
  252. /**
  253. * devcd_read_from_sgtable - copy data from sg_table to a given buffer
  254. * and return the number of bytes read
  255. * @buffer: the buffer to copy the data to it
  256. * @buf_len: the length of the buffer
  257. * @data: the scatterlist table to copy from
  258. * @offset: start copy from @offset@ bytes from the head of the data
  259. * in the given scatterlist
  260. * @data_len: the length of the data in the sg_table
  261. *
  262. * Returns: the number of bytes copied
  263. */
  264. static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
  265. size_t buf_len, void *data,
  266. size_t data_len)
  267. {
  268. struct scatterlist *table = data;
  269. if (offset > data_len)
  270. return -EINVAL;
  271. if (offset + buf_len > data_len)
  272. buf_len = data_len - offset;
  273. return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len,
  274. offset);
  275. }
  276. /**
  277. * dev_coredump_put - remove device coredump
  278. * @dev: the struct device for the crashed device
  279. *
  280. * dev_coredump_put() removes coredump, if exists, for a given device from
  281. * the file system and free its associated data otherwise, does nothing.
  282. *
  283. * It is useful for modules that do not want to keep coredump
  284. * available after its unload.
  285. */
  286. void dev_coredump_put(struct device *dev)
  287. {
  288. struct device *existing;
  289. existing = class_find_device(&devcd_class, NULL, dev,
  290. devcd_match_failing);
  291. if (existing) {
  292. devcd_free(existing, NULL);
  293. put_device(existing);
  294. }
  295. }
  296. EXPORT_SYMBOL_GPL(dev_coredump_put);
  297. /**
  298. * dev_coredumpm_timeout - create device coredump with read/free methods with a
  299. * custom timeout.
  300. * @dev: the struct device for the crashed device
  301. * @owner: the module that contains the read/free functions, use %THIS_MODULE
  302. * @data: data cookie for the @read/@free functions
  303. * @datalen: length of the data
  304. * @gfp: allocation flags
  305. * @read: function to read from the given buffer
  306. * @free: function to free the given buffer
  307. * @timeout: time in jiffies to remove coredump
  308. *
  309. * Creates a new device coredump for the given device. If a previous one hasn't
  310. * been read yet, the new coredump is discarded. The data lifetime is determined
  311. * by the device coredump framework and when it is no longer needed the @free
  312. * function will be called to free the data.
  313. */
  314. void dev_coredumpm_timeout(struct device *dev, struct module *owner,
  315. void *data, size_t datalen, gfp_t gfp,
  316. ssize_t (*read)(char *buffer, loff_t offset,
  317. size_t count, void *data,
  318. size_t datalen),
  319. void (*free)(void *data),
  320. unsigned long timeout)
  321. {
  322. static atomic_t devcd_count = ATOMIC_INIT(0);
  323. struct devcd_entry *devcd;
  324. struct device *existing;
  325. if (devcd_disabled)
  326. goto free;
  327. existing = class_find_device(&devcd_class, NULL, dev,
  328. devcd_match_failing);
  329. if (existing) {
  330. put_device(existing);
  331. goto free;
  332. }
  333. if (!try_module_get(owner))
  334. goto free;
  335. devcd = kzalloc_obj(*devcd, gfp);
  336. if (!devcd)
  337. goto put_module;
  338. devcd->owner = owner;
  339. devcd->data = data;
  340. devcd->datalen = datalen;
  341. devcd->read = read;
  342. devcd->free = free;
  343. devcd->failing_dev = get_device(dev);
  344. devcd->deleted = false;
  345. mutex_init(&devcd->mutex);
  346. device_initialize(&devcd->devcd_dev);
  347. dev_set_name(&devcd->devcd_dev, "devcd%d",
  348. atomic_inc_return(&devcd_count));
  349. devcd->devcd_dev.class = &devcd_class;
  350. dev_set_uevent_suppress(&devcd->devcd_dev, true);
  351. /* devcd->mutex prevents devcd_del() completing until init finishes */
  352. mutex_lock(&devcd->mutex);
  353. devcd->init_completed = false;
  354. INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
  355. schedule_delayed_work(&devcd->del_wk, timeout);
  356. if (device_add(&devcd->devcd_dev))
  357. goto put_device;
  358. /*
  359. * These should normally not fail, but there is no problem
  360. * continuing without the links, so just warn instead of
  361. * failing.
  362. */
  363. if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,
  364. "failing_device") ||
  365. sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,
  366. "devcoredump"))
  367. dev_warn(dev, "devcoredump create_link failed\n");
  368. dev_set_uevent_suppress(&devcd->devcd_dev, false);
  369. kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
  370. /*
  371. * Safe to run devcd_del() now that we are done with devcd_dev.
  372. * Alternatively we could have taken a ref on devcd_dev before
  373. * dropping the lock.
  374. */
  375. devcd->init_completed = true;
  376. mutex_unlock(&devcd->mutex);
  377. return;
  378. put_device:
  379. mutex_unlock(&devcd->mutex);
  380. cancel_delayed_work_sync(&devcd->del_wk);
  381. put_device(&devcd->devcd_dev);
  382. put_module:
  383. module_put(owner);
  384. free:
  385. free(data);
  386. }
  387. EXPORT_SYMBOL_GPL(dev_coredumpm_timeout);
  388. /**
  389. * dev_coredumpsg - create device coredump that uses scatterlist as data
  390. * parameter
  391. * @dev: the struct device for the crashed device
  392. * @table: the dump data
  393. * @datalen: length of the data
  394. * @gfp: allocation flags
  395. *
  396. * Creates a new device coredump for the given device. If a previous one hasn't
  397. * been read yet, the new coredump is discarded. The data lifetime is determined
  398. * by the device coredump framework and when it is no longer needed
  399. * it will free the data.
  400. */
  401. void dev_coredumpsg(struct device *dev, struct scatterlist *table,
  402. size_t datalen, gfp_t gfp)
  403. {
  404. dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable,
  405. devcd_free_sgtable);
  406. }
  407. EXPORT_SYMBOL_GPL(dev_coredumpsg);
  408. static int __init devcoredump_init(void)
  409. {
  410. return class_register(&devcd_class);
  411. }
  412. __initcall(devcoredump_init);
  413. static void __exit devcoredump_exit(void)
  414. {
  415. class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
  416. class_unregister(&devcd_class);
  417. }
  418. __exitcall(devcoredump_exit);